001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 */ 020 021package org.biojava.nbio.structure.ecod; 022 023import java.io.BufferedReader; 024import java.io.File; 025import java.io.FileReader; 026import java.io.IOException; 027import java.io.Reader; 028import java.net.MalformedURLException; 029import java.net.URL; 030import java.util.ArrayList; 031import java.util.Collections; 032import java.util.HashMap; 033import java.util.LinkedHashSet; 034import java.util.LinkedList; 035import java.util.List; 036import java.util.Map; 037import java.util.Set; 038import java.util.concurrent.locks.ReadWriteLock; 039import java.util.concurrent.locks.ReentrantReadWriteLock; 040import java.util.regex.Matcher; 041import java.util.regex.Pattern; 042 043import org.biojava.nbio.structure.align.util.UserConfiguration; 044import org.biojava.nbio.structure.io.util.FileDownloadUtils; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048/** 049 * Provides access to the Evolutionary Classification of Protein Domains (ECOD). 050 * 051 * The preferred mechanism for obtaining instances of this class is through the 052 * {@link EcodFactory} class. 053 * 054 * Reference: 055 * H. Cheng, R. D. Schaeffer, Y. Liao, L. N. Kinch, J. Pei, S. Shi, B. H.\ 056 * Kim, N. V. Grishin. (2014) ECOD: An evolutionary classification of protein 057 * domains. PLoS Comput Biol 10(12): e1003926. 058 * http://prodata.swmed.edu/ecod/ 059 * 060 * @author Spencer Bliven 061 * 062 */ 063public class EcodInstallation implements EcodDatabase { 064 private static final Logger logger = LoggerFactory.getLogger(EcodInstallation.class); 065 066 public static final String DEFAULT_VERSION = "latest"; 067 private static final String DOMAINS_FILENAME_FORMAT = "ecod.%s.domains.txt"; 068 069 public static final String ECOD_URL = "http://prodata.swmed.edu"; 070 public static final String DOMAINS_PATH = "/ecod/distributions/"; 071 072 // ECOD identifiers are e<pdbID><chain><domain>, where chain and domain 073 // Chain and domain can both be multi-letter (e.g. e2q7zA10) 074 public static final Pattern ECOD_RE = Pattern.compile("^e(....).+\\d+$"); 075 076 077 private String cacheLocation; 078 private String requestedVersion; // version requested, e.g. "latest". Used for the paths 079 private String parsedVersion; // actual version parsed 080 081 // lock to prevent multiple threads from downloading simultaneously 082 // Should hold the lock when reading/writing allDomains or domainMap 083 private ReadWriteLock domainsFileLock; 084 private List<EcodDomain> allDomains; 085 private Map<String,List<EcodDomain>> domainMap;//PDB ID -> domains, lazily constructed from allDomains 086 087 private String url; 088 089 /** 090 * Use EcodFactory to create instances. The instantiation of multiple 091 * installations at the same path can lead to race conditions when downloading 092 * files. 093 * @param cacheLocation Location to save files, typically from the PDB_CACHE_DIR parameter 094 * @param requestedVersion ECOD requestedVersion to fetch 095 */ 096 public EcodInstallation(String cacheLocation, String version) { 097 domainsFileLock = new ReentrantReadWriteLock(); 098 099 this.cacheLocation = cacheLocation; 100 101 this.requestedVersion = version; 102 this.url = ECOD_URL; 103 104 allDomains = null; // null signals it needs to be parsed 105 domainMap = null; // null signals it needs to be constructed from allDomains 106 } 107 108 /** 109 * @see EcodFactory#getEcodDatabase() 110 */ 111 EcodInstallation() { 112 this( new UserConfiguration().getCacheFilePath(), DEFAULT_VERSION ); 113 } 114 /** 115 public EcodInstallation(String cacheLocation) { 116 this( cacheLocation, DEFAULT_VERSION ); 117 } 118 119 /** 120 * Get a list of all ECOD domains for a particular PDB ID 121 * @param pdbId 122 * @return the list of domains, or null if no matching domains were found 123 * @throws IOException 124 */ 125 @Override 126 public List<EcodDomain> getDomainsForPdb(String pdbId) throws IOException { 127 domainsFileLock.readLock().lock(); 128 try { 129 logger.trace("LOCK readlock"); 130 while( domainMap == null ) { 131 // unlock to allow ensureDomainsFileInstalled to get the write lock 132 logger.trace("UNLOCK readlock"); 133 domainsFileLock.readLock().unlock(); 134 indexDomains(); 135 domainsFileLock.readLock().lock(); 136 logger.trace("LOCK readlock"); 137 } 138 139 if(pdbId != null) 140 pdbId = pdbId.toLowerCase(); 141 List<EcodDomain> doms = domainMap.get(pdbId); 142 if(doms == null) { 143 return null; 144 } 145 // Deep clone 146 List<EcodDomain> clonedDoms = new ArrayList<EcodDomain>(doms.size()); 147 for(EcodDomain d : doms) { 148 clonedDoms.add( new EcodDomain(d) ); 149 } 150 return clonedDoms; 151 } finally { 152 logger.trace("UNLOCK readlock"); 153 domainsFileLock.readLock().unlock(); 154 } 155 } 156 157 /** 158 * Get a list of domains within a particular level of the hierarchy 159 * @param hierarchy A dot-separated list giving the X-group, H-group, and/or 160 * T-group (e.g. "1.1" for all members of the RIFT-related H-group) 161 * @return 162 * @throws IOException 163 */ 164 @Override 165 public List<EcodDomain> filterByHierarchy(String hierarchy) throws IOException { 166 String[] xhtGroup = hierarchy.split("\\."); 167 Integer xGroup = xhtGroup.length>0 ? Integer.parseInt(xhtGroup[0]) : null; 168 Integer hGroup = xhtGroup.length>1 ? Integer.parseInt(xhtGroup[1]) : null; 169 Integer tGroup = xhtGroup.length>2 ? Integer.parseInt(xhtGroup[2]) : null; 170 171 List<EcodDomain> filtered = new ArrayList<EcodDomain>(); 172 for(EcodDomain d: getAllDomains()) { 173 boolean match = true; 174 if(xhtGroup.length>0) { 175 match = match && xGroup.equals(d.getXGroup()); 176 } 177 if(xhtGroup.length>1) { 178 match = match && hGroup.equals(d.getHGroup()); 179 } 180 if(xhtGroup.length>2) { 181 match = match && tGroup.equals(d.getTGroup()); 182 } 183 if(xhtGroup.length>3) { 184 logger.warn("Ignoring unexpected additional parts of ECOD {}",hierarchy); 185 } 186 if(match) { 187 filtered.add(d); 188 } 189 } 190 return filtered; 191 } 192 193 /** 194 * Get a particular ECOD domain by the domain ID (e.g. "e4hhbA1") 195 * @param ecodId 196 * @return 197 * @throws IOException 198 */ 199 @Override 200 public EcodDomain getDomainsById(String ecodId) throws IOException { 201 if(ecodId == null || ecodId.isEmpty()) { 202 return null; 203 } 204 205 Matcher match = ECOD_RE.matcher(ecodId); 206 String pdbId = null; 207 if( match.matches() ) 208 pdbId = match.group(1); 209 List<EcodDomain> doms = getDomainsForPdb(pdbId); 210 if(doms == null) { 211 logger.debug("Null domains for {} from {}",pdbId,ecodId); 212 return null; 213 } 214 logger.debug("Got {} domains from {}",doms.size(),pdbId); 215 for(EcodDomain d: doms) { 216 if(ecodId.equals(d.getDomainId())) { 217 return d; 218 } 219 } 220 return null; 221 } 222 223 /** 224 * Get all ECOD domains 225 * @return 226 * @throws IOException 227 */ 228 @Override 229 public List<EcodDomain> getAllDomains() throws IOException { 230 domainsFileLock.readLock().lock(); 231 logger.trace("LOCK readlock"); 232 try { 233 while( allDomains == null) { 234 // unlock to allow ensureDomainsFileInstalled to get the write lock 235 logger.trace("UNLOCK readlock"); 236 domainsFileLock.readLock().unlock(); 237 ensureDomainsFileInstalled(); 238 domainsFileLock.readLock().lock(); 239 logger.trace("LOCK readlock"); 240 } 241 return allDomains; 242 } finally { 243 logger.trace("UNLOCK readlock"); 244 domainsFileLock.readLock().unlock(); 245 } 246 247 } 248 249 /** 250 * Clears all domains, requiring the file to be reparsed for subsequent accesses 251 */ 252 public void clear() { 253 domainsFileLock.writeLock().lock(); 254 logger.trace("LOCK writelock"); 255 allDomains = null; 256 domainMap = null; 257 logger.trace("UNLOCK writelock"); 258 domainsFileLock.writeLock().unlock(); 259 } 260 /** 261 * Return the ECOD version, as parsed from the file. 262 * 263 * Note that this may differ from the version requested in the constructor 264 * for the special case of "latest" 265 * @return the ECOD version 266 * @throws IOException If an error occurs while downloading or parsing the file 267 */ 268 @Override 269 public String getVersion() throws IOException { 270 ensureDomainsFileInstalled(); 271 272 if( parsedVersion == null) { 273 return requestedVersion; 274 } 275 return parsedVersion; 276 } 277 278 /** 279 * Get the top-level ECOD server URL. Defaults to "http://prodata.swmed.edu" 280 * @return the url to the ecod server 281 */ 282 public String getUrl() { 283 return url; 284 } 285 286 /** 287 * Specify a different mirror for the ECOD server. 288 * @param urlFormat the urlFormat to set 289 */ 290 public void setUrl(String url) { 291 this.url = url; 292 } 293 294 /** 295 * Get the location of the cache directory (usually set to the PDB_CACHE_DIR 296 * property). ECOD files will be downloaded to this directory 297 * @return 298 */ 299 public String getCacheLocation() { 300 return cacheLocation; 301 } 302 /** 303 * Set an alternate download location for files 304 * @param cacheLocation 305 */ 306 public void setCacheLocation(String cacheLocation) { 307 if(cacheLocation.equals(this.cacheLocation)) { 308 return; //no change 309 } 310 // update location 311 domainsFileLock.writeLock().lock(); 312 logger.trace("LOCK writelock"); 313 this.cacheLocation = cacheLocation; 314 logger.trace("UNLOCK writelock"); 315 domainsFileLock.writeLock().unlock(); 316 } 317 318 /** 319 * Blocks until ECOD domains file has been downloaded and parsed. 320 * 321 * This may be useful in multithreaded environments. 322 * @throws IOException 323 */ 324 // Populates allDomains 325 public void ensureDomainsFileInstalled() throws IOException{ 326 // Quick check for availability 327 domainsFileLock.readLock().lock(); 328 logger.trace("LOCK readlock"); 329 try { 330 if( allDomains != null ) { 331 return; 332 } 333 } finally { 334 logger.trace("UNLOCK readlock"); 335 domainsFileLock.readLock().unlock(); 336 } 337 338 // Download domains 339 domainsFileLock.writeLock().lock(); 340 logger.trace("LOCK writelock"); 341 try { 342 if( !domainsAvailable() ) { 343 downloadDomains(); 344 } 345 parseDomains(); 346 } finally { 347 logger.trace("UNLOCK writelock"); 348 domainsFileLock.writeLock().unlock(); 349 } 350 } 351 352 /** 353 * Checks that the domains file has been downloaded 354 * @return 355 */ 356 private boolean domainsAvailable() { 357 domainsFileLock.readLock().lock(); 358 logger.trace("LOCK readlock"); 359 try { 360 File f = getDomainFile(); 361 362 return f.exists() && f.length()>0; 363 } finally { 364 logger.trace("UNLOCK readlock"); 365 domainsFileLock.readLock().unlock(); 366 } 367 } 368 369 /** 370 * Downloads the domains file, overwriting any existing file 371 * @throws IOException 372 */ 373 private void downloadDomains() throws IOException { 374 domainsFileLock.writeLock().lock(); 375 logger.trace("LOCK writelock"); 376 try { 377 URL domainsURL = new URL( url + DOMAINS_PATH + getDomainFilename()); 378 File localFile = getDomainFile(); 379 380 logger.info("Downloading {} to: {}",domainsURL, localFile); 381 FileDownloadUtils.downloadFile(domainsURL, localFile); 382 } catch (MalformedURLException e) { 383 logger.error("Malformed url: "+ url + DOMAINS_PATH + getDomainFilename(),e); 384 } finally { 385 logger.trace("UNLOCK writelock"); 386 domainsFileLock.writeLock().unlock(); 387 } 388 } 389 390 /** 391 * Basename for the domains file with the current requestedVersion. 392 * @return 393 */ 394 private String getDomainFilename() { 395 return String.format(DOMAINS_FILENAME_FORMAT,requestedVersion); 396 } 397 398 /** 399 * Local location for the domain file 400 * @return 401 */ 402 private File getDomainFile() { 403 return new File(getCacheLocation(),getDomainFilename()); 404 } 405 406 /** 407 * Parses the domains from the local file 408 * @throws IOException 409 */ 410 private void parseDomains() throws IOException { 411 domainsFileLock.writeLock().lock(); 412 logger.trace("LOCK writelock"); 413 try { 414 EcodParser parser = new EcodParser(getDomainFile()); 415 allDomains = parser.getDomains(); 416 parsedVersion = parser.getVersion(); 417 } finally { 418 logger.trace("UNLOCK writelock"); 419 domainsFileLock.writeLock().unlock(); 420 } 421 } 422 423 /** 424 * Populates domainMap from allDomains 425 * @throws IOException 426 */ 427 private void indexDomains() throws IOException { 428 domainsFileLock.writeLock().lock(); 429 logger.trace("LOCK writelock"); 430 try { 431 if( allDomains == null) { 432 ensureDomainsFileInstalled(); 433 } 434 435 // Leave enough space for all PDBs as of 2015 436 domainMap = new HashMap<String, List<EcodDomain>>((int) (150000/.85),.85f); 437 438 // Index with domainMap 439 for(EcodDomain d : allDomains) { 440 // Get the PDB ID, either directly or from the domain ID 441 String pdbId = d.getPdbId(); 442 if( pdbId == null ) { 443 String ecodId = d.getDomainId(); 444 if( ecodId != null && !ecodId.isEmpty() ) { 445 Matcher match = ECOD_RE.matcher(ecodId); 446 pdbId = match.group(1); 447 } 448 } 449 450 // Add current domain to the map 451 List<EcodDomain> currDomains; 452 if( domainMap.containsKey(pdbId) ) { 453 currDomains = domainMap.get(pdbId); 454 } else { 455 currDomains = new LinkedList<EcodDomain>(); 456 domainMap.put(pdbId,currDomains); 457 } 458 currDomains.add(d); 459 } 460 } finally { 461 logger.trace("UNLOCK writelock"); 462 domainsFileLock.writeLock().unlock(); 463 } 464 465 } 466 467 468 public static class EcodParser { 469 /* 470Version Notes 471 472Current version (1.4) contains the following columns: 473 474Column 1: ECOD uid - internal domain unique identifier 475Column 2: ECOD domain id - domain identifier 476Column 3: ECOD representative status - manual (curated) or automated nonrep 477Column 4: ECOD hierachy identifier - [X-group].[H-group].[T-group].[F-group] 478 * In develop45-66 these also include single numbers in the range 1-265 479Column 5: PDB identifier 480Column 6: Chain identifier (note: case-sensitive) 481Column 7: PDB residue number range 482 * These are sometimes incorrect up to at least develop124. Examples are: 483 e4lxaA2 (should be A:184-385), e4lxmC3 (should be C:46P-183) 484Column 8: seq_id number range (based on internal PDB indices) 485Column 9: Architecture name 486Column 10: X-group name 487Column 11: H-group name 488Column 12: T-group name 489Column 13: F-group name (F_UNCLASSIFIED denotes that domain has not been assigned to an F-group) 490Column 14: Domain assembly status (if domain is member of assembly, partners' ecod domain ids listed) 491Column 15: Comma-separated value list of non-polymer entities within 4 A of at least one residue of domain 492 493Notes older versions: 494changelog: 495v1.0 - original version (8/04/2014) 496v1.1 - added rep/nonrep data (1/15/2015) 497v1.2 - added f-group identifiers to fasta file, domain description file. ECODf identifiers now used when available for F-group name. 498 Domain assemblies now represented by assembly uid in domain assembly status. 499v1.4 - added seqid_range and headers (develop101) 500 */ 501 502 /** String for unclassified F-groups */ 503 public static final String F_UNCLASSIFIED = "F_UNCLASSIFIED"; 504 /** String for single-domain assemblies */ 505 public static final String NOT_DOMAIN_ASSEMBLY = "NOT_DOMAIN_ASSEMBLY"; 506 /** Deprecated way of indicating there is an assembly. replaced by the assembly id */ 507 public static final String IS_DOMAIN_ASSEMBLY = "IS_DOMAIN_ASSEMBLY"; 508 /** Indicates a manual representative */ 509 public static final String IS_REPRESENTATIVE = "MANUAL_REP"; 510 /** Indicates not a manual representative */ 511 public static final String NOT_REPRESENTATIVE = "AUTO_NONREP"; 512 513 private List<EcodDomain> domains; 514 private String version; 515 516 public EcodParser(String filename) throws IOException { 517 this(new File(filename)); 518 } 519 public EcodParser(File file) throws IOException { 520 this(new FileReader(file)); 521 } 522 public EcodParser(Reader reader) throws IOException { 523 this(new BufferedReader(reader)); 524 } 525 public EcodParser(BufferedReader reader) throws IOException { 526 version = null; 527 parse(reader); 528 } 529 530 private void parse(BufferedReader in) throws IOException { 531 try { 532 // Allocate plenty of space for ECOD as of 2015 533 ArrayList<EcodDomain> domainsList = new ArrayList<EcodDomain>(500000); 534 535 Pattern versionRE = Pattern.compile("^\\s*#.*ECOD\\s*version\\s+(\\S+).*"); 536 Pattern commentRE = Pattern.compile("^\\s*#.*"); 537 538 // prevent too many warnings; negative numbers print all warnings 539 int warnIsDomainAssembly = 1; 540 int warnHierarchicalFormat = 5; 541 int warnNumberOfFields = 10; 542 543 String line = in.readLine(); 544 int lineNum = 1; 545 while( line != null ) { 546 // Check for requestedVersion string 547 Matcher match = versionRE.matcher(line); 548 if(match.matches()) { 549 // special requestedVersion comment 550 this.version = match.group(1); 551 } else { 552 match = commentRE.matcher(line); 553 if(match.matches()) { 554 // ignore comments 555 } else { 556 // data line 557 String[] fields = line.split("\t"); 558 if( fields.length == 13 || fields.length == 14 || fields.length == 15) { 559 try { 560 int i = 0; // field number, to allow future insertion of fields 561 562 //Column 1: ECOD uid - internal domain unique identifier 563 Long uid = Long.parseLong(fields[i++]); 564 //Column 2: ECOD domain id - domain identifier 565 String domainId = fields[i++]; 566 567 //Column 3: ECOD representative status - manual (curated) or automated nonrep 568 // Manual column may be missing in version 1.0 files 569 Boolean manual = null; 570 if( fields.length >= 14) { 571 String manualString = fields[i++]; 572 if(manualString.equalsIgnoreCase(IS_REPRESENTATIVE)) { 573 manual = true; 574 } else if(manualString.equalsIgnoreCase(NOT_REPRESENTATIVE)) { 575 manual = false; 576 } else { 577 logger.warn("Unexpected value for manual field: {} in line {}",manualString,lineNum); 578 } 579 } 580 581 //Column 4: ECOD hierachy identifier - [X-group].[H-group].[T-group].[F-group] 582 // hierarchical field, e.g. "1.1.4.1" 583 String[] xhtGroup = fields[i++].split("\\."); 584 if(xhtGroup.length < 3 || 4 < xhtGroup.length) { 585 if(warnHierarchicalFormat > 1) { 586 logger.warn("Unexpected format for hierarchical field \"{}\" in line {}",fields[i-1],lineNum); 587 warnHierarchicalFormat--; 588 } else if(warnHierarchicalFormat != 0) { 589 logger.warn("Unexpected format for hierarchical field \"{}\" in line {}. Not printing future similar warnings.",fields[i-1],lineNum); 590 warnHierarchicalFormat--; 591 } 592 } 593 Integer xGroup = xhtGroup.length>0 ? Integer.parseInt(xhtGroup[0]) : null; 594 Integer hGroup = xhtGroup.length>1 ? Integer.parseInt(xhtGroup[1]) : null; 595 Integer tGroup = xhtGroup.length>2 ? Integer.parseInt(xhtGroup[2]) : null; 596 Integer fGroup = xhtGroup.length>3 ? Integer.parseInt(xhtGroup[3]) : null; 597 598 //Column 5: PDB identifier 599 String pdbId = fields[i++]; 600 //Column 6: Chain identifier (note: case-sensitive) 601 String chainId = fields[i++]; 602 //Column 7: PDB residue number range 603 String range = fields[i++]; 604 605 //Column 8: seq_id number range (based on internal PDB indices) 606 //Added in version 1.4 607 String seqId = null; 608 if( fields.length >= 15) { 609 seqId = fields[i++]; 610 } 611 612 //Column 9: Architecture name 613 // Intern strings likely to be shared by many domains 614 String architectureName = fields[i++].intern(); 615 //Column 10: X-group name 616 String xGroupName = fields[i++].intern(); 617 //Column 11: H-group name 618 String hGroupName = fields[i++].intern(); 619 //Column 12: T-group name 620 String tGroupName = fields[i++].intern(); 621 //Column 13: F-group name (F_UNCLASSIFIED denotes that domain has not been assigned to an F-group) 622 //Contents changed in version 1.3 623 String fGroupName = fields[i++].intern(); 624 625 //Column 14: Domain assembly status (if domain is member of assembly, partners' ecod domain ids listed) 626 //Column 15: Comma-separated value list of non-polymer entities within 4 A of at least one residue of domain 627 Long assemblyId = null; 628 String assemblyStr = fields[i++]; 629 if(assemblyStr.equals(NOT_DOMAIN_ASSEMBLY)) { 630 assemblyId = uid; 631 } else if(assemblyStr.equals("IS_DOMAIN_ASSEMBLY") ) { 632 if(warnIsDomainAssembly > 1) { 633 logger.info("Deprecated 'IS_DOMAIN_ASSEMBLY' value ignored in line {}.",lineNum); 634 warnIsDomainAssembly--; 635 } else if(warnIsDomainAssembly == 0) { 636 logger.info("Deprecated 'IS_DOMAIN_ASSEMBLY' value ignored in line {}. Not printing future similar warnings.",lineNum); 637 warnIsDomainAssembly--; 638 } 639 //assemblyId = null; 640 } else { 641 assemblyId = Long.parseLong(assemblyStr); 642 } 643 644 String ligandStr = fields[i++]; 645 Set<String> ligands = null; 646 if( ligandStr.equals("NO_LIGANDS_4A") || ligandStr.isEmpty() ) { 647 ligands = Collections.emptySet(); 648 } else { 649 String[] ligSplit = ligandStr.split(","); 650 ligands = new LinkedHashSet<String>(ligSplit.length); 651 for(String s : ligSplit) { 652 ligands.add(s.intern()); 653 } 654 } 655 656 657 EcodDomain domain = new EcodDomain(uid, domainId, manual, xGroup, hGroup, tGroup, fGroup,pdbId, chainId, range, seqId, architectureName, xGroupName, hGroupName, tGroupName, fGroupName, assemblyId, ligands); 658 domainsList.add(domain); 659 } catch(NumberFormatException e) { 660 logger.warn("Error in ECOD parsing at line "+lineNum,e); 661 } 662 } else { 663 if(warnNumberOfFields > 1) { 664 logger.warn("Unexpected number of fields in line {}.",lineNum); 665 warnNumberOfFields--; 666 } else if(warnNumberOfFields == 0) { 667 logger.warn("Unexpected number of fields in line {}. Not printing future similar warnings",lineNum); 668 warnIsDomainAssembly--; 669 } 670 } 671 } 672 } 673 674 line = in.readLine(); 675 lineNum++; 676 } 677 if(this.version == null) 678 logger.info("Parsed {} ECOD domains",domainsList.size()); 679 else 680 logger.info("Parsed {} ECOD domains from version {}",domainsList.size(),this.version); 681 682 683 this.domains = Collections.unmodifiableList( domainsList ); 684 685 } finally { 686 if(in != null) { 687 in.close(); 688 } 689 } 690 } 691 692 /** 693 * @return a list of all EcodDomains 694 */ 695 public List<EcodDomain> getDomains() { 696 return domains; 697 } 698 699 /** 700 * @return the requestedVersion for this file, or null if none was parsed 701 */ 702 public String getVersion() { 703 return version; 704 } 705 } 706 707 708 @Override 709 public String toString() { 710 String version = null; 711 try { 712 version = getVersion(); 713 } catch (IOException e) { 714 // For parsing errors, use the requested version 715 version = requestedVersion; 716 } 717 718 return "EcodInstallation [cacheLocation=" + cacheLocation 719 + ", version=" + version + "]"; 720 } 721 722 public static void main(String[] args) { 723 if( args.length!= 1) { 724 System.out.println("usage: ecod_domains.txt"); 725 System.exit(1); return; 726 } 727 728 String filename = args[0]; 729 730 try { 731 EcodParser parser = new EcodParser(filename); 732 733 List<EcodDomain> domains = parser.getDomains(); 734 735 System.out.format("Found %d ECOD domains.%n",domains.size()); 736 737 System.out.println("First 10 domains:"); 738 int i = 0; 739 for(EcodDomain d: domains) { 740 if( i>10) break; 741 742 System.out.println(d.getDomainId()); 743 i++; 744 } 745 } catch (IOException e) { 746 e.printStackTrace(); 747 } 748 } 749}