001/*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 */
020
021package org.biojava.nbio.structure.ecod;
022
023import java.io.BufferedReader;
024import java.io.File;
025import java.io.FileReader;
026import java.io.IOException;
027import java.io.Reader;
028import java.net.MalformedURLException;
029import java.net.URL;
030import java.util.ArrayList;
031import java.util.Collections;
032import java.util.HashMap;
033import java.util.LinkedHashSet;
034import java.util.LinkedList;
035import java.util.List;
036import java.util.Map;
037import java.util.Set;
038import java.util.concurrent.locks.ReadWriteLock;
039import java.util.concurrent.locks.ReentrantReadWriteLock;
040import java.util.regex.Matcher;
041import java.util.regex.Pattern;
042
043import org.biojava.nbio.structure.align.util.UserConfiguration;
044import org.biojava.nbio.structure.io.util.FileDownloadUtils;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048/**
049 * Provides access to the Evolutionary Classification of Protein Domains (ECOD).
050 *
051 * The preferred mechanism for obtaining instances of this class is through the
052 * {@link EcodFactory} class.
053 *
054 * Reference:
 * H. Cheng, R. D. Schaeffer, Y. Liao, L. N. Kinch, J. Pei, S. Shi, B. H.
056 *   Kim, N. V. Grishin. (2014) ECOD: An evolutionary classification of protein
057 *   domains. PLoS Comput Biol 10(12): e1003926.
058 * http://prodata.swmed.edu/ecod/
059 *
060 * @author Spencer Bliven
061 *
062 */
063public class EcodInstallation implements EcodDatabase {
064        private static final Logger logger = LoggerFactory.getLogger(EcodInstallation.class);
065
066        public static final String DEFAULT_VERSION = "latest";
067        private static final String DOMAINS_FILENAME_FORMAT = "ecod.%s.domains.txt";
068
069        public static final String ECOD_URL = "http://prodata.swmed.edu";
070        public static final String DOMAINS_PATH = "/ecod/distributions/";
071
        // ECOD identifiers are e<pdbID><chain><domain>, where the PDB ID is the four
        // characters after the leading 'e'. Chain and domain can both be multi-letter (e.g. e2q7zA10)
074        public static final Pattern ECOD_RE = Pattern.compile("^e(....).+\\d+$");
075
076
077        private String cacheLocation;
078        private String requestedVersion; // version requested, e.g. "latest". Used for the paths
079        private String parsedVersion; // actual version parsed
080
081        // lock to prevent multiple threads from downloading simultaneously
082        // Should hold the lock when reading/writing allDomains or domainMap
083        private ReadWriteLock domainsFileLock;
084        private List<EcodDomain> allDomains;
085        private Map<String,List<EcodDomain>> domainMap;//PDB ID -> domains, lazily constructed from allDomains
086
087        private String url;
088
/**
 * Creates an installation that stores its files under the given cache
 * directory and fetches the given ECOD version from {@link #ECOD_URL}.
 *
 * Use EcodFactory to create instances. The instantiation of multiple
 * installations at the same path can lead to race conditions when downloading
 * files.
 * @param cacheLocation Location to save files, typically from the PDB_CACHE_DIR parameter
 * @param version ECOD version to fetch (e.g. "latest"); also used to build the domains file name
 */
public EcodInstallation(String cacheLocation, String version) {
    this.cacheLocation = cacheLocation;
    this.requestedVersion = version;
    this.url = ECOD_URL;
    this.domainsFileLock = new ReentrantReadWriteLock();

    // Both caches start out as null and are filled on demand:
    // allDomains by parsing the domains file, domainMap by indexing allDomains.
    this.allDomains = null;
    this.domainMap = null;
}
107
/**
 * Creates an installation of {@link #DEFAULT_VERSION} whose cache directory
 * is the cache file path reported by a new {@link UserConfiguration}.
 * @see EcodFactory#getEcodDatabase()
 */
EcodInstallation() {
    this(new UserConfiguration().getCacheFilePath(), DEFAULT_VERSION);
}
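        // Disabled convenience constructor, kept for reference.
        // NOTE(review): confirm whether it should be re-enabled or removed.
        //public EcodInstallation(String cacheLocation) {
        //        this( cacheLocation, DEFAULT_VERSION );
        //}
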
119        /**
120         * Get a list of all ECOD domains for a particular PDB ID
121         * @param pdbId
122         * @return the list of domains, or null if no matching domains were found
123         * @throws IOException
124         */
125        @Override
126        public List<EcodDomain> getDomainsForPdb(String pdbId) throws IOException {
127                domainsFileLock.readLock().lock();
128                try {
129                        logger.trace("LOCK readlock");
130                        while( domainMap == null ) {
131                                // unlock to allow ensureDomainsFileInstalled to get the write lock
132                                logger.trace("UNLOCK readlock");
133                                domainsFileLock.readLock().unlock();
134                                indexDomains();
135                                domainsFileLock.readLock().lock();
136                                logger.trace("LOCK readlock");
137                        }
138
139                        if(pdbId != null)
140                                pdbId = pdbId.toLowerCase();
141                        List<EcodDomain> doms = domainMap.get(pdbId);
142                        if(doms == null) {
143                                return null;
144                        }
145                        // Deep clone
146                        List<EcodDomain> clonedDoms = new ArrayList<EcodDomain>(doms.size());
147                        for(EcodDomain d : doms) {
148                                clonedDoms.add( new EcodDomain(d) );
149                        }
150                        return clonedDoms;
151                } finally {
152                        logger.trace("UNLOCK readlock");
153                        domainsFileLock.readLock().unlock();
154                }
155        }
156
157        /**
158         * Get a list of domains within a particular level of the hierarchy
159         * @param hierarchy A dot-separated list giving the X-group, H-group, and/or
160         *  T-group (e.g. "1.1" for all members of the RIFT-related H-group)
161         * @return
162         * @throws IOException
163         */
164        @Override
165        public List<EcodDomain> filterByHierarchy(String hierarchy) throws IOException {
166                String[] xhtGroup = hierarchy.split("\\.");
167                Integer xGroup = xhtGroup.length>0 ? Integer.parseInt(xhtGroup[0]) : null;
168                Integer hGroup = xhtGroup.length>1 ? Integer.parseInt(xhtGroup[1]) : null;
169                Integer tGroup = xhtGroup.length>2 ? Integer.parseInt(xhtGroup[2]) : null;
170
171                List<EcodDomain> filtered = new ArrayList<EcodDomain>();
172                for(EcodDomain d: getAllDomains()) {
173                        boolean match = true;
174                        if(xhtGroup.length>0) {
175                                match = match && xGroup.equals(d.getXGroup());
176                        }
177                        if(xhtGroup.length>1) {
178                                match = match && hGroup.equals(d.getHGroup());
179                        }
180                        if(xhtGroup.length>2) {
181                                match = match && tGroup.equals(d.getTGroup());
182                        }
183                        if(xhtGroup.length>3) {
184                                logger.warn("Ignoring unexpected additional parts of ECOD {}",hierarchy);
185                        }
186                        if(match) {
187                                filtered.add(d);
188                        }
189                }
190                return filtered;
191        }
192
193        /**
194         * Get a particular ECOD domain by the domain ID (e.g. "e4hhbA1")
195         * @param ecodId
196         * @return
197         * @throws IOException
198         */
199        @Override
200        public EcodDomain getDomainsById(String ecodId) throws IOException {
201                if(ecodId == null || ecodId.isEmpty()) {
202                        return null;
203                }
204
205                Matcher match = ECOD_RE.matcher(ecodId);
206                String pdbId = null;
207                if( match.matches() )
208                        pdbId = match.group(1);
209                List<EcodDomain> doms = getDomainsForPdb(pdbId);
210                if(doms == null) {
211                        logger.debug("Null domains for {} from {}",pdbId,ecodId);
212                        return null;
213                }
214                logger.debug("Got {} domains from {}",doms.size(),pdbId);
215                for(EcodDomain d: doms) {
216                        if(ecodId.equals(d.getDomainId())) {
217                                return d;
218                        }
219                }
220                return null;
221        }
222
223        /**
224         * Get all ECOD domains
225         * @return
226         * @throws IOException
227         */
228        @Override
229        public List<EcodDomain> getAllDomains() throws IOException {
230                domainsFileLock.readLock().lock();
231                logger.trace("LOCK readlock");
232                try {
233                        while( allDomains == null) {
234                                // unlock to allow ensureDomainsFileInstalled to get the write lock
235                                logger.trace("UNLOCK readlock");
236                                domainsFileLock.readLock().unlock();
237                                ensureDomainsFileInstalled();
238                                domainsFileLock.readLock().lock();
239                                logger.trace("LOCK readlock");
240                        }
241                        return allDomains;
242                } finally {
243                        logger.trace("UNLOCK readlock");
244                        domainsFileLock.readLock().unlock();
245                }
246
247        }
248
249        /**
250         * Clears all domains, requiring the file to be reparsed for subsequent accesses
251         */
252        public void clear() {
253                domainsFileLock.writeLock().lock();
254                logger.trace("LOCK writelock");
255                allDomains = null;
256                domainMap = null;
257                logger.trace("UNLOCK writelock");
258                domainsFileLock.writeLock().unlock();
259        }
260        /**
261         * Return the ECOD version, as parsed from the file.
262         *
263         * Note that this may differ from the version requested in the constructor
264         * for the special case of "latest"
265         * @return the ECOD version
266         * @throws IOException If an error occurs while downloading or parsing the file
267         */
268        @Override
269        public String getVersion() throws IOException {
270                ensureDomainsFileInstalled();
271
272                if( parsedVersion == null) {
273                        return requestedVersion;
274                }
275                return parsedVersion;
276        }
277
278        /**
279         * Get the top-level ECOD server URL. Defaults to "http://prodata.swmed.edu"
280         * @return the url to the ecod server
281         */
282        public String getUrl() {
283                return url;
284        }
285
286        /**
287         * Specify a different mirror for the ECOD server.
288         * @param urlFormat the urlFormat to set
289         */
290        public void setUrl(String url) {
291                this.url = url;
292        }
293
294        /**
295         * Get the location of the cache directory (usually set to the PDB_CACHE_DIR
296         * property). ECOD files will be downloaded to this directory
297         * @return
298         */
299        public String getCacheLocation() {
300                return cacheLocation;
301        }
302        /**
303         * Set an alternate download location for files
304         * @param cacheLocation
305         */
306        public void setCacheLocation(String cacheLocation) {
307                if(cacheLocation.equals(this.cacheLocation)) {
308                        return; //no change
309                }
310                // update location
311                domainsFileLock.writeLock().lock();
312                logger.trace("LOCK writelock");
313                this.cacheLocation = cacheLocation;
314                logger.trace("UNLOCK writelock");
315                domainsFileLock.writeLock().unlock();
316        }
317
318        /**
319         * Blocks until ECOD domains file has been downloaded and parsed.
320         *
321         * This may be useful in multithreaded environments.
322         * @throws IOException
323         */
324        // Populates allDomains
325        public void ensureDomainsFileInstalled() throws IOException{
326                // Quick check for availability
327                domainsFileLock.readLock().lock();
328                logger.trace("LOCK readlock");
329                try {
330                        if( allDomains != null ) {
331                                return;
332                        }
333                } finally {
334                        logger.trace("UNLOCK readlock");
335                        domainsFileLock.readLock().unlock();
336                }
337
338                // Download domains
339                domainsFileLock.writeLock().lock();
340                logger.trace("LOCK writelock");
341                try {
342                        if( !domainsAvailable() ) {
343                                downloadDomains();
344                        }
345                        parseDomains();
346                } finally {
347                        logger.trace("UNLOCK writelock");
348                        domainsFileLock.writeLock().unlock();
349                }
350        }
351
352        /**
353         * Checks that the domains file has been downloaded
354         * @return
355         */
356        private boolean domainsAvailable() {
357                domainsFileLock.readLock().lock();
358                logger.trace("LOCK readlock");
359                try {
360                        File f = getDomainFile();
361
362                        return f.exists() && f.length()>0;
363                } finally {
364                        logger.trace("UNLOCK readlock");
365                        domainsFileLock.readLock().unlock();
366                }
367        }
368
369        /**
370         * Downloads the domains file, overwriting any existing file
371         * @throws IOException
372         */
373        private void downloadDomains() throws IOException {
374                domainsFileLock.writeLock().lock();
375                logger.trace("LOCK writelock");
376                try {
377                        URL domainsURL = new URL( url + DOMAINS_PATH + getDomainFilename());
378                        File localFile = getDomainFile();
379
380                        logger.info("Downloading {} to: {}",domainsURL, localFile);
381                        FileDownloadUtils.downloadFile(domainsURL, localFile);
382                } catch (MalformedURLException e) {
383                        logger.error("Malformed url: "+ url + DOMAINS_PATH + getDomainFilename(),e);
384                } finally {
385                        logger.trace("UNLOCK writelock");
386                        domainsFileLock.writeLock().unlock();
387                }
388        }
389
390        /**
391         * Basename for the domains file with the current requestedVersion.
392         * @return
393         */
394        private String getDomainFilename() {
395                return  String.format(DOMAINS_FILENAME_FORMAT,requestedVersion);
396        }
397
398        /**
399         * Local location for the domain file
400         * @return
401         */
402        private File getDomainFile() {
403                return new File(getCacheLocation(),getDomainFilename());
404        }
405
406        /**
407         * Parses the domains from the local file
408         * @throws IOException
409         */
410        private void parseDomains() throws IOException {
411                domainsFileLock.writeLock().lock();
412                logger.trace("LOCK writelock");
413                try {
414                        EcodParser parser = new EcodParser(getDomainFile());
415                        allDomains = parser.getDomains();
416                        parsedVersion = parser.getVersion();
417                } finally {
418                        logger.trace("UNLOCK writelock");
419                        domainsFileLock.writeLock().unlock();
420                }
421        }
422
423        /**
424         * Populates domainMap from allDomains
425         * @throws IOException
426         */
427        private void indexDomains() throws IOException {
428                domainsFileLock.writeLock().lock();
429                logger.trace("LOCK writelock");
430                try {
431                        if( allDomains == null) {
432                                ensureDomainsFileInstalled();
433                        }
434
435                        // Leave enough space for all PDBs as of 2015
436                        domainMap = new HashMap<String, List<EcodDomain>>((int) (150000/.85),.85f);
437
438                        // Index with domainMap
439                        for(EcodDomain d : allDomains) {
440                                // Get the PDB ID, either directly or from the domain ID
441                                String pdbId = d.getPdbId();
442                                if( pdbId == null ) {
443                                        String ecodId = d.getDomainId();
444                                        if( ecodId != null && !ecodId.isEmpty() ) {
445                                                Matcher match = ECOD_RE.matcher(ecodId);
446                                                pdbId = match.group(1);
447                                        }
448                                }
449
450                                // Add current domain to the map
451                                List<EcodDomain> currDomains;
452                                if( domainMap.containsKey(pdbId) ) {
453                                        currDomains = domainMap.get(pdbId);
454                                } else {
455                                        currDomains = new LinkedList<EcodDomain>();
456                                        domainMap.put(pdbId,currDomains);
457                                }
458                                currDomains.add(d);
459                        }
460                } finally {
461                        logger.trace("UNLOCK writelock");
462                        domainsFileLock.writeLock().unlock();
463                }
464
465        }
466
467
468        public static class EcodParser {
469                /*
470Version Notes
471
472Current version (1.4) contains the following columns:
473
474Column 1: ECOD uid - internal domain unique identifier
475Column 2: ECOD domain id - domain identifier
476Column 3: ECOD representative status - manual (curated) or automated nonrep
477Column 4: ECOD hierachy identifier - [X-group].[H-group].[T-group].[F-group]
478        * In develop45-66 these also include single numbers in the range 1-265
479Column 5: PDB identifier
480Column 6: Chain identifier (note: case-sensitive)
481Column 7: PDB residue number range
482        * These are sometimes incorrect up to at least develop124. Examples are:
483          e4lxaA2 (should be A:184-385), e4lxmC3 (should be C:46P-183)
484Column 8: seq_id number range (based on internal PDB indices)
485Column 9: Architecture name
486Column 10: X-group name
487Column 11: H-group name
488Column 12: T-group name
489Column 13: F-group name (F_UNCLASSIFIED denotes that domain has not been assigned to an F-group)
490Column 14: Domain assembly status (if domain is member of assembly, partners' ecod domain ids listed)
491Column 15: Comma-separated value list of non-polymer entities within 4 A of at least one residue of domain
492
493Notes older versions:
494changelog:
495v1.0 - original version (8/04/2014)
496v1.1 - added rep/nonrep data (1/15/2015)
497v1.2 - added f-group identifiers to fasta file, domain description file. ECODf identifiers now used when available for F-group name.
498        Domain assemblies now represented by assembly uid in domain assembly status.
499v1.4 - added seqid_range and headers (develop101)
500                 */
501
502                /** String for unclassified F-groups */
503                public static final String F_UNCLASSIFIED = "F_UNCLASSIFIED";
504                /** String for single-domain assemblies */
505                public static final String NOT_DOMAIN_ASSEMBLY = "NOT_DOMAIN_ASSEMBLY";
506                /** Deprecated way of indicating there is an assembly. replaced by the assembly id */
507                public static final String IS_DOMAIN_ASSEMBLY = "IS_DOMAIN_ASSEMBLY";
508                /** Indicates a manual representative */
509                public static final String IS_REPRESENTATIVE = "MANUAL_REP";
510                /** Indicates not a manual representative */
511                public static final String NOT_REPRESENTATIVE = "AUTO_NONREP";
512
513                private List<EcodDomain> domains;
514                private String version;
515
/**
 * Parses the ECOD domains file at the given path.
 * @param filename path of the domains file
 * @throws IOException propagated from opening or parsing the file
 */
public EcodParser(String filename) throws IOException {
    this(new File(filename));
}

/**
 * Parses the given ECOD domains file, read through a {@link FileReader}.
 * @param file the domains file
 * @throws IOException propagated from opening or parsing the file
 */
public EcodParser(File file) throws IOException {
    this(new FileReader(file));
}

/**
 * Parses ECOD domains from the given reader, which is wrapped in a
 * {@link BufferedReader}.
 * @param reader source of the domains text
 * @throws IOException propagated from parsing
 */
public EcodParser(Reader reader) throws IOException {
    this(new BufferedReader(reader));
}

/**
 * Parses ECOD domains from the given buffered reader. Parsing happens in
 * the constructor; the version starts out as null before parsing.
 * @param reader source of the domains text
 * @throws IOException propagated from parsing
 */
public EcodParser(BufferedReader reader) throws IOException {
    this.version = null;
    parse(reader);
}
529
530                private void parse(BufferedReader in) throws IOException {
531                        try {
532                                // Allocate plenty of space for ECOD as of 2015
533                                ArrayList<EcodDomain> domainsList = new ArrayList<EcodDomain>(500000);
534
535                                Pattern versionRE = Pattern.compile("^\\s*#.*ECOD\\s*version\\s+(\\S+).*");
536                                Pattern commentRE = Pattern.compile("^\\s*#.*");
537
538                                // prevent too many warnings; negative numbers print all warnings
539                                int warnIsDomainAssembly = 1;
540                                int warnHierarchicalFormat = 5;
541                                int warnNumberOfFields = 10;
542
543                                String line = in.readLine();
544                                int lineNum = 1;
545                                while( line != null ) {
546                                        // Check for requestedVersion string
547                                        Matcher match = versionRE.matcher(line);
548                                        if(match.matches()) {
549                                                // special requestedVersion comment
550                                                this.version = match.group(1);
551                                        } else {
552                                                match = commentRE.matcher(line);
553                                                if(match.matches()) {
554                                                        // ignore comments
555                                                } else {
556                                                        // data line
557                                                        String[] fields = line.split("\t");
558                                                        if( fields.length == 13 || fields.length == 14 || fields.length == 15) {
559                                                                try {
560                                                                        int i = 0; // field number, to allow future insertion of fields
561
562                                                                        //Column 1: ECOD uid - internal domain unique identifier
563                                                                        Long uid = Long.parseLong(fields[i++]);
564                                                                        //Column 2: ECOD domain id - domain identifier
565                                                                        String domainId = fields[i++];
566
567                                                                        //Column 3: ECOD representative status - manual (curated) or automated nonrep
568                                                                        // Manual column may be missing in version 1.0 files
569                                                                        Boolean manual = null;
570                                                                        if( fields.length >= 14) {
571                                                                                String manualString = fields[i++];
572                                                                                if(manualString.equalsIgnoreCase(IS_REPRESENTATIVE)) {
573                                                                                        manual = true;
574                                                                                } else if(manualString.equalsIgnoreCase(NOT_REPRESENTATIVE)) {
575                                                                                        manual = false;
576                                                                                } else {
577                                                                                        logger.warn("Unexpected value for manual field: {} in line {}",manualString,lineNum);
578                                                                                }
579                                                                        }
580
581                                                                        //Column 4: ECOD hierachy identifier - [X-group].[H-group].[T-group].[F-group]
582                                                                        // hierarchical field, e.g. "1.1.4.1"
583                                                                        String[] xhtGroup = fields[i++].split("\\.");
584                                                                        if(xhtGroup.length < 3 || 4 < xhtGroup.length) {
585                                                                                if(warnHierarchicalFormat > 1) {
586                                                                                        logger.warn("Unexpected format for hierarchical field \"{}\" in line {}",fields[i-1],lineNum);
587                                                                                        warnHierarchicalFormat--;
588                                                                                } else if(warnHierarchicalFormat != 0) {
589                                                                                        logger.warn("Unexpected format for hierarchical field \"{}\" in line {}. Not printing future similar warnings.",fields[i-1],lineNum);
590                                                                                        warnHierarchicalFormat--;
591                                                                                }
592                                                                        }
593                                                                        Integer xGroup = xhtGroup.length>0 ? Integer.parseInt(xhtGroup[0]) : null;
594                                                                        Integer hGroup = xhtGroup.length>1 ? Integer.parseInt(xhtGroup[1]) : null;
595                                                                        Integer tGroup = xhtGroup.length>2 ? Integer.parseInt(xhtGroup[2]) : null;
596                                                                        Integer fGroup = xhtGroup.length>3 ? Integer.parseInt(xhtGroup[3]) : null;
597
598                                                                        //Column 5: PDB identifier
599                                                                        String pdbId = fields[i++];
600                                                                        //Column 6: Chain identifier (note: case-sensitive)
601                                                                        String chainId = fields[i++];
602                                                                        //Column 7: PDB residue number range
603                                                                        String range = fields[i++];
604
605                                                                        //Column 8: seq_id number range (based on internal PDB indices)
606                                                                        //Added in version 1.4
607                                                                        String seqId = null;
608                                                                        if( fields.length >= 15) {
609                                                                                seqId = fields[i++];
610                                                                        }
611
612                                                                        //Column 9: Architecture name
613                                                                        // Intern strings likely to be shared by many domains
614                                                                        String architectureName = fields[i++].intern();
615                                                                        //Column 10: X-group name
616                                                                        String xGroupName = fields[i++].intern();
617                                                                        //Column 11: H-group name
618                                                                        String hGroupName = fields[i++].intern();
619                                                                        //Column 12: T-group name
620                                                                        String tGroupName = fields[i++].intern();
621                                                                        //Column 13: F-group name (F_UNCLASSIFIED denotes that domain has not been assigned to an F-group)
622                                                                        //Contents changed in version 1.3
623                                                                        String fGroupName = fields[i++].intern();
624
625                                                                        //Column 14: Domain assembly status (if domain is member of assembly, partners' ecod domain ids listed)
626                                                                        //Column 15: Comma-separated value list of non-polymer entities within 4 A of at least one residue of domain
627                                                                        Long assemblyId = null;
628                                                                        String assemblyStr = fields[i++];
629                                                                        if(assemblyStr.equals(NOT_DOMAIN_ASSEMBLY)) {
630                                                                                assemblyId = uid;
631                                                                        } else if(assemblyStr.equals("IS_DOMAIN_ASSEMBLY") ) {
632                                                                                if(warnIsDomainAssembly > 1) {
633                                                                                        logger.info("Deprecated 'IS_DOMAIN_ASSEMBLY' value ignored in line {}.",lineNum);
634                                                                                        warnIsDomainAssembly--;
635                                                                                } else if(warnIsDomainAssembly == 0) {
636                                                                                        logger.info("Deprecated 'IS_DOMAIN_ASSEMBLY' value ignored in line {}. Not printing future similar warnings.",lineNum);
637                                                                                        warnIsDomainAssembly--;
638                                                                                }
639                                                                                //assemblyId = null;
640                                                                        } else {
641                                                                                assemblyId = Long.parseLong(assemblyStr);
642                                                                        }
643
644                                                                        String ligandStr = fields[i++];
645                                                                        Set<String> ligands = null;
646                                                                        if( ligandStr.equals("NO_LIGANDS_4A") || ligandStr.isEmpty() ) {
647                                                                                ligands = Collections.emptySet();
648                                                                        } else {
649                                                                                String[] ligSplit = ligandStr.split(",");
650                                                                                ligands = new LinkedHashSet<String>(ligSplit.length);
651                                                                                for(String s : ligSplit) {
652                                                                                        ligands.add(s.intern());
653                                                                                }
654                                                                        }
655
656
657                                                                        EcodDomain domain = new EcodDomain(uid, domainId, manual, xGroup, hGroup, tGroup, fGroup,pdbId, chainId, range, seqId, architectureName, xGroupName, hGroupName, tGroupName, fGroupName, assemblyId, ligands);
658                                                                        domainsList.add(domain);
659                                                                } catch(NumberFormatException e) {
660                                                                        logger.warn("Error in ECOD parsing at line "+lineNum,e);
661                                                                }
662                                                        } else {
663                                                                if(warnNumberOfFields > 1) {
664                                                                        logger.warn("Unexpected number of fields in line {}.",lineNum);
665                                                                        warnNumberOfFields--;
666                                                                } else if(warnNumberOfFields == 0) {
667                                                                        logger.warn("Unexpected number of fields in line {}. Not printing future similar warnings",lineNum);
668                                                                        warnIsDomainAssembly--;
669                                                                }
670                                                        }
671                                                }
672                                        }
673
674                                        line = in.readLine();
675                                        lineNum++;
676                                }
677                                if(this.version == null)
678                                        logger.info("Parsed {} ECOD domains",domainsList.size());
679                                else
680                                        logger.info("Parsed {} ECOD domains from version {}",domainsList.size(),this.version);
681
682
683                                this.domains = Collections.unmodifiableList( domainsList );
684
685                        } finally {
686                                if(in != null) {
687                                        in.close();
688                                }
689                        }
690                }
691
692                /**
693                 * @return a list of all EcodDomains
694                 */
695                public List<EcodDomain> getDomains() {
696                        return domains;
697                }
698
699                /**
700                 * @return the requestedVersion for this file, or null if none was parsed
701                 */
702                public String getVersion() {
703                        return version;
704                }
705        }
706
707
708        @Override
709        public String toString() {
710                String version = null;
711                try {
712                        version = getVersion();
713                } catch (IOException e) {
714                        // For parsing errors, use the requested version
715                        version = requestedVersion;
716                }
717
718                return "EcodInstallation [cacheLocation=" + cacheLocation
719                                + ", version=" + version + "]";
720        }
721
722        public static void main(String[] args) {
723                if( args.length!= 1) {
724                        System.out.println("usage: ecod_domains.txt");
725                        System.exit(1); return;
726                }
727
728                String filename = args[0];
729
730                try {
731                        EcodParser parser = new EcodParser(filename);
732
733                        List<EcodDomain> domains = parser.getDomains();
734
735                        System.out.format("Found %d ECOD domains.%n",domains.size());
736
737                        System.out.println("First 10 domains:");
738                        int i = 0;
739                        for(EcodDomain d: domains) {
740                                if( i>10) break;
741
742                                System.out.println(d.getDomainId());
743                                i++;
744                        }
745                } catch (IOException e) {
746                        e.printStackTrace();
747                }
748        }
749}