001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.util;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.List;
028import java.util.TreeSet;
029
030import org.biojava.nbio.core.util.InputStreamProvider;
031import org.biojava.nbio.structure.*;
032import org.biojava.nbio.structure.align.client.StructureName;
033import org.biojava.nbio.structure.cath.CathDatabase;
034import org.biojava.nbio.structure.cath.CathDomain;
035import org.biojava.nbio.structure.cath.CathFactory;
036import org.biojava.nbio.structure.io.BcifFileReader;
037import org.biojava.nbio.structure.io.CifFileReader;
038import org.biojava.nbio.structure.io.FileParsingParameters;
039import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior;
040import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior;
041import org.biojava.nbio.structure.io.PDBFileReader;
042import org.biojava.nbio.core.util.FileDownloadUtils;
043import org.biojava.nbio.structure.io.StructureFiletype;
044import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
045import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
046import org.biojava.nbio.structure.scop.ScopDatabase;
047import org.biojava.nbio.structure.scop.ScopDescription;
048import org.biojava.nbio.structure.scop.ScopDomain;
049import org.biojava.nbio.structure.scop.ScopFactory;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053/**
054 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently
055 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache
 * is a soft cache: it won't cause out-of-memory exceptions, because the data is garbage-collected if the Java
057 * virtual machine needs to free up space. The AtomCache is thread-safe.
058 *
059 * @author Andreas Prlic
060 * @author Spencer Bliven
061 * @author Peter Rose
062 * @since 3.0
063 */
064public class AtomCache {
        /** Logger shared by all AtomCache instances. */
        private static final Logger logger = LoggerFactory.getLogger(AtomCache.class);

        /**
         * The default output bioassembly style: if true the bioassemblies are multimodel,
         * if false the bioassemblies are flat with renamed chains for symmetry-partners.
         */
        public static final boolean DEFAULT_BIOASSEMBLY_STYLE = false;

        // Tokens that appear in structure names (e.g. "BIO:[pdbId]", "4HHB.C", "4GCR.A_1-83").
        // NOTE(review): none of these constants is referenced in the visible part of this class;
        // presumably they are consumed by the name-parsing code elsewhere -- confirm before changing.
        public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:";
        public static final String CHAIN_NR_SYMBOL = ":";
        public static final String CHAIN_SPLIT_SYMBOL = ".";
        public static final String UNDERSCORE = "_";

        // Platform file separator; the constructor appends it to the PDB directory when it is missing.
        private static final String FILE_SEPARATOR = System.getProperty("file.separator");

        // Parsing options handed out by getFileParsingParams(); the bioassembly getters temporarily
        // toggle its parseBioAssembly flag while loading.
        protected FileParsingParameters params;
        // Initialised to DEFAULT in the (String, String) constructor and overridden from the
        // UserConfiguration in the configuration-based constructor.
        private FetchBehavior fetchBehavior;
        private ObsoleteBehavior obsoleteBehavior;
        // Location for cached utility data (see getCachePath()/setCachePath()).
        private String cachePath;

        // make sure IDs are loaded uniquely
        // (in the visible part of the class this collection is only cleared by the constructor)
        private final Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<>());

        // Directory used to cache PDB files (see getPath()).
        private String path;
        // File format used when loading structures; also decides whether asym ids are used when
        // rebuilding biological assemblies.
        private StructureFiletype filetype = StructureFiletype.BCIF;
090
        /**
         * Default AtomCache constructor. Delegates to {@link #AtomCache(UserConfiguration)} with a
         * newly created {@link UserConfiguration}.
         *
         * Usually stores files in a temp directory, but this can be overridden by setting the PDB_DIR variable at runtime.
         *
         * @see UserConfiguration#UserConfiguration()
         */
        public AtomCache() {
                this(new UserConfiguration());
        }
101
        /**
         * Creates an instance of an AtomCache that is pointed to a particular path in the file system.
         * The same value is used for both pdbFilePath and cachePath.
         *
         * @param pdbFilePath
         *            a directory in the file system to use as a location to cache files.
         */
        public AtomCache(String pdbFilePath) {
                this(pdbFilePath,pdbFilePath);
        }
111
112        /**
113         * Creates an instance of an AtomCache that is pointed to the a particular path in the file system.
114         *
115         * @param pdbFilePath
116         *            a directory in the file system to use as a location to cache files.
117         * @param cachePath
118         */
119        public AtomCache(String pdbFilePath, String cachePath) {
120                logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}", pdbFilePath, cachePath);
121                if (!pdbFilePath.endsWith(FILE_SEPARATOR)) {
122                        pdbFilePath += FILE_SEPARATOR;
123                }
124
125                // we are caching the binary files that contain the PDBs gzipped
126                // that is the most memory efficient way of caching...
127                // set the input stream provider to caching mode
128                System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true");
129
130                setPath(pdbFilePath);
131
132                this.cachePath = cachePath;
133
134                fetchBehavior = FetchBehavior.DEFAULT;
135                obsoleteBehavior = ObsoleteBehavior.DEFAULT;
136
137                currentlyLoading.clear();
138                params = new FileParsingParameters();
139
140                setFiletype(StructureFiletype.BCIF);
141        }
142
143        /**
144         * Creates a new AtomCache object based on the provided UserConfiguration.
145         *
146         * @param config
147         *            the UserConfiguration to use for this cache.
148         */
149        public AtomCache(UserConfiguration config) {
150                this(config.getPdbFilePath(), config.getCacheFilePath());
151                fetchBehavior = config.getFetchBehavior();
152                obsoleteBehavior = config.getObsoleteBehavior();
153                filetype = config.getStructureFiletype();
154        }
155
156        /**
157         * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions.
158         * <p>
159         * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)}
160         * for a more general solution.
161         * @param name
162         * @return an array of Atoms.
163         * @throws IOException
164         * @throws StructureException
165         */
166        public Atom[] getAtoms(String name) throws IOException, StructureException {
167                return getAtoms(new StructureName(name));
168        }
169
170        public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException {
171                Atom[] atoms;
172
173                // System.out.println("loading " + name);
174                Structure s = getStructure(name);
175                atoms = StructureTools.getAtomCAArray(s);
176
177                /*
178                 * synchronized (cache){ cache.put(name, atoms); }
179                 */
180                return atoms;
181        }
182
183        /**
184         * Returns the representative atoms for the provided name.
185         * See {@link #getStructure(String)} for supported naming conventions.
186         *
187         * @param name
188         * @return an array of Atoms.
189         * @throws IOException
190         * @throws StructureException
191         */
192        public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException {
193                return getRepresentativeAtoms(new StructureName(name));
194        }
195
196        public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException {
197                Atom[] atoms;
198
199                Structure s = getStructure(name);
200                atoms = StructureTools.getRepresentativeAtomArray(s);
201
202                /*
203                 * synchronized (cache){ cache.put(name, atoms); }
204                 */
205                return atoms;
206        }
207
208        /**
209         * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the
210         * assembly from the biounit annotations found in {@link Structure#getPDBHeader()}
211         * <p>
212         * Note, the number of available biological unit files
213         * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one
214         * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies.
215         *
216         * @param pdbId
217         *            the PDB ID
218         * @param bioAssemblyId
219         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
220         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
221         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
222         * @return a structure object
223         * @throws IOException
224         * @throws StructureException if biassemblyId &lt; 0 or other problems while loading structure
225         * @since 3.2
226         */
227        public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean multiModel)
228                        throws StructureException, IOException {
229                return getBiologicalAssembly(new PdbId(pdbId), bioAssemblyId, multiModel);
230        }
231        
232        /**
233         * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the
234         * assembly from the biounit annotations found in {@link Structure#getPDBHeader()}
235         * <p>
236         * Note, the number of available biological unit files
237         * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one
238         * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies.
239         *
240         * @param pdbId
241         *            the PDB ID
242         * @param bioAssemblyId
243         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
244         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
245         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
246         * @return a structure object
247         * @throws IOException
248         * @throws StructureException if biassemblyId &lt; 0 or other problems while loading structure
249         * @since 6.0.0
250         */
251        public Structure getBiologicalAssembly(PdbId pdbId, int bioAssemblyId, boolean multiModel)
252                        throws StructureException, IOException {
253                if (bioAssemblyId < 0) {
254                        throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId "
255                                        + bioAssemblyId);
256                }
257
258                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
259
260                if (!getFileParsingParams().isParseBioAssembly()) {
261                        getFileParsingParams().setParseBioAssembly(true);
262                }
263
264                Structure asymUnit = getStructureForPdbId(pdbId);
265
266                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
267
268                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) {
269                        logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
270                        return asymUnit;
271                }
272
273                // 0 ... asym unit
274                if (bioAssemblyId == 0) {
275                        logger.info("Requested biological assembly 0 for PDB id {}, returning asymmetric unit", pdbId);
276                        return asymUnit;
277                }
278                // does it exist?
279                if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
280                        throw new StructureException("No biological assembly available for biological assembly id " + bioAssemblyId + " of " + pdbId);
281                }
282
283                List<BiologicalAssemblyTransformation> transformations =
284                                asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
285
286
287                if (transformations == null || transformations.size() == 0) {
288                        throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
289                }
290
291                BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
292
293                // if we use mmcif or mmtf, then we need to pass useAsymIds=true
294                boolean useAsymIds = false;
295                if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) {
296                        useAsymIds = true;
297                }
298                return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
299        }
300
301        /**
302         * Returns the default biological unit (bioassemblyId=1, known in PDB as pdb1.gz). If it is not available,
303         * the asymmetric unit will be returned, e.g. for NMR structures.
304         *
305         * <p>Biological assemblies can also be accessed using
306         * <code>getStructure("BIO:<i>[pdbId]</i>")</code>
307         * @param pdbId the PDB id
308         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
309         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
310         * @return a structure object
311         * @throws IOException
312         * @throws StructureException
313         * @since 4.2
314         */
315        public Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws StructureException, IOException {
316                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
317
318                if (!getFileParsingParams().isParseBioAssembly()) {
319                        getFileParsingParams().setParseBioAssembly(true);
320                }
321
322                Structure asymUnit = getStructureForPdbId(pdbId);
323                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
324
325
326                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) {
327                        logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
328                        return asymUnit;
329                }
330
331                int bioAssemblyId = 1;
332
333                // does it exist?
334                if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
335                        return asymUnit;
336                }
337
338                List<BiologicalAssemblyTransformation> transformations =
339                                asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
340
341
342                if (transformations == null || transformations.size() == 0) {
343                        throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
344                }
345
346                BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
347
348                // if we use mmcif or mmtf, then we need to pass useAsymIds=true
349                boolean useAsymIds = false;
350                if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) {
351                        useAsymIds = true;
352                }
353                return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
354        }
355
356        /**
357         * Returns all biological assemblies for given PDB id.
358         * @param pdbId
359         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
360         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
361         * @return
362         * @throws StructureException
363         * @throws IOException
364         * @since 5.0
365         */
366        public List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws StructureException, IOException {
367                List<Structure> assemblies = new ArrayList<>();
368
369                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
370
371                if (!getFileParsingParams().isParseBioAssembly()) {
372                        getFileParsingParams().setParseBioAssembly(true);
373                }
374
375                Structure asymUnit = getStructureForPdbId(pdbId);
376                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
377
378                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) {
379                        logger.info("No bioassembly information found for {}, returning asymmetric unit as the only biological assembly", pdbId);
380                        assemblies.add(asymUnit);
381                        return assemblies;
382                }
383
384                for (int bioAssemblyId : asymUnit.getPDBHeader().getBioAssemblies().keySet()) {
385                        List<BiologicalAssemblyTransformation> transformations =
386                                        asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
387
388                        if (transformations == null || transformations.size() == 0) {
389                                logger.info("Could not load transformations to recreate biological assembly id {} of {}. Assembly " +
390                                                "id will be missing in biological assemblies.", bioAssemblyId, pdbId);
391                                continue;
392                        }
393
394                        BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
395
396                        // if we use mmcif or mmtf, then we need to pass useAsymIds=true
397                        boolean useAsymIds = false;
398                        if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) {
399                                useAsymIds = true;
400                        }
401                        Structure s = builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
402                        assemblies.add(s);
403                }
404                return assemblies;
405        }
406
407        /**
408         * Returns the path that contains the caching file for utility data, such as domain definitions.
409         *
410         * @return
411         */
412        public String getCachePath() {
413                return cachePath;
414        }
415
416        public FileParsingParameters getFileParsingParams() {
417                return params;
418        }
419
420        /**
421         * Get the path that is used to cache PDB files.
422         *
423         * @return path to a directory
424         */
425        public String getPath() {
426                return path;
427        }
428
429        /**
430         * Request a Structure based on a <i>name</i>.
431         *
432         * <pre>
433         *              Formal specification for how to specify the <i>name</i>:
434         *
435         *              name     := pdbID
436         *                             | pdbID '.' chainID
437         *                             | pdbID '.' range
438         *                             | scopID
439         *              range         := '('? range (',' range)? ')'?
440         *                             | chainID
441         *                             | chainID '_' resNum '-' resNum
442         *              pdbID         := [1-9][a-zA-Z0-9]{3}
443         *                             | PDB_[a-zA-Z0-9]{8}
444         *              chainID       := [a-zA-Z0-9]
445         *              scopID        := 'd' pdbID [a-z_][0-9_]
446         *              resNum        := [-+]?[0-9]+[A-Za-z]?
447         *
448         *
449         *              Example structures:
450         *              1TIM                 #whole structure
451         *              4HHB.C               #single chain
452         *              4GCR.A_1-83          #one domain, by residue number
453         *              3AA0.A,B             #two chains treated as one structure
454         *              PDB_00001TIM         #whole structure (extended format)
455         *              PDB_00004HHB.C       #single chain (extended format)
456         *              PDB_00004GCR.A_1-83  #one domain, by residue number (extended format)
457         *              PDB_00003AA0.A,B     #two chains treated as one structure (extended format)
458         *              d2bq6a1              #scop domain
459         * </pre>
460         *
461         * With the additional set of rules:
462         *
463         * <ul>
464         * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model
465         * only (for NMR).
466         * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li>
467         * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. </li>
468         * <li>URLs are accepted as well</li>
469         * </ul>
470         *
471         * <p>Note that this method should not be used in StructureIdentifier
472         * implementations to avoid circular calls.
473         * @param name
474         * @return a Structure object, or null if name appears improperly formated (eg too short, etc)
475         * @throws IOException
476         *             The PDB file cannot be cached due to IO errors
477         * @throws StructureException
478         *             The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon
479         *             errors, eg for poorly formatted subranges.
480         */
481        public Structure getStructure(String name) throws IOException, StructureException {
482                StructureName structureName = new StructureName(name);
483                return getStructure(structureName);
484        }
485
486        /**
487         * Get the structure corresponding to the given {@link StructureIdentifier}.
488         * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)}
489         * followed by {@link StructureIdentifier#reduce(Structure)}.
490         *
491         * <p>Note that this method should not be used in StructureIdentifier
492         * implementations to avoid circular calls.
493         * @param strucId
494         * @return
495         * @throws IOException
496         * @throws StructureException
497         */
498        public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException {
499                Structure s = strucId.loadStructure(this);
500                Structure r = strucId.reduce(s);
501                r.setStructureIdentifier(strucId);
502                return r;
503        }
504
        /**
         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object,
         * using the SCOP database returned by {@link ScopFactory#getSCOP()}.
         *
         * @param domain
         *            a SCOP domain
         * @return a Structure object
         * @throws IOException
         * @throws StructureException
         */
        public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException {
                return getStructureForDomain(domain, ScopFactory.getSCOP());
        }
517
518        /**
519         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
520         *
521         * @param domain
522         *            a SCOP domain
523         * @param scopDatabase
524         *            A {@link ScopDatabase} to use
525         * @return a Structure object
526         * @throws IOException
527         * @throws StructureException
528         */
529        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException,
530                        StructureException {
531                return getStructureForDomain(domain, scopDatabase, false);
532        }
533
534        /**
535         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
536         *
537         * @param domain
538         *            a SCOP domain
539         * @param scopDatabase
540         *            A {@link ScopDatabase} to use
541         * @param strictLigandHandling
542         *            If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP
543         *            domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the
544         *            definition (residue numbers) of the SCOP domain
545         * @return a Structure object
546         * @throws IOException
547         * @throws StructureException
548         */
549        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
550                        throws IOException, StructureException {
551                PdbId pdbId = domain.getPdbId();
552                Structure fullStructure = getStructureForPdbId(pdbId);
553                Structure structure = domain.reduce(fullStructure);
554
555                // TODO It would be better to move all of this into the reduce method,
556                // but that would require ligand handling properties in StructureIdentifiers
557
558                // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in
559                // specifically, we add a ligand if and only if it occurs within the domain
560                AtomPositionMap map = null;
561                List<ResidueRangeAndLength> rrs = null;
562                if (strictLigandHandling) {
563                        map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER);
564                        rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map);
565                }
566                for (Chain chain : fullStructure.getNonPolyChains()) {
567                        if (!structure.hasPdbChain(chain.getName())) {
568                                continue; // we can't do anything with a chain our domain
569                        }
570
571                        Chain newChain;
572                        if (!structure.hasNonPolyChain(chain.getId())) {
573                                newChain = new ChainImpl();
574                                newChain.setId(chain.getId());
575                                newChain.setName(chain.getName());
576                                newChain.setEntityInfo(chain.getEntityInfo());
577                                structure.addChain(newChain);
578                        } else {
579                                newChain = structure.getNonPolyChain(chain.getId());
580                        }
581
582                        List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups());
583                        for (Group group : ligands) {
584                                boolean shouldContain = true;
585                                if (strictLigandHandling) {
586                                        shouldContain = false; // whether the ligand occurs within the domain
587                                        for (ResidueRange rr : rrs) {
588                                                if (rr.contains(group.getResidueNumber(), map)) {
589                                                        shouldContain = true;
590                                                }
591                                        }
592                                }
593                                boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate
594                                                                                                                                                                        // ligands
595                                if (shouldContain && !alreadyContains) {
596                                        newChain.addGroup(group);
597                                }
598                        }
599                }
600
601                // build a more meaningful description for the new structure
602                StringBuilder header = new StringBuilder();
603                header.append(domain.getClassificationId());
604                if (scopDatabase != null) {
605                        int sf = domain.getSuperfamilyId();
606                        ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf);
607                        if (description != null) {
608                                header.append(" | ");
609                                header.append(description.getDescription());
610                        }
611                }
612                structure.getPDBHeader().setDescription(header.toString());
613
614                return structure;
615        }
616
        /**
         * Returns the representation of a SCOP domain, given by its id, as a BioJava {@link Structure} object,
         * using the SCOP database returned by {@link ScopFactory#getSCOP()}.
         *
         * @param scopId
         *            a SCOP Id
         * @return a Structure object
         * @throws IOException
         * @throws StructureException
         */
        public Structure getStructureForDomain(String scopId) throws IOException, StructureException {
                return getStructureForDomain(scopId, ScopFactory.getSCOP());
        }
629
630        /**
631         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
632         *
633         * @param scopId
634         *            a SCOP Id
635         * @param scopDatabase
636         *            A {@link ScopDatabase} to use
637         * @return a Structure object
638         * @throws IOException
639         * @throws StructureException
640         */
641        public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException,
642                        StructureException {
643                ScopDomain domain = scopDatabase.getDomainByScopID(scopId);
644                return getStructureForDomain(domain, scopDatabase);
645        }
646
647        /**
648         * set the location at which utility data should be cached.
649         *
650         * @param cachePath
651         */
        public void setCachePath(String cachePath) {
                // Stored exactly as given; no user-home expansion is applied here.
                this.cachePath = cachePath;
        }
655
        /**
         * Sets the parsing parameters that are passed on to the file readers
         * created by the {@code loadStructureFrom*ByPdbId} methods.
         *
         * @param params
         *            the {@link FileParsingParameters} to hand to each reader
         */
        public void setFileParsingParams(FileParsingParameters params) {
                this.params = params;
        }
659
660        /**
661         * <b>[Optional]</b> This method changes the behavior when obsolete entries
662         * are requested. Current behaviors are:
663         * <ul>
664         * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}
665         *   Throw a {@link StructureException} (the default)
666         * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE}
667         *   Load the requested ID from the PDB's obsolete repository
668         * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT}
669         *   Load the most recent version of the requested structure
670         * </ul>
671         *
672         * <p>This setting may be silently ignored by implementations which do not have
673         * access to the server to determine whether an entry is obsolete, such as
674         * certain {@link FetchBehavior}s. Note that an obsolete entry may still be
         * returned even if this is set to FETCH_CURRENT, if the entry is found locally.
676         *
677         * @param behavior Whether to fetch obsolete records
678         * @since 4.0.0
679         */
680        public void setObsoleteBehavior(ObsoleteBehavior behavior) {
681                obsoleteBehavior = behavior;
682        }
683
684        /**
685         * Returns how this instance deals with obsolete entries. Note that this
686         * setting may be ignored by some implementations or in some situations,
687         * such as certain {@link FetchBehavior}s.
688         *
689         * <p>For most implementations, the default value is
690         * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}.
691         *
692         * @return The ObsoleteBehavior
693         * @since 4.0.0
694         */
695        public ObsoleteBehavior getObsoleteBehavior() {
696                return obsoleteBehavior;
697        }
698
699        /**
700         * Get the behavior for fetching files from the server
         * @return the currently configured {@link FetchBehavior}
702         */
703        public FetchBehavior getFetchBehavior() {
704                return fetchBehavior;
705        }
706
707        /**
708         * Set the behavior for fetching files from the server
709         * @param fetchBehavior
710         */
        public void setFetchBehavior(FetchBehavior fetchBehavior) {
                // Forwarded to every file reader created by the loadStructureFrom*ByPdbId methods.
                this.fetchBehavior = fetchBehavior;
        }
714
715        /**
716         * Set the path that is used to cache PDB files.
717         *
718         * @param path
719         *            to a directory
720         */
721        public void setPath(String path) {
722                this.path = FileDownloadUtils.expandUserHome(path);
723        }
724
725        /**
726         * Returns the currently active file type that will be parsed.
727         * @return a StructureFiletype
728         */
729        public StructureFiletype getFiletype() {
730                return filetype;
731        }
732
733        /**
734         * Set the file type that will be parsed.
735         * @param filetype a StructureFiletype
736         */
        public void setFiletype(StructureFiletype filetype) {
                // Consulted by getStructureForPdbId(PdbId) to choose between the CIF, BCIF and PDB loaders.
                this.filetype = filetype;
        }
740
741        private boolean checkLoading(PdbId pdbId) {
742                return currentlyLoading.contains(pdbId.getId());
743        }
744
745        /**
         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the {@link CathDatabase}
747         * at {@link CathFactory#getCathDatabase()}.
748         */
749        public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException {
750                return getStructureForCathDomain(structureName, CathFactory.getCathDatabase());
751        }
752
753        /**
754         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}.
755         */
756        public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException {
757                CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier());
758
759                Structure s = getStructureForPdbId(cathDomain.getIdentifier());
760                Structure n = cathDomain.reduce(s);
761
762                // add the ligands of the chain...
763                Chain newChain = n.getPolyChainByPDB(structureName.getChainId());
764                List<Chain> origChains = s.getNonPolyChainsByPDB(structureName.getChainId());
765                for (Chain origChain : origChains) {
766                        List<Group> ligands = origChain.getAtomGroups();
767
768                        for (Group g : ligands) {
769                                if (!newChain.getAtomGroups().contains(g)) {
770                                        newChain.addGroup(g);
771                                }
772                        }
773                }
774
775                return n;
776        }
777
778        protected void flagLoading(PdbId pdbId) {
779                String id = pdbId.getId();
780                if (!currentlyLoading.contains(id)) {
781                        currentlyLoading.add(id);
782                }
783        }
784
785        protected void flagLoadingFinished(PdbId pdbId) {
786                currentlyLoading.remove(pdbId.getId());
787        }
788
789        /**
790         * Loads a structure directly by PDB ID
791         * @param id
         * @return the loaded structure, or {@code null} if {@code id} is {@code null}
793         * @throws IOException
794         * @throws StructureException
795         */
796        public Structure getStructureForPdbId(String id) throws IOException, StructureException {
797                if (id == null)
798                        return null;
799                return getStructureForPdbId(new PdbId(id));
800        }
801        /**
802         * Loads a structure directly by PDB ID
803         * @param pdbId
         * @return the loaded structure, or {@code null} if {@code pdbId} is {@code null}
805         * @throws IOException
806         */
807        public Structure getStructureForPdbId(PdbId pdbId) throws IOException {
808                if (pdbId == null)
809                        return null;
810                
811                while (checkLoading(pdbId)) {
812                        // waiting for loading to be finished...
813                        try {
814                                Thread.sleep(100);
815                        } catch (InterruptedException e) {
816                                logger.error(e.getMessage());
817                        }
818                }
819
820                switch (filetype) {
821                        case CIF:
822                                logger.debug("loading from mmcif");
823                                return loadStructureFromCifByPdbId(pdbId);
824                        case BCIF:
825                                logger.debug("loading from bcif");
826                                return loadStructureFromBcifByPdbId(pdbId);
827                        case PDB: default:
828                                logger.debug("loading from pdb");
829                                return loadStructureFromPdbByPdbId(pdbId);
830                }
831        }
832
833        protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException {
834                return loadStructureFromCifByPdbId(new PdbId(pdbId));
835        }
836        
837        protected Structure loadStructureFromCifByPdbId(PdbId pdbId) throws IOException {
838                logger.debug("Loading structure {} from mmCIF file {}.", pdbId, path);
839                Structure s;
840                flagLoading(pdbId);
841                try {
842                        CifFileReader reader = new CifFileReader(path);
843                        reader.setFetchBehavior(fetchBehavior);
844                        reader.setObsoleteBehavior(obsoleteBehavior);
845                        reader.setFileParsingParameters(params);
846                        s = reader.getStructureById(pdbId);
847                } finally {
848                        flagLoadingFinished(pdbId);
849                }
850
851                return s;
852        }
853
854        protected Structure loadStructureFromBcifByPdbId(String pdbId) throws IOException {
855                return loadStructureFromBcifByPdbId(new PdbId(pdbId));
856        }
857        protected Structure loadStructureFromBcifByPdbId(PdbId pdbId) throws IOException {
858                logger.debug("Loading structure {} from BinaryCIF file {}.", pdbId, path);
859                Structure s;
860                flagLoading(pdbId);
861                try {
862                        BcifFileReader reader = new BcifFileReader(path);
863                        reader.setFetchBehavior(fetchBehavior);
864                        reader.setObsoleteBehavior(obsoleteBehavior);
865                        reader.setFileParsingParameters(params);
866                        s = reader.getStructureById(pdbId);
867                } finally {
868                        flagLoadingFinished(pdbId);
869                }
870
871                return s;
872        }
873
874        protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException {
875                return loadStructureFromPdbByPdbId(new PdbId(pdbId));
876        }
877
878        protected Structure loadStructureFromPdbByPdbId(PdbId pdbId) throws IOException {
879                logger.debug("Loading structure {} from PDB file {}.", pdbId, path);
880                Structure s;
881                flagLoading(pdbId);
882                try {
883                        PDBFileReader reader = new PDBFileReader(path);
884                        reader.setFetchBehavior(fetchBehavior);
885                        reader.setObsoleteBehavior(obsoleteBehavior);
886
887                        reader.setFileParsingParameters(params);
888
889                        s = reader.getStructureById(pdbId);
890                } finally {
891                        flagLoadingFinished(pdbId);
892                }
893
894                return s;
895        }
896}