001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.util;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.List;
028import java.util.TreeSet;
029
030import org.biojava.nbio.core.util.InputStreamProvider;
031import org.biojava.nbio.structure.*;
032import org.biojava.nbio.structure.align.client.StructureName;
033import org.biojava.nbio.structure.cath.CathDatabase;
034import org.biojava.nbio.structure.cath.CathDomain;
035import org.biojava.nbio.structure.cath.CathFactory;
036import org.biojava.nbio.structure.io.BcifFileReader;
037import org.biojava.nbio.structure.io.CifFileReader;
038import org.biojava.nbio.structure.io.FileParsingParameters;
039import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior;
040import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior;
041import org.biojava.nbio.structure.io.MMTFFileReader;
042import org.biojava.nbio.structure.io.PDBFileReader;
043import org.biojava.nbio.core.util.FileDownloadUtils;
044import org.biojava.nbio.structure.io.StructureFiletype;
045import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
046import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
047import org.biojava.nbio.structure.scop.ScopDatabase;
048import org.biojava.nbio.structure.scop.ScopDescription;
049import org.biojava.nbio.structure.scop.ScopDomain;
050import org.biojava.nbio.structure.scop.ScopFactory;
051import org.slf4j.Logger;
052import org.slf4j.LoggerFactory;
053
054/**
 * A utility class that provides easy access to Structure objects. If you are running a script that frequently
 * re-uses the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache
 * is a soft cache: it will not cause out-of-memory errors, because the cached data is garbage-collected whenever the
 * Java virtual machine needs to free up space. The AtomCache is thread-safe.
059 *
060 * @author Andreas Prlic
061 * @author Spencer Bliven
062 * @author Peter Rose
063 * @since 3.0
064 */
065public class AtomCache {
        /** Class-wide SLF4J logger. */
        private static final Logger logger = LoggerFactory.getLogger(AtomCache.class);

        /**
         * The default output bioassembly style: if true the bioassemblies are multimodel,
         * if false the bioassemblies are flat with renamed chains for symmetry-partners.
         */
        public static final boolean DEFAULT_BIOASSEMBLY_STYLE = false;

        // Public symbols related to structure names. The getStructure(String) documentation describes names such
        // as "BIO:[pdbId]", "4HHB.C" and "4GCR.A_1-83".
        // NOTE(review): none of these constants is referenced in the visible part of the class -- confirm their
        // users before changing any value.
        public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:";
        public static final String CHAIN_NR_SYMBOL = ":";
        public static final String CHAIN_SPLIT_SYMBOL = ".";
        public static final String UNDERSCORE = "_";

        // Platform file separator; the two-path constructor appends it to the PDB path when it is missing.
        private static final String FILE_SEPARATOR = System.getProperty("file.separator");

        // Parsing options returned by getFileParsingParams(); the biological-assembly methods temporarily switch
        // its parseBioAssembly flag while loading.
        protected FileParsingParameters params;
        // Both behaviors start as DEFAULT and are overwritten by the UserConfiguration constructor.
        private FetchBehavior fetchBehavior;
        private ObsoleteBehavior obsoleteBehavior;
        // Location for cached utility data, such as domain definitions.
        private String cachePath;

        // make sure IDs are loaded uniquely
        private final Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<>());

        // Directory used to cache PDB files.
        private String path;
        // Structure file format; BCIF unless overridden. The assembly builders are told to use asym ids for
        // CIF, BCIF and MMTF.
        private StructureFiletype filetype = StructureFiletype.BCIF;
091
092        /**
093         * Default AtomCache constructor.
094         *
095         * Usually stores files in a temp directory, but this can be overriden by setting the PDB_DIR variable at runtime.
096         *
097         * @see UserConfiguration#UserConfiguration()
098         */
099        public AtomCache() {
100                this(new UserConfiguration());
101        }
102
103        /**
104         * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. It will use the same value for pdbFilePath and cachePath.
105         *
106         * @param pdbFilePath
107         *            a directory in the file system to use as a location to cache files.
108         */
109        public AtomCache(String pdbFilePath) {
110                this(pdbFilePath,pdbFilePath);
111        }
112
113        /**
114         * Creates an instance of an AtomCache that is pointed to the a particular path in the file system.
115         *
116         * @param pdbFilePath
117         *            a directory in the file system to use as a location to cache files.
118         * @param cachePath
119         */
120        public AtomCache(String pdbFilePath, String cachePath) {
121                logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}", pdbFilePath, cachePath);
122                if (!pdbFilePath.endsWith(FILE_SEPARATOR)) {
123                        pdbFilePath += FILE_SEPARATOR;
124                }
125
126                // we are caching the binary files that contain the PDBs gzipped
127                // that is the most memory efficient way of caching...
128                // set the input stream provider to caching mode
129                System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true");
130
131                setPath(pdbFilePath);
132
133                this.cachePath = cachePath;
134
135                fetchBehavior = FetchBehavior.DEFAULT;
136                obsoleteBehavior = ObsoleteBehavior.DEFAULT;
137
138                currentlyLoading.clear();
139                params = new FileParsingParameters();
140
141                setFiletype(StructureFiletype.BCIF);
142        }
143
144        /**
145         * Creates a new AtomCache object based on the provided UserConfiguration.
146         *
147         * @param config
148         *            the UserConfiguration to use for this cache.
149         */
150        public AtomCache(UserConfiguration config) {
151                this(config.getPdbFilePath(), config.getCacheFilePath());
152                fetchBehavior = config.getFetchBehavior();
153                obsoleteBehavior = config.getObsoleteBehavior();
154                filetype = config.getStructureFiletype();
155        }
156
157        /**
158         * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions.
159         * <p>
160         * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)}
161         * for a more general solution.
162         * @param name
163         * @return an array of Atoms.
164         * @throws IOException
165         * @throws StructureException
166         * @see
167         */
168        public Atom[] getAtoms(String name) throws IOException, StructureException {
169                return getAtoms(new StructureName(name));
170        }
171
172        public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException {
173                Atom[] atoms;
174
175                // System.out.println("loading " + name);
176                Structure s = getStructure(name);
177                atoms = StructureTools.getAtomCAArray(s);
178
179                /*
180                 * synchronized (cache){ cache.put(name, atoms); }
181                 */
182                return atoms;
183        }
184
185        /**
186         * Returns the representative atoms for the provided name.
187         * See {@link #getStructure(String)} for supported naming conventions.
188         *
189         * @param name
190         * @return an array of Atoms.
191         * @throws IOException
192         * @throws StructureException
193         * @see
194         */
195        public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException {
196                return getRepresentativeAtoms(new StructureName(name));
197        }
198
199        public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException {
200                Atom[] atoms;
201
202                Structure s = getStructure(name);
203                atoms = StructureTools.getRepresentativeAtomArray(s);
204
205                /*
206                 * synchronized (cache){ cache.put(name, atoms); }
207                 */
208                return atoms;
209        }
210
211        /**
212         * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the
213         * assembly from the biounit annotations found in {@link Structure#getPDBHeader()}
214         * <p>
215         * Note, the number of available biological unit files
216         * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one
217         * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies.
218         *
219         * @param pdbId
220         *            the PDB ID
221         * @param bioAssemblyId
222         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
223         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
224         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
225         * @return a structure object
226         * @throws IOException
227         * @throws StructureException if biassemblyId < 0 or other problems while loading structure
228         * @since 3.2
229         */
230        public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean multiModel)
231                        throws StructureException, IOException {
232                return getBiologicalAssembly(new PdbId(pdbId), bioAssemblyId, multiModel);
233        }
234        
235        /**
236         * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the
237         * assembly from the biounit annotations found in {@link Structure#getPDBHeader()}
238         * <p>
239         * Note, the number of available biological unit files
240         * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one
241         * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies.
242         *
243         * @param pdbId
244         *            the PDB ID
245         * @param bioAssemblyId
246         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
247         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
248         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
249         * @return a structure object
250         * @throws IOException
251         * @throws StructureException if biassemblyId < 0 or other problems while loading structure
252         * @since 6.0.0
253         */
254        public Structure getBiologicalAssembly(PdbId pdbId, int bioAssemblyId, boolean multiModel)
255                        throws StructureException, IOException {
256                if (bioAssemblyId < 0) {
257                        throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId "
258                                        + bioAssemblyId);
259                }
260
261                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
262
263                if (!getFileParsingParams().isParseBioAssembly()) {
264                        getFileParsingParams().setParseBioAssembly(true);
265                }
266
267                Structure asymUnit = getStructureForPdbId(pdbId);
268
269                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
270
271                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) {
272                        logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
273                        return asymUnit;
274                }
275
276                // 0 ... asym unit
277                if (bioAssemblyId == 0) {
278                        logger.info("Requested biological assembly 0 for PDB id {}, returning asymmetric unit", pdbId);
279                        return asymUnit;
280                }
281                // does it exist?
282                if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
283                        throw new StructureException("No biological assembly available for biological assembly id " + bioAssemblyId + " of " + pdbId);
284                }
285
286                List<BiologicalAssemblyTransformation> transformations =
287                                asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
288
289
290                if (transformations == null || transformations.size() == 0) {
291                        throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
292                }
293
294                BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
295
296                // if we use mmcif or mmtf, then we need to pass useAsymIds=true
297                boolean useAsymIds = false;
298                if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) {
299                        useAsymIds = true;
300                }
301                return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
302        }
303
304        /**
305         * Returns the default biological unit (bioassemblyId=1, known in PDB as pdb1.gz). If it is not available,
306         * the asymmetric unit will be returned, e.g. for NMR structures.
307         *
308         * <p>Biological assemblies can also be accessed using
309         * <tt>getStructure("BIO:<i>[pdbId]</i>")</tt>
310         * @param pdbId the PDB id
311         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
312         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
313         * @return a structure object
314         * @throws IOException
315         * @throws StructureException
316         * @since 4.2
317         */
318        public Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws StructureException, IOException {
319                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
320
321                if (!getFileParsingParams().isParseBioAssembly()) {
322                        getFileParsingParams().setParseBioAssembly(true);
323                }
324
325                Structure asymUnit = getStructureForPdbId(pdbId);
326                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
327
328
329                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) {
330                        logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
331                        return asymUnit;
332                }
333
334                int bioAssemblyId = 1;
335
336                // does it exist?
337                if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
338                        return asymUnit;
339                }
340
341                List<BiologicalAssemblyTransformation> transformations =
342                                asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
343
344
345                if (transformations == null || transformations.size() == 0) {
346                        throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
347                }
348
349                BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
350
351                // if we use mmcif or mmtf, then we need to pass useAsymIds=true
352                boolean useAsymIds = false;
353                if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) {
354                        useAsymIds = true;
355                }
356                return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
357        }
358
359        /**
360         * Returns all biological assemblies for given PDB id.
361         * @param pdbId
362         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
363         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
364         * @return
365         * @throws StructureException
366         * @throws IOException
367         * @since 5.0
368         */
369        public List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws StructureException, IOException {
370                List<Structure> assemblies = new ArrayList<>();
371
372                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
373
374                if (!getFileParsingParams().isParseBioAssembly()) {
375                        getFileParsingParams().setParseBioAssembly(true);
376                }
377
378                Structure asymUnit = getStructureForPdbId(pdbId);
379                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
380
381                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) {
382                        logger.info("No bioassembly information found for {}, returning asymmetric unit as the only biological assembly", pdbId);
383                        assemblies.add(asymUnit);
384                        return assemblies;
385                }
386
387                for (int bioAssemblyId : asymUnit.getPDBHeader().getBioAssemblies().keySet()) {
388                        List<BiologicalAssemblyTransformation> transformations =
389                                        asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
390
391                        if (transformations == null || transformations.size() == 0) {
392                                logger.info("Could not load transformations to recreate biological assembly id {} of {}. Assembly " +
393                                                "id will be missing in biological assemblies.", bioAssemblyId, pdbId);
394                                continue;
395                        }
396
397                        BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
398
399                        // if we use mmcif or mmtf, then we need to pass useAsymIds=true
400                        boolean useAsymIds = false;
401                        if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) {
402                                useAsymIds = true;
403                        }
404                        Structure s = builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
405                        assemblies.add(s);
406                }
407                return assemblies;
408        }
409
410        /**
411         * Returns the path that contains the caching file for utility data, such as domain definitions.
412         *
413         * @return
414         */
415        public String getCachePath() {
416                return cachePath;
417        }
418
419        public FileParsingParameters getFileParsingParams() {
420                return params;
421        }
422
423        /**
424         * Get the path that is used to cache PDB files.
425         *
426         * @return path to a directory
427         */
428        public String getPath() {
429                return path;
430        }
431
432        /**
433         * Request a Structure based on a <i>name</i>.
434         *
435         * <pre>
436         *              Formal specification for how to specify the <i>name</i>:
437         *
438         *              name     := pdbID
439         *                             | pdbID '.' chainID
440         *                             | pdbID '.' range
441         *                             | scopID
442         *              range         := '('? range (',' range)? ')'?
443         *                             | chainID
444         *                             | chainID '_' resNum '-' resNum
445         *              pdbID         := [1-9][a-zA-Z0-9]{3}
446         *                             | PDB_[a-zA-Z0-9]{8}
447         *              chainID       := [a-zA-Z0-9]
448         *              scopID        := 'd' pdbID [a-z_][0-9_]
449         *              resNum        := [-+]?[0-9]+[A-Za-z]?
450         *
451         *
452         *              Example structures:
453         *              1TIM                 #whole structure
454         *              4HHB.C               #single chain
455         *              4GCR.A_1-83          #one domain, by residue number
456         *              3AA0.A,B             #two chains treated as one structure
457         *              PDB_00001TIM         #whole structure (extended format)
458         *              PDB_00004HHB.C       #single chain (extended format)
459         *              PDB_00004GCR.A_1-83  #one domain, by residue number (extended format)
460         *              PDB_00003AA0.A,B     #two chains treated as one structure (extended format)
461         *              d2bq6a1              #scop domain
462         * </pre>
463         *
464         * With the additional set of rules:
465         *
466         * <ul>
467         * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model
468         * only (for NMR).
469         * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li>
470         * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names,
471         * see {@link #setStrictSCOP(boolean)}</li>
472         * <li>URLs are accepted as well</li>
473         * </ul>
474         *
475         * <p>Note that this method should not be used in StructureIdentifier
476         * implementations to avoid circular calls.
477         * @param name
478         * @return a Structure object, or null if name appears improperly formated (eg too short, etc)
479         * @throws IOException
480         *             The PDB file cannot be cached due to IO errors
481         * @throws StructureException
482         *             The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon
483         *             errors, eg for poorly formatted subranges.
484         */
485        public Structure getStructure(String name) throws IOException, StructureException {
486                StructureName structureName = new StructureName(name);
487                return getStructure(structureName);
488        }
489
490        /**
491         * Get the structure corresponding to the given {@link StructureIdentifier}.
492         * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)}
493         * followed by {@link StructureIdentifier#reduce(Structure)}.
494         *
495         * <p>Note that this method should not be used in StructureIdentifier
496         * implementations to avoid circular calls.
497         * @param strucId
498         * @return
499         * @throws IOException
500         * @throws StructureException
501         */
502        public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException {
503                Structure s = strucId.loadStructure(this);
504                Structure r = strucId.reduce(s);
505                r.setStructureIdentifier(strucId);
506                return r;
507        }
508
509        /**
510         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
511         *
512         * @param domain
513         *            a SCOP domain
514         * @return a Structure object
515         * @throws IOException
516         * @throws StructureException
517         */
518        public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException {
519                return getStructureForDomain(domain, ScopFactory.getSCOP());
520        }
521
522        /**
523         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
524         *
525         * @param domain
526         *            a SCOP domain
527         * @param scopDatabase
528         *            A {@link ScopDatabase} to use
529         * @return a Structure object
530         * @throws IOException
531         * @throws StructureException
532         */
533        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException,
534                        StructureException {
535                return getStructureForDomain(domain, scopDatabase, false);
536        }
537
538        /**
539         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
540         *
541         * @param domain
542         *            a SCOP domain
543         * @param scopDatabase
544         *            A {@link ScopDatabase} to use
545         * @param strictLigandHandling
546         *            If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP
547         *            domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the
548         *            definition (residue numbers) of the SCOP domain
549         * @return a Structure object
550         * @throws IOException
551         * @throws StructureException
552         */
553        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
554                        throws IOException, StructureException {
555                PdbId pdbId = domain.getPdbId();
556                Structure fullStructure = getStructureForPdbId(pdbId);
557                Structure structure = domain.reduce(fullStructure);
558
559                // TODO It would be better to move all of this into the reduce method,
560                // but that would require ligand handling properties in StructureIdentifiers
561
562                // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in
563                // specifically, we add a ligand if and only if it occurs within the domain
564                AtomPositionMap map = null;
565                List<ResidueRangeAndLength> rrs = null;
566                if (strictLigandHandling) {
567                        map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER);
568                        rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map);
569                }
570                for (Chain chain : fullStructure.getNonPolyChains()) {
571                        if (!structure.hasPdbChain(chain.getName())) {
572                                continue; // we can't do anything with a chain our domain
573                        }
574
575                        Chain newChain;
576                        if (!structure.hasNonPolyChain(chain.getId())) {
577                                newChain = new ChainImpl();
578                                newChain.setId(chain.getId());
579                                newChain.setName(chain.getName());
580                                newChain.setEntityInfo(chain.getEntityInfo());
581                                structure.addChain(newChain);
582                        } else {
583                                newChain = structure.getNonPolyChain(chain.getId());
584                        }
585
586                        List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups());
587                        for (Group group : ligands) {
588                                boolean shouldContain = true;
589                                if (strictLigandHandling) {
590                                        shouldContain = false; // whether the ligand occurs within the domain
591                                        for (ResidueRange rr : rrs) {
592                                                if (rr.contains(group.getResidueNumber(), map)) {
593                                                        shouldContain = true;
594                                                }
595                                        }
596                                }
597                                boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate
598                                                                                                                                                                        // ligands
599                                if (shouldContain && !alreadyContains) {
600                                        newChain.addGroup(group);
601                                }
602                        }
603                }
604
605                // build a more meaningful description for the new structure
606                StringBuilder header = new StringBuilder();
607                header.append(domain.getClassificationId());
608                if (scopDatabase != null) {
609                        int sf = domain.getSuperfamilyId();
610                        ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf);
611                        if (description != null) {
612                                header.append(" | ");
613                                header.append(description.getDescription());
614                        }
615                }
616                structure.getPDBHeader().setDescription(header.toString());
617
618                return structure;
619        }
620
621        /**
622         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
623         *
624         * @param scopId
625         *            a SCOP Id
626         * @return a Structure object
627         * @throws IOException
628         * @throws StructureException
629         */
        public Structure getStructureForDomain(String scopId) throws IOException, StructureException {
                // No database was supplied, so delegate with the one ScopFactory.getSCOP() provides.
                return getStructureForDomain(scopId, ScopFactory.getSCOP());
        }
633
634        /**
635         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
636         *
637         * @param scopId
638         *            a SCOP Id
639         * @param scopDatabase
640         *            A {@link ScopDatabase} to use
641         * @return a Structure object
642         * @throws IOException
643         * @throws StructureException
644         */
645        public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException,
646                        StructureException {
647                ScopDomain domain = scopDatabase.getDomainByScopID(scopId);
648                return getStructureForDomain(domain, scopDatabase);
649        }
650
651        /**
         * Sets the location at which utility data should be cached.
         *
         * @param cachePath the location to use for cached utility data
655         */
        public void setCachePath(String cachePath) {
                // The value is stored exactly as passed in; this setter does no expansion or validation.
                this.cachePath = cachePath;
        }
659
        /**
         * Sets the parsing parameters kept by this instance.
         * The CIF, BinaryCIF and PDB loaders of this class hand them to the readers they create.
         *
         * @param params the parsing parameters to use
         */
        public void setFileParsingParams(FileParsingParameters params) {
                this.params = params;
        }
663
664        /**
665         * <b>[Optional]</b> This method changes the behavior when obsolete entries
666         * are requested. Current behaviors are:
667         * <ul>
668         * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}
669         *   Throw a {@link StructureException} (the default)
670         * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE}
671         *   Load the requested ID from the PDB's obsolete repository
672         * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT}
673         *   Load the most recent version of the requested structure
674         *
675         * <p>This setting may be silently ignored by implementations which do not have
676         * access to the server to determine whether an entry is obsolete, such as
677         * if {@link #isAutoFetch()} is false. Note that an obsolete entry may still be
678         * returned even this is FETCH_CURRENT if the entry is found locally.
679         *
680         * @param fetchFileEvenIfObsolete Whether to fetch obsolete records
681         * @see #setFetchCurrent(boolean)
682         * @since 4.0.0
683         */
684        public void setObsoleteBehavior(ObsoleteBehavior behavior) {
685                obsoleteBehavior = behavior;
686        }
687
688        /**
689         * Returns how this instance deals with obsolete entries. Note that this
690         * setting may be ignored by some implementations or in some situations,
691         * such as when {@link #isAutoFetch()} is false.
692         *
693         * <p>For most implementations, the default value is
694         * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}.
695         *
696         * @return The ObsoleteBehavior
697         * @since 4.0.0
698         */
699        public ObsoleteBehavior getObsoleteBehavior() {
700                return obsoleteBehavior;
701        }
702
703        /**
704         * Get the behavior for fetching files from the server
         * @return the currently configured {@link FetchBehavior}
706         */
707        public FetchBehavior getFetchBehavior() {
708                return fetchBehavior;
709        }
710
711        /**
712         * Set the behavior for fetching files from the server
         * @param fetchBehavior the {@link FetchBehavior} to use
714         */
        public void setFetchBehavior(FetchBehavior fetchBehavior) {
                // Only recorded here; each loader passes it to the file reader it creates.
                this.fetchBehavior = fetchBehavior;
        }
718
719        /**
720         * Set the path that is used to cache PDB files.
721         *
722         * @param path
723         *            to a directory
724         */
725        public void setPath(String path) {
726                this.path = FileDownloadUtils.expandUserHome(path);
727        }
728
729        /**
730         * Returns the currently active file type that will be parsed.
731         * @return a StructureFiletype
732         */
733        public StructureFiletype getFiletype() {
734                return filetype;
735        }
736
737        /**
738         * Set the file type that will be parsed.
739         * @param filetype a StructureFiletype
740         */
        public void setFiletype(StructureFiletype filetype) {
                // This value selects which loader getStructureForPdbId(PdbId) dispatches to.
                this.filetype = filetype;
        }
744
745        private boolean checkLoading(PdbId pdbId) {
746                return currentlyLoading.contains(pdbId.getId());
747        }
748
749        /**
         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the {@link CathDatabase}
         * at {@link CathFactory#getCathDatabase()}.
752         */
        public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException {
                // No database was supplied, so delegate with the one CathFactory.getCathDatabase() provides.
                return getStructureForCathDomain(structureName, CathFactory.getCathDatabase());
        }
756
757        /**
758         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}.
759         */
760        public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException {
761                CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier());
762
763                Structure s = getStructureForPdbId(cathDomain.getIdentifier());
764                Structure n = cathDomain.reduce(s);
765
766                // add the ligands of the chain...
767                Chain newChain = n.getPolyChainByPDB(structureName.getChainId());
768                List<Chain> origChains = s.getNonPolyChainsByPDB(structureName.getChainId());
769                for (Chain origChain : origChains) {
770                        List<Group> ligands = origChain.getAtomGroups();
771
772                        for (Group g : ligands) {
773                                if (!newChain.getAtomGroups().contains(g)) {
774                                        newChain.addGroup(g);
775                                }
776                        }
777                }
778
779                return n;
780        }
781
782        protected void flagLoading(PdbId pdbId) {
783                String id = pdbId.getId();
784                if (!currentlyLoading.contains(id)) {
785                        currentlyLoading.add(id);
786                }
787        }
788
789        protected void flagLoadingFinished(PdbId pdbId) {
790                currentlyLoading.remove(pdbId.getId());
791        }
792
793        /**
         * Loads a structure directly by PDB ID
         * @param id the PDB ID as a string; may be {@code null}
         * @return the loaded structure, or {@code null} if {@code id} is {@code null}
797         * @throws IOException
798         * @throws StructureException
799         */
800        public Structure getStructureForPdbId(String id) throws IOException, StructureException {
801                if (id == null)
802                        return null;
803                return getStructureForPdbId(new PdbId(id));
804        }
805        /**
806         * Loads a structure directly by PDB ID
807         * @param pdbId
808         * @return
809         * @throws IOException
810         * @throws StructureException
811         */
812        public Structure getStructureForPdbId(PdbId pdbId) throws IOException {
813                if (pdbId == null)
814                        return null;
815                
816                while (checkLoading(pdbId)) {
817                        // waiting for loading to be finished...
818                        try {
819                                Thread.sleep(100);
820                        } catch (InterruptedException e) {
821                                logger.error(e.getMessage());
822                        }
823                }
824
825                switch (filetype) {
826                        case CIF:
827                                logger.debug("loading from mmcif");
828                                return loadStructureFromCifByPdbId(pdbId);
829                        case BCIF:
830                                logger.debug("loading from bcif");
831                                return loadStructureFromBcifByPdbId(pdbId);
832                        case MMTF:
833                                logger.debug("loading from mmtf");
834                                return loadStructureFromMmtfByPdbId(pdbId);
835                        case PDB: default:
836                                logger.debug("loading from pdb");
837                                return loadStructureFromPdbByPdbId(pdbId);
838                }
839        }
840
841        
842        protected Structure loadStructureFromMmtfByPdbId(String pdbId) throws IOException {
843                return loadStructureFromMmtfByPdbId(new PdbId(pdbId));
844        }
845
846        /**
         * Load a {@link Structure} from MMTF, either from the local file system or from the Web.
848         * @param pdbId the input PDB id
849         * @return the {@link Structure} object of the parsed structure
850         * @throws IOException error reading from Web or file system
851         */
852        protected Structure loadStructureFromMmtfByPdbId(PdbId pdbId) throws IOException {
853                logger.debug("Loading structure {} from mmtf file.", pdbId);
854                MMTFFileReader reader = new MMTFFileReader();
855                reader.setFetchBehavior(fetchBehavior);
856                reader.setObsoleteBehavior(obsoleteBehavior);
857                return reader.getStructureById(pdbId);
858        }
859
860        protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException {
861                return loadStructureFromCifByPdbId(new PdbId(pdbId));
862        }
863        
864        protected Structure loadStructureFromCifByPdbId(PdbId pdbId) throws IOException {
865                logger.debug("Loading structure {} from mmCIF file {}.", pdbId, path);
866                Structure s;
867                flagLoading(pdbId);
868                try {
869                        CifFileReader reader = new CifFileReader(path);
870                        reader.setFetchBehavior(fetchBehavior);
871                        reader.setObsoleteBehavior(obsoleteBehavior);
872                        reader.setFileParsingParameters(params);
873                        s = reader.getStructureById(pdbId);
874                } finally {
875                        flagLoadingFinished(pdbId);
876                }
877
878                return s;
879        }
880
881        protected Structure loadStructureFromBcifByPdbId(String pdbId) throws IOException {
882                return loadStructureFromBcifByPdbId(new PdbId(pdbId));
883        }
884        protected Structure loadStructureFromBcifByPdbId(PdbId pdbId) throws IOException {
885                logger.debug("Loading structure {} from BinaryCIF file {}.", pdbId, path);
886                Structure s;
887                flagLoading(pdbId);
888                try {
889                        BcifFileReader reader = new BcifFileReader(path);
890                        reader.setFetchBehavior(fetchBehavior);
891                        reader.setObsoleteBehavior(obsoleteBehavior);
892                        reader.setFileParsingParameters(params);
893                        s = reader.getStructureById(pdbId);
894                } finally {
895                        flagLoadingFinished(pdbId);
896                }
897
898                return s;
899        }
900
901        protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException {
902                return loadStructureFromPdbByPdbId(new PdbId(pdbId));
903        }
904
905        protected Structure loadStructureFromPdbByPdbId(PdbId pdbId) throws IOException {
906                logger.debug("Loading structure {} from PDB file {}.", pdbId, path);
907                Structure s;
908                flagLoading(pdbId);
909                try {
910                        PDBFileReader reader = new PDBFileReader(path);
911                        reader.setFetchBehavior(fetchBehavior);
912                        reader.setObsoleteBehavior(obsoleteBehavior);
913
914                        reader.setFileParsingParameters(params);
915
916                        s = reader.getStructureById(pdbId);
917                } finally {
918                        flagLoadingFinished(pdbId);
919                }
920
921                return s;
922        }
923}