001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.util;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.List;
028import java.util.TreeSet;
029
030import org.biojava.nbio.core.util.InputStreamProvider;
031import org.biojava.nbio.structure.*;
032import org.biojava.nbio.structure.align.client.StructureName;
033import org.biojava.nbio.structure.cath.CathDatabase;
034import org.biojava.nbio.structure.cath.CathDomain;
035import org.biojava.nbio.structure.cath.CathFactory;
036import org.biojava.nbio.structure.domain.PDPProvider;
037import org.biojava.nbio.structure.domain.RemotePDPProvider;
038import org.biojava.nbio.structure.io.FileParsingParameters;
039import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior;
040import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior;
041import org.biojava.nbio.structure.io.MMCIFFileReader;
042import org.biojava.nbio.structure.io.MMTFFileReader;
043import org.biojava.nbio.structure.io.PDBFileReader;
044import org.biojava.nbio.core.util.FileDownloadUtils;
045import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
046import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
047import org.biojava.nbio.structure.scop.CachedRemoteScopInstallation;
048import org.biojava.nbio.structure.scop.ScopDatabase;
049import org.biojava.nbio.structure.scop.ScopDescription;
050import org.biojava.nbio.structure.scop.ScopDomain;
051import org.biojava.nbio.structure.scop.ScopFactory;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055/**
056 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently
057 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache
058 * is a soft-cache, this means it won't cause out of memory exceptions, but garbage collects the data if the Java
059 * virtual machine needs to free up space. The AtomCache is thread-safe.
060 *
061 * @author Andreas Prlic
062 * @author Spencer Bliven
063 * @author Peter Rose
064 * @since 3.0
065 */
066public class AtomCache {
067
        /** Logger shared by all AtomCache instances. */
        private static final Logger logger = LoggerFactory.getLogger(AtomCache.class);
        
        /**
         * The default output bioassembly style: if true the bioassemblies are multimodel,
         * if false the bioassemblies are flat with renamed chains for symmetry-partners.
         */
        public static final boolean DEFAULT_BIOASSEMBLY_STYLE = false;

        /** Name prefix for biological assembly requests, as in <code>getStructure("BIO:[pdbId]")</code>. */
        public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:";
        // NOTE(review): not used in the visible part of this file; presumably separates a chain number in a name - confirm.
        public static final String CHAIN_NR_SYMBOL = ":";
        // NOTE(review): presumably the '.' between pdbID and chainID/range in structure names (e.g. 4HHB.C) - confirm.
        public static final String CHAIN_SPLIT_SYMBOL = ".";

        // NOTE(review): presumably the name prefix identifying PDP domain requests - confirm against its users.
        public static final String PDP_DOMAIN_IDENTIFIER = "PDP:";

        public static final String UNDERSCORE = "_";

        /** Platform file separator, read once from the <code>file.separator</code> system property. */
        private static final String FILE_SEPARATOR = System.getProperty("file.separator");

        /** Parsing parameters handed out by {@link #getFileParsingParams()}. */
        protected FileParsingParameters params;
        /** Provider exposed via {@link #getPdpprovider()}; may be null (see the null check in {@link #notifyShutdown()}). */
        protected PDPProvider pdpprovider;

        // Both behaviors start as DEFAULT and are overwritten when constructing from a UserConfiguration.
        private FetchBehavior fetchBehavior;
        private ObsoleteBehavior obsoleteBehavior;

        /** Location for cached utility data, see {@link #getCachePath()}. */
        private String cachePath;

        // make sure IDs are loaded uniquely
        private Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<String>());

        /** Directory used to cache PDB files, see {@link #getPath()}. */
        private String path;

        // File-format flags; when either is set, bioassembly building uses asym ids.
        private boolean useMmCif;
        private boolean useMmtf;
101
102        /**
103         * Default AtomCache constructor.
104         *
105         * Usually stores files in a temp directory, but this can be overriden by setting the PDB_DIR variable at runtime.
106         *
107         * @see UserConfiguration#UserConfiguration()
108         */
109        public AtomCache() {
110                this(new UserConfiguration());
111        }
112
113        /**
114         * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. It will use the same value for pdbFilePath and cachePath.
115         *
116         * @param pdbFilePath
117         *            a directory in the file system to use as a location to cache files.
118         */
119        public AtomCache(String pdbFilePath) {
120                this(pdbFilePath,pdbFilePath);
121        }
122
123        /**
124         * Creates an instance of an AtomCache that is pointed to the a particular path in the file system.
125         *
126         * @param pdbFilePath
127         *            a directory in the file system to use as a location to cache files.
128         * @param cachePath
129         */
130        public AtomCache(String pdbFilePath, String cachePath) {
131
132                logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}",pdbFilePath, cachePath);
133
134                if (!pdbFilePath.endsWith(FILE_SEPARATOR)) {
135                        pdbFilePath += FILE_SEPARATOR;
136                }
137
138                // we are caching the binary files that contain the PDBs gzipped
139                // that is the most memory efficient way of caching...
140                // set the input stream provider to caching mode
141                System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true");
142
143                setPath(pdbFilePath);
144
145                this.cachePath = cachePath;
146
147                fetchBehavior = FetchBehavior.DEFAULT;
148                obsoleteBehavior = ObsoleteBehavior.DEFAULT;
149
150                currentlyLoading.clear();
151                params = new FileParsingParameters();
152
153                setUseMmCif(false);
154                setUseMmtf(true);
155
156        }
157
158        /**
159         * Creates a new AtomCache object based on the provided UserConfiguration.
160         *
161         * @param config
162         *            the UserConfiguration to use for this cache.
163         */
164        public AtomCache(UserConfiguration config) {
165                this(config.getPdbFilePath(), config.getCacheFilePath());
166                fetchBehavior = config.getFetchBehavior();
167                obsoleteBehavior = config.getObsoleteBehavior();
168                useMmCif = config.getFileFormat().equals( UserConfiguration.MMCIF_FORMAT );
169
170                if ( useMmCif)
171                        useMmtf = false;
172
173        }
174
175        /**
176         * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions.
177         * <p>
178         * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)}
179         * for a more general solution.
180         * @param name
181         * @return an array of Atoms.
182         * @throws IOException
183         * @throws StructureException
184         * @see
185         */
186        public Atom[] getAtoms(String name) throws IOException, StructureException {
187                return getAtoms(new StructureName(name));
188        }
189        public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException {
190
191                Atom[] atoms = null;
192
193                // System.out.println("loading " + name);
194                Structure s = getStructure(name);
195
196                atoms = StructureTools.getAtomCAArray(s);
197
198                /*
199                 * synchronized (cache){ cache.put(name, atoms); }
200                 */
201
202                return atoms;
203        }
204        /**
205         * Returns the representative atoms for the provided name.
206         * See {@link #getStructure(String)} for supported naming conventions.
207         *
208         * @param name
209         * @return an array of Atoms.
210         * @throws IOException
211         * @throws StructureException
212         * @see
213         */
214        public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException {
215                return getRepresentativeAtoms(new StructureName(name));
216        }
217        
218        public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException {
219
220                Atom[] atoms = null;
221
222                Structure s = getStructure(name);
223
224                atoms = StructureTools.getRepresentativeAtomArray(s);
225
226                /*
227                 * synchronized (cache){ cache.put(name, atoms); }
228                 */
229
230                return atoms;
231        }
232        
233        /**
234         * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the 
235         * assembly from the biounit annotations found in {@link Structure#getPDBHeader()}
236         * <p>
237         * Note, the number of available biological unit files
238         * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one
239         * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies.
240         *
241         * @param pdbId
242         *            the PDB ID
243         * @param bioAssemblyId
244         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
245         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 
246         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).             
247         * @return a structure object
248         * @throws IOException
249         * @throws StructureException if biassemblyId < 0 or other problems while loading structure
250         * @author Peter Rose
251         * @since 3.2
252         */
253        public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean multiModel)
254                        throws StructureException, IOException {
255
256                if (bioAssemblyId < 0) {
257                        throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId "
258                                        + bioAssemblyId);
259                }
260                
261                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
262                
263                if (!getFileParsingParams().isParseBioAssembly()) {
264                        getFileParsingParams().setParseBioAssembly(true);
265                }
266                
267                Structure asymUnit = getStructureForPdbId(pdbId);
268                
269                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
270                
271                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies()==null) {
272                        logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
273                        return asymUnit; 
274                }
275
276                // 0 ... asym unit
277                if ( bioAssemblyId == 0) {
278                        logger.info("Requested biological assembly 0 for PDB id "+pdbId+", returning asymmetric unit");
279                        return asymUnit;
280                }
281                // does it exist?
282                if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
283                        throw new StructureException("No biological assembly available for biological assembly id " + bioAssemblyId + " of " + pdbId);
284                }
285
286                List<BiologicalAssemblyTransformation> transformations =
287                                asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
288
289
290                if ( transformations == null || transformations.size() == 0){
291
292                        throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
293                        
294                }
295                
296                BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
297
298                // if we use mmcif or mmtf, then we need to pass useAsymIds=true
299                boolean useAsymIds = false;
300                if (useMmCif) useAsymIds = true;
301                if (useMmtf) useAsymIds = true;
302                return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
303                
304        }
305
306        /**
307         * Returns the default biological unit (bioassemblyId=1, known in PDB as pdb1.gz). If it is not available,
308         * the asymmetric unit will be returned, e.g. for NMR structures.
309         *
310         * <p>Biological assemblies can also be accessed using
311         * <tt>getStructure("BIO:<i>[pdbId]</i>")</tt>
312         * @param pdbId the PDB id
313         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 
314         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).  
315         * @return a structure object
316         * @throws IOException
317         * @throws StructureException
318         * @since 4.2
319         */
320        public Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws StructureException, IOException {
321                
322                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
323                
324                if (!getFileParsingParams().isParseBioAssembly()) {
325                        getFileParsingParams().setParseBioAssembly(true);
326                }
327                
328                Structure asymUnit = getStructureForPdbId(pdbId);
329                
330                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
331
332                
333                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies()==null) {
334                        logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
335                        return asymUnit; 
336                }
337
338                int bioAssemblyId = 1;
339                
340                // does it exist?
341                if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
342                        return asymUnit;
343                }
344
345                List<BiologicalAssemblyTransformation> transformations =
346                                asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
347
348
349                if ( transformations == null || transformations.size() == 0){
350
351                        throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
352                        
353                }
354                
355                BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
356
357                // if we use mmcif or mmtf, then we need to pass useAsymIds=true
358                boolean useAsymIds = false;
359                if (useMmCif) useAsymIds = true;
360                if (useMmtf) useAsymIds = true;
361                return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
362                
363        }
364
365        /**
366         * Returns all biological assemblies for given PDB id.
367         * @param pdbId
368         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 
369         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).  
370         * @return
371         * @throws StructureException
372         * @throws IOException
373         * @since 5.0
374         */
375        public List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws StructureException, IOException {
376                
377                List<Structure> assemblies = new ArrayList<>();
378                
379                boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();
380                
381                if (!getFileParsingParams().isParseBioAssembly()) {
382                        getFileParsingParams().setParseBioAssembly(true);
383                }
384                
385                Structure asymUnit = getStructureForPdbId(pdbId);
386                
387                getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
388                
389
390                if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies()==null) {
391                        logger.info("No bioassembly information found for {}, returning asymmetric unit as the only biological assembly", pdbId);
392                        assemblies.add(asymUnit);
393                        return assemblies; 
394                }
395
396
397                for (int bioAssemblyId : asymUnit.getPDBHeader().getBioAssemblies().keySet()) { 
398                        List<BiologicalAssemblyTransformation> transformations =
399                                        asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
400
401
402                        if ( transformations == null || transformations.size() == 0){
403
404                                logger.info("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId+". Assembly id will be missing in biological assemblies.");
405                                continue;
406                        }
407
408                        BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
409
410                        // if we use mmcif or mmtf, then we need to pass useAsymIds=true
411                        boolean useAsymIds = false;
412                        if (useMmCif) useAsymIds = true;
413                        if (useMmtf) useAsymIds = true;
414                        Structure s = builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
415                        assemblies.add(s);
416                }
417                return assemblies;
418        }
419        
420        /**
421         * Returns the path that contains the caching file for utility data, such as domain definitions.
422         *
423         * @return
424         */
425        public String getCachePath() {
426                return cachePath;
427        }
428
429        public FileParsingParameters getFileParsingParams() {
430                return params;
431        }
432
433        /**
434         * Get the path that is used to cache PDB files.
435         *
436         * @return path to a directory
437         */
438        public String getPath() {
439                return path;
440        }
441
442        public PDPProvider getPdpprovider() {
443                return pdpprovider;
444        }
445
446        /**
447         * Request a Structure based on a <i>name</i>.
448         *
449         * <pre>
450         *              Formal specification for how to specify the <i>name</i>:
451         *
452         *              name     := pdbID
453         *                             | pdbID '.' chainID
454         *                             | pdbID '.' range
455         *                             | scopID
456         *              range         := '('? range (',' range)? ')'?
457         *                             | chainID
458         *                             | chainID '_' resNum '-' resNum
459         *              pdbID         := [0-9][a-zA-Z0-9]{3}
460         *              chainID       := [a-zA-Z0-9]
461         *              scopID        := 'd' pdbID [a-z_][0-9_]
462         *              resNum        := [-+]?[0-9]+[A-Za-z]?
463         *
464         *
465         *              Example structures:
466         *              1TIM     #whole structure
467         *              4HHB.C     #single chain
468         *              4GCR.A_1-83     #one domain, by residue number
469         *              3AA0.A,B     #two chains treated as one structure
470         *              d2bq6a1     #scop domain
471         * </pre>
472         *
473         * With the additional set of rules:
474         *
475         * <ul>
476         * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model
477         * only (for NMR).
478         * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li>
479         * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names,
480         * see {@link #setStrictSCOP(boolean)}</li>
481         * <li>URLs are accepted as well</li>
482         * </ul>
483         *
484         * <p>Note that this method should not be used in StructureIdentifier
485         * implementations to avoid circular calls.
486         * @param name
487         * @return a Structure object, or null if name appears improperly formated (eg too short, etc)
488         * @throws IOException
489         *             The PDB file cannot be cached due to IO errors
490         * @throws StructureException
491         *             The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon
492         *             errors, eg for poorly formatted subranges.
493         */
494        public Structure getStructure(String name) throws IOException, StructureException {
495                StructureName structureName = new StructureName(name);
496
497                return getStructure(structureName);
498        }
499
500        /**
501         * Get the structure corresponding to the given {@link StructureIdentifier}.
502         * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)}
503         * followed by {@link StructureIdentifier#reduce(Structure)}.
504         *
505         * <p>Note that this method should not be used in StructureIdentifier
506         * implementations to avoid circular calls.
507         * @param strucId
508         * @return
509         * @throws IOException
510         * @throws StructureException
511         */
512        public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException {
513                Structure s = strucId.loadStructure(this);
514                Structure r = strucId.reduce(s);
515                r.setStructureIdentifier(strucId);
516                return r;
517        }
518
519        /**
520         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
521         *
522         * @param domain
523         *            a SCOP domain
524         * @return a Structure object
525         * @throws IOException
526         * @throws StructureException
527         */
528        public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException {
529                return getStructureForDomain(domain, ScopFactory.getSCOP());
530        }
531
532        /**
533         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
534         *
535         * @param domain
536         *            a SCOP domain
537         * @param scopDatabase
538         *            A {@link ScopDatabase} to use
539         * @return a Structure object
540         * @throws IOException
541         * @throws StructureException
542         */
543        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException,
544                        StructureException {
545                return getStructureForDomain(domain, scopDatabase, false);
546        }
547
548        /**
549         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
550         *
551         * @param domain
552         *            a SCOP domain
553         * @param scopDatabase
554         *            A {@link ScopDatabase} to use
555         * @param strictLigandHandling
556         *            If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP
557         *            domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the
558         *            definition (residue numbers) of the SCOP domain
559         * @return a Structure object
560         * @throws IOException
561         * @throws StructureException
562         */
563        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
564                        throws IOException, StructureException {
565
566                String pdbId = domain.getPdbId();
567                Structure fullStructure = getStructureForPdbId(pdbId);
568                Structure structure = domain.reduce(fullStructure);
569
570                // TODO It would be better to move all of this into the reduce method,
571                // but that would require ligand handling properties in StructureIdentifiers
572
573                // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in
574                // specifically, we add a ligand if and only if it occurs within the domain
575                AtomPositionMap map = null;
576                List<ResidueRangeAndLength> rrs = null;
577                if (strictLigandHandling) {
578                        map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER);
579                        rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map);
580                }
581                for (Chain chain : fullStructure.getNonPolyChains()) {
582
583                        if (!structure.hasPdbChain(chain.getName())) {
584                                continue; // we can't do anything with a chain our domain
585                        }
586
587                        Chain newChain;
588                        if (! structure.hasNonPolyChain(chain.getId())) {
589                                newChain = new ChainImpl();
590                                newChain.setId(chain.getId());
591                                newChain.setName(chain.getName());
592                                newChain.setEntityInfo(chain.getEntityInfo());
593                                structure.addChain(newChain);
594                        } else {
595                                newChain = structure.getNonPolyChain(chain.getId());
596                        }
597                        List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups());
598                        for (Group group : ligands) {
599                                boolean shouldContain = true;
600                                if (strictLigandHandling) {
601                                        shouldContain = false; // whether the ligand occurs within the domain
602                                        for (ResidueRange rr : rrs) {
603                                                if (rr.contains(group.getResidueNumber(), map)) {
604                                                        shouldContain = true;
605                                                }
606                                        }
607                                }
608                                boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate
609                                                                                                                                                                        // ligands
610                                if (shouldContain && !alreadyContains) {
611
612                                        newChain.addGroup(group);
613
614                                }
615                        }
616                }
617
618                // build a more meaningful description for the new structure
619                StringBuilder header = new StringBuilder();
620                header.append(domain.getClassificationId());
621                if (scopDatabase != null) {
622                        int sf = domain.getSuperfamilyId();
623                        ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf);
624                        if (description != null) {
625                                header.append(" | ");
626                                header.append(description.getDescription());
627                        }
628                }
629                structure.getPDBHeader().setDescription(header.toString());
630
631                return structure;
632
633        }
634
635        /**
636         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
637         *
638         * @param scopId
639         *            a SCOP Id
640         * @return a Structure object
641         * @throws IOException
642         * @throws StructureException
643         */
644        public Structure getStructureForDomain(String scopId) throws IOException, StructureException {
645                return getStructureForDomain(scopId, ScopFactory.getSCOP());
646        }
647
648        /**
649         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
650         *
651         * @param scopId
652         *            a SCOP Id
653         * @param scopDatabase
654         *            A {@link ScopDatabase} to use
655         * @return a Structure object
656         * @throws IOException
657         * @throws StructureException
658         */
659        public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException,
660                        StructureException {
661                ScopDomain domain = scopDatabase.getDomainByScopID(scopId);
662                return getStructureForDomain(domain, scopDatabase);
663        }
664
665        /**
666         * Send a signal to the cache that the system is shutting down. Notifies underlying SerializableCache instances to
667         * flush themselves...
668         */
669        public void notifyShutdown() {
670                // System.out.println(" AtomCache got notify shutdown..");
671                if (pdpprovider != null) {
672                        if (pdpprovider instanceof RemotePDPProvider) {
673                                RemotePDPProvider remotePDP = (RemotePDPProvider) pdpprovider;
674                                remotePDP.flushCache();
675                        }
676                }
677
678                // todo: use a SCOP implementation that is backed by SerializableCache
679                ScopDatabase scopInstallation = ScopFactory.getSCOP();
680                if (scopInstallation != null) {
681                        if (scopInstallation instanceof CachedRemoteScopInstallation) {
682                                CachedRemoteScopInstallation cacheScop = (CachedRemoteScopInstallation) scopInstallation;
683                                cacheScop.flushCache();
684                        }
685                }
686
687        }
688
689        /**
690         * set the location at which utility data should be cached.
691         *
692         * @param cachePath
693         */
694        public void setCachePath(String cachePath) {
695                this.cachePath = cachePath;
696        }
697
698        public void setFileParsingParams(FileParsingParameters params) {
699                this.params = params;
700        }
701
702
703        /**
704         * <b>[Optional]</b> This method changes the behavior when obsolete entries
705         * are requested. Current behaviors are:
706         * <ul>
707         * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}
708         *   Throw a {@link StructureException} (the default)
709         * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE}
710         *   Load the requested ID from the PDB's obsolete repository
711         * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT}
712         *   Load the most recent version of the requested structure
713         *
714         * <p>This setting may be silently ignored by implementations which do not have
715         * access to the server to determine whether an entry is obsolete, such as
716         * if {@link #isAutoFetch()} is false. Note that an obsolete entry may still be
717         * returned even this is FETCH_CURRENT if the entry is found locally.
718         *
719         * @param fetchFileEvenIfObsolete Whether to fetch obsolete records
720         * @see #setFetchCurrent(boolean)
721         * @since 4.0.0
722         */
723        public void setObsoleteBehavior(ObsoleteBehavior behavior) {
724                obsoleteBehavior = behavior;
725        }
726
727        /**
728         * Returns how this instance deals with obsolete entries. Note that this
729         * setting may be ignored by some implementations or in some situations,
730         * such as when {@link #isAutoFetch()} is false.
731         *
732         * <p>For most implementations, the default value is
733         * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}.
734         *
735         * @return The ObsoleteBehavior
736         * @since 4.0.0
737         */
738        public ObsoleteBehavior getObsoleteBehavior() {
739                return obsoleteBehavior;
740        }
741
742        /**
743         * Get the behavior for fetching files from the server
744         * @return
745         */
746        public FetchBehavior getFetchBehavior() {
747                return fetchBehavior;
748        }
749        /**
750         * Set the behavior for fetching files from the server
751         * @param fetchBehavior
752         */
753        public void setFetchBehavior(FetchBehavior fetchBehavior) {
754                this.fetchBehavior = fetchBehavior;
755        }
756
757        /**
758         * Set the path that is used to cache PDB files.
759         *
760         * @param path
761         *            to a directory
762         */
763        public void setPath(String path) {
764                this.path = FileDownloadUtils.expandUserHome(path);
765        }
766
767        public void setPdpprovider(PDPProvider pdpprovider) {
768                this.pdpprovider = pdpprovider;
769        }
770
771        /**
772         * @return the useMmCif
773         */
774        public boolean isUseMmCif() {
775                return useMmCif;
776        }
777
778        /**
779         * @param useMmCif
780         *            the useMmCif to set
781         */
782        public void setUseMmCif(boolean useMmCif) {
783                this.useMmCif = useMmCif;
784                // Either way the user wants to use PDB or MMCIF
785                this.useMmtf = false;
786        }
787        
788        /**
789         * Set whether to use mmtf.
790         * @param bool the input boolean to set
791         */
792        public void setUseMmtf(boolean useMmtf) {
793                this.useMmtf = useMmtf;
794                if(useMmtf){
795                        useMmCif=false;
796                }
797                
798        }
799
800        /** Returns useMmtf flag
801         *
802         * @return true if will load data via mmtf file format
803     */
804        public boolean isUseMmtf(){
805                return this.useMmtf;
806        }
807
808        private boolean checkLoading(String name) {
809                return currentlyLoading.contains(name);
810
811        }
812
813        /**
814         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the the {@link CathDatabase}
815         * at {@link CathFactory#getCathDatabase()}.
816         */
817        public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException {
818                return getStructureForCathDomain(structureName, CathFactory.getCathDatabase());
819        }
820
821        /**
822         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}.
823         */
824        public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException {
825
826                CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier());
827
828                Structure s = getStructureForPdbId(cathDomain.getIdentifier());
829                Structure n = cathDomain.reduce(s);
830
831                // add the ligands of the chain...
832
833                Chain newChain = n.getPolyChainByPDB(structureName.getChainId());
834                List<Chain> origChains = s.getNonPolyChainsByPDB(structureName.getChainId());
835                for ( Chain origChain : origChains) {
836                        List<Group> ligands = origChain.getAtomGroups();
837
838                        for (Group g : ligands) {
839                                if (!newChain.getAtomGroups().contains(g)) {
840                                        newChain.addGroup(g);
841                                }
842                        }
843                }
844
845                return n;
846        }
847
848        protected void flagLoading(String name) {
849                if (!currentlyLoading.contains(name)) {
850
851                        currentlyLoading.add(name);
852                }
853        }
854
855        protected void flagLoadingFinished(String name) {
856
857                currentlyLoading.remove(name);
858        }
859
860        /**
861         * Loads a structure directly by PDB ID
862         * @param pdbId
863         * @return
864         * @throws IOException
865         * @throws StructureException
866         */
867        public Structure getStructureForPdbId(String pdbId) throws IOException, StructureException {
868                if(pdbId == null)
869                        return null;
870                if(pdbId.length() != 4) {
871                        throw new StructureException("Unrecognized PDB ID: "+pdbId);
872                }
873                while (checkLoading(pdbId)) {
874                        // waiting for loading to be finished...
875
876                        try {
877                                Thread.sleep(100);
878                        } catch (InterruptedException e) {
879                                logger.error(e.getMessage());
880                        }
881
882                }
883
884                Structure s;
885                if (useMmtf) {
886                        logger.debug("loading from mmtf");
887                        s = loadStructureFromMmtfByPdbId(pdbId);
888                }
889                else if (useMmCif) {
890                        logger.debug("loading from mmcif");
891                        s = loadStructureFromCifByPdbId(pdbId);
892                } else {
893                        logger.debug("loading from pdb");
894                        s = loadStructureFromPdbByPdbId(pdbId);
895                }
896                return s;
897        }
898
899        /**
900         * Load a {@link Structure} from MMTF either from the local file system.
901         * @param pdbId the input PDB id
902         * @return the {@link Structure} object of the parsed structure
903         * @throws IOException error reading from Web or file system
904         */
905        private Structure loadStructureFromMmtfByPdbId(String pdbId) throws IOException {
906                logger.debug("Loading structure {} from mmtf file.", pdbId);
907                MMTFFileReader reader = new MMTFFileReader();
908                reader.setFetchBehavior(fetchBehavior);
909                reader.setObsoleteBehavior(obsoleteBehavior);
910                Structure structure = reader.getStructureById(pdbId.toLowerCase());
911                return structure;
912        }
913
914        protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException, StructureException {
915
916                logger.debug("Loading structure {} from mmCIF file {}.", pdbId, path);
917                Structure s;
918                flagLoading(pdbId);
919                try {
920                        MMCIFFileReader reader = new MMCIFFileReader(path);
921                        reader.setFetchBehavior(fetchBehavior);
922                        reader.setObsoleteBehavior(obsoleteBehavior);
923                        reader.setFileParsingParameters(params);
924                        s = reader.getStructureById(pdbId.toLowerCase());
925
926                } finally {
927                        flagLoadingFinished(pdbId);
928                }
929
930                return s;
931        }
932
933        protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException, StructureException {
934
935                logger.debug("Loading structure {} from PDB file {}.", pdbId, path);
936                Structure s;
937                flagLoading(pdbId);
938                try {
939                        PDBFileReader reader = new PDBFileReader(path);
940                        reader.setFetchBehavior(fetchBehavior);
941                        reader.setObsoleteBehavior(obsoleteBehavior);
942
943                        reader.setFileParsingParameters(params);
944
945                        s = reader.getStructureById(pdbId.toLowerCase());
946
947                } finally {
948                        flagLoadingFinished(pdbId);
949                }
950
951                return s;
952        }
953
954}