001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.util;
022
023import java.io.IOException;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.List;
027import java.util.TreeSet;
028
029import org.biojava.nbio.core.util.InputStreamProvider;
030import org.biojava.nbio.structure.Atom;
031import org.biojava.nbio.structure.AtomPositionMap;
032import org.biojava.nbio.structure.Chain;
033import org.biojava.nbio.structure.Group;
034import org.biojava.nbio.structure.ResidueRange;
035import org.biojava.nbio.structure.ResidueRangeAndLength;
036import org.biojava.nbio.structure.Structure;
037import org.biojava.nbio.structure.StructureException;
038import org.biojava.nbio.structure.StructureIO;
039import org.biojava.nbio.structure.StructureIdentifier;
040import org.biojava.nbio.structure.StructureTools;
041import org.biojava.nbio.structure.align.client.StructureName;
042import org.biojava.nbio.structure.cath.CathDatabase;
043import org.biojava.nbio.structure.cath.CathDomain;
044import org.biojava.nbio.structure.cath.CathFactory;
045import org.biojava.nbio.structure.domain.PDPProvider;
046import org.biojava.nbio.structure.domain.RemotePDPProvider;
047import org.biojava.nbio.structure.io.FileParsingParameters;
048import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior;
049import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior;
050import org.biojava.nbio.structure.io.MMCIFFileReader;
051import org.biojava.nbio.structure.io.PDBFileReader;
052import org.biojava.nbio.structure.io.util.FileDownloadUtils;
053import org.biojava.nbio.structure.quaternary.io.BioUnitDataProviderFactory;
054import org.biojava.nbio.structure.quaternary.io.MmCifBiolAssemblyProvider;
055import org.biojava.nbio.structure.quaternary.io.PDBBioUnitDataProvider;
056import org.biojava.nbio.structure.scop.CachedRemoteScopInstallation;
057import org.biojava.nbio.structure.scop.ScopDatabase;
058import org.biojava.nbio.structure.scop.ScopDescription;
059import org.biojava.nbio.structure.scop.ScopDomain;
060import org.biojava.nbio.structure.scop.ScopFactory;
061import org.slf4j.Logger;
062import org.slf4j.LoggerFactory;
063
064/**
065 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently
066 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache
067 * is a soft-cache, this means it won't cause out of memory exceptions, but garbage collects the data if the Java
068 * virtual machine needs to free up space. The AtomCache is thread-safe.
069 *
070 * @author Andreas Prlic
071 * @author Spencer Bliven
072 * @author Peter Rose
073 * @since 3.0
074 */
075public class AtomCache {
076
        /** SLF4J logger shared by all AtomCache instances. */
        private static final Logger logger = LoggerFactory.getLogger(AtomCache.class);

        /** Prefix of names that request a biological assembly, e.g. {@code "BIO:[pdbId]"}. */
        public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:";
        // NOTE(review): presumably separates a PDB ID from a chain index (rather than a chain ID) -- confirm against the name parser
        public static final String CHAIN_NR_SYMBOL = ":";
        /** Separator between the PDB ID and the chain ID or range in a structure name, e.g. {@code 4HHB.C}. */
        public static final String CHAIN_SPLIT_SYMBOL = ".";

        // NOTE(review): presumably the prefix of PDP domain names -- confirm against the name parser
        public static final String PDP_DOMAIN_IDENTIFIER = "PDP:";

        /** The underscore character; structure names use it between a chain ID and a residue range, e.g. {@code A_1-83}. */
        public static final String UNDERSCORE = "_";

        /** Platform file separator; appended to the PDB directory path when the path does not already end with it. */
        private static final String FILE_SEPARATOR = System.getProperty("file.separator");

        /** Parsing parameters exposed through {@link #getFileParsingParams()}. */
        protected FileParsingParameters params;
        /** Provider exposed through {@link #getPdpprovider()}; may be null. */
        protected PDPProvider pdpprovider;

        // Both start as DEFAULT in the (String, String) constructor and are replaced by the
        // UserConfiguration values when the cache is built from a configuration.
        private FetchBehavior fetchBehavior;
        private ObsoleteBehavior obsoleteBehavior;

        /** Location of cached utility data such as domain definitions; see {@link #getCachePath()}. */
        private String cachePath;

        // make sure IDs are loaded uniquely
        private Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<String>());

        /** Directory used to cache PDB files; see {@link #getPath()}. */
        private String path;

        // Switched on through setUseMmCif(true) by the (String, String) constructor; the UserConfiguration
        // constructor then overwrites it depending on the configured file format.
        private boolean useMmCif;
103
        /**
         * Default AtomCache constructor. Delegates to {@link #AtomCache(UserConfiguration)} with a newly created
         * {@link UserConfiguration}.
         *
         * Usually stores files in a temp directory, but this can be overridden by setting the PDB_DIR variable at runtime.
         *
         * @see UserConfiguration#UserConfiguration()
         */
        public AtomCache() {
                this(new UserConfiguration());
        }
114
        /**
         * Creates an instance of an AtomCache that is pointed to a particular path in the file system. The same value
         * is used for both pdbFilePath and cachePath.
         *
         * @param pdbFilePath
         *            a directory in the file system to use as a location to cache files.
         */
        public AtomCache(String pdbFilePath) {
                this(pdbFilePath,pdbFilePath);
        }
124
125        /**
126         * Creates an instance of an AtomCache that is pointed to the a particular path in the file system.
127         *
128         * @param pdbFilePath
129         *            a directory in the file system to use as a location to cache files.
130         * @param cachePath
131         */
132        public AtomCache(String pdbFilePath, String cachePath) {
133
134                logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}",pdbFilePath, cachePath);
135
136                if (!pdbFilePath.endsWith(FILE_SEPARATOR)) {
137                        pdbFilePath += FILE_SEPARATOR;
138                }
139
140                // we are caching the binary files that contain the PDBs gzipped
141                // that is the most memory efficient way of caching...
142                // set the input stream provider to caching mode
143                System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true");
144
145                setPath(pdbFilePath);
146
147                this.cachePath = cachePath;
148
149                fetchBehavior = FetchBehavior.DEFAULT;
150                obsoleteBehavior = ObsoleteBehavior.DEFAULT;
151
152                currentlyLoading.clear();
153                params = new FileParsingParameters();
154
155                setUseMmCif(true);
156
157        }
158
        /**
         * Equivalent to {@link #AtomCache(String)}; the second argument has no effect.
         *
         * @param pdbFilePath
         *            a directory in the file system to use as a location to cache files.
         * @param isSplit Ignored
         * @deprecated isSplit parameter is ignored (4.0.0)
         */
        @Deprecated
        public AtomCache(String pdbFilePath,boolean isSplit) {
                this(pdbFilePath);
        }
        /**
         * Equivalent to {@link #AtomCache(String, String)}; the third argument has no effect.
         *
         * @param pdbFilePath
         *            a directory in the file system to use as a location to cache files.
         * @param cachePath
         *            the value later returned by {@link #getCachePath()}
         * @param isSplit Ignored
         * @deprecated isSplit parameter is ignored (4.0.0)
         */
        @Deprecated
        public AtomCache(String pdbFilePath, String cachePath,boolean isSplit) {
                this(pdbFilePath,cachePath);
        }
175
176        /**
177         * Creates a new AtomCache object based on the provided UserConfiguration.
178         *
179         * @param config
180         *            the UserConfiguration to use for this cache.
181         */
182        public AtomCache(UserConfiguration config) {
183                this(config.getPdbFilePath(), config.getCacheFilePath());
184                fetchBehavior = config.getFetchBehavior();
185                obsoleteBehavior = config.getObsoleteBehavior();
186                useMmCif = config.getFileFormat().equals( UserConfiguration.MMCIF_FORMAT );
187        }
188
        /**
         * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions.
         * <p>
         * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)}
         * for a more general solution.
         * <p>
         * The name is wrapped in a {@link StructureName} and passed to {@link #getAtoms(StructureIdentifier)}.
         *
         * @param name the structure name
         * @return an array of Atoms.
         * @throws IOException
         * @throws StructureException
         * @see #getRepresentativeAtoms(String)
         */
        public Atom[] getAtoms(String name) throws IOException, StructureException {
                return getAtoms(new StructureName(name));
        }
203        public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException {
204
205                Atom[] atoms = null;
206
207                // System.out.println("loading " + name);
208                Structure s = getStructure(name);
209
210                atoms = StructureTools.getAtomCAArray(s);
211
212                /*
213                 * synchronized (cache){ cache.put(name, atoms); }
214                 */
215
216                return atoms;
217        }
        /**
         * Returns the representative atoms for the provided name.
         * See {@link #getStructure(String)} for supported naming conventions.
         * <p>
         * The name is wrapped in a {@link StructureName} and passed to
         * {@link #getRepresentativeAtoms(StructureIdentifier)}.
         *
         * @param name the structure name
         * @return an array of Atoms.
         * @throws IOException
         * @throws StructureException
         * @see #getAtoms(String)
         */
        public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException {
                return getRepresentativeAtoms(new StructureName(name));
        }
231        public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException {
232
233                Atom[] atoms = null;
234
235                Structure s = getStructure(name);
236
237                atoms = StructureTools.getRepresentativeAtomArray(s);
238
239                /*
240                 * synchronized (cache){ cache.put(name, atoms); }
241                 */
242
243                return atoms;
244        }
245        /**
246         * Loads the biological assembly for a given PDB ID and bioAssemblyId. If a bioAssemblyId > 0 is specified, the
247         * corresponding biological assembly file will be loaded. Note, the number of available biological unit files
248         * varies. Many entries don't have a biological assembly specified (i.e. NMR structures), many entries have only one
249         * biological assembly (bioAssemblyId=1), and a few structures have multiple biological assemblies. Set
250         * bioAssemblyFallback to true, to download the original PDB file in cases that a biological assembly file is not
251         * available.
252         *
253         * @param pdbId
254         *            the PDB ID
255         * @param bioAssemblyId
256         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
257         * @param bioAssemblyFallback
258         *            if true, try reading original PDB file in case the biological assembly file is not available
259         * @return a structure object
260         * @throws IOException
261         * @throws StructureException
262         * @author Peter Rose
263         * @since 3.2
264         */
265        public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean bioAssemblyFallback)
266                        throws StructureException, IOException {
267
268                if (bioAssemblyId < 0) {
269                        throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId "
270                                        + bioAssemblyId);
271                }
272                Structure s = StructureIO.getBiologicalAssembly(pdbId, bioAssemblyId,this);
273
274                if ( s == null && bioAssemblyFallback)
275                        return StructureIO.getBiologicalAssembly(pdbId, 0,this);
276
277                return s;
278        }
279
280        /**
281         * Loads the default biological unit (e.g. *.pdb1.gz). If it is not available,
282         * the asymmetric unit will be loaded, i.e. for NMR structures.
283         *
284         * <p>Biological assemblies can also be accessed using
285         * <tt>getStructure("BIO:<i>[pdbId]</i>")</tt>
286         * @param pdbId
287         *            the PDB ID
288         * @return a structure object
289         * @throws IOException
290         * @throws StructureException
291         * @since 4.2
292         */
293        public Structure getBiologicalAssembly(String pdbId) throws StructureException, IOException {
294                int bioAssemblyId = 1;
295                return getBiologicalAssembly(pdbId, bioAssemblyId);
296        }
        /**
         * Loads the default biological unit (e.g. *.pdb1.gz). If it is not available,
         * the asymmetric unit will be loaded, i.e. for NMR structures.
         * <p>
         * Simply forwards to {@link #getBiologicalAssembly(String)}.
         *
         * @param pdbId
         *            the PDB ID
         * @return a structure object
         * @throws IOException
         * @throws StructureException
         * @since 3.2
         * @deprecated Renamed to {@link #getBiologicalAssembly(String)} in 4.2
         */
        @Deprecated
        public Structure getBiologicalUnit(String pdbId) throws StructureException, IOException {
                return getBiologicalAssembly(pdbId);
        }
313        /**
314         * Loads the default biological unit (e.g. *.pdb1.gz). If it is not available,
315         * the asymmetric unit will be loaded, i.e. for NMR structures.
316         *
317         * @param pdbId
318         *            the PDB ID
319         * @param bioAssemblyId
320         *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
321         * @return a structure object
322         * @throws IOException
323         * @throws StructureException
324         * @since 4.2
325         */
326        public Structure getBiologicalAssembly(String pdbId,int bioAssemblyId) throws StructureException, IOException {
327                boolean bioAssemblyFallback = true;
328                return getBiologicalAssembly(pdbId, bioAssemblyId, bioAssemblyFallback);
329        }
330
        /**
         * Returns the path that contains the caching file for utility data, such as domain definitions.
         *
         * @return the cache path given at construction time
         */
        public String getCachePath() {
                return cachePath;
        }
339
        /**
         * Returns the file parsing parameters held by this cache.
         *
         * @return the {@link FileParsingParameters} instance stored in this cache (not a copy)
         */
        public FileParsingParameters getFileParsingParams() {
                return params;
        }
343
        /**
         * Get the path that is used to cache PDB files.
         *
         * @return path to a directory
         */
        public String getPath() {
                return path;
        }
352
        /**
         * Returns the PDP provider held by this cache.
         *
         * @return the {@link PDPProvider} stored in this cache; may be null
         */
        public PDPProvider getPdpprovider() {
                return pdpprovider;
        }
356
357        /**
358         * Request a Structure based on a <i>name</i>.
359         *
360         * <pre>
361         *              Formal specification for how to specify the <i>name</i>:
362         *
363         *              name     := pdbID
364         *                             | pdbID '.' chainID
365         *                             | pdbID '.' range
366         *                             | scopID
367         *              range         := '('? range (',' range)? ')'?
368         *                             | chainID
369         *                             | chainID '_' resNum '-' resNum
370         *              pdbID         := [0-9][a-zA-Z0-9]{3}
371         *              chainID       := [a-zA-Z0-9]
372         *              scopID        := 'd' pdbID [a-z_][0-9_]
373         *              resNum        := [-+]?[0-9]+[A-Za-z]?
374         *
375         *
376         *              Example structures:
377         *              1TIM     #whole structure
378         *              4HHB.C     #single chain
379         *              4GCR.A_1-83     #one domain, by residue number
380         *              3AA0.A,B     #two chains treated as one structure
381         *              d2bq6a1     #scop domain
382         * </pre>
383         *
384         * With the additional set of rules:
385         *
386         * <ul>
387         * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model
388         * only (for NMR).
389         * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li>
390         * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names,
391         * see {@link #setStrictSCOP(boolean)}</li>
392         * <li>URLs are accepted as well</li>
393         * </ul>
394         *
395         * <p>Note that this method should not be used in StructureIdentifier
396         * implementations to avoid circular calls.
397         * @param name
398         * @return a Structure object, or null if name appears improperly formated (eg too short, etc)
399         * @throws IOException
400         *             The PDB file cannot be cached due to IO errors
401         * @throws StructureException
402         *             The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon
403         *             errors, eg for poorly formatted subranges.
404         */
405        public Structure getStructure(String name) throws IOException, StructureException {
406                StructureName structureName = new StructureName(name);
407
408                return getStructure(structureName);
409        }
410
411        /**
412         * Get the structure corresponding to the given {@link StructureIdentifier}.
413         * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)}
414         * followed by {@link StructureIdentifier#reduce(Structure)}.
415         *
416         * <p>Note that this method should not be used in StructureIdentifier
417         * implementations to avoid circular calls.
418         * @param strucId
419         * @return
420         * @throws IOException
421         * @throws StructureException
422         */
423        public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException {
424                Structure s = strucId.loadStructure(this);
425                Structure r = strucId.reduce(s);
426                r.setStructureIdentifier(strucId);
427                return r;
428
429//              if (name.length() < 4) {
430//                      throw new IllegalArgumentException("Can't interpret IDs that are shorter than 4 characters!");
431//              }
432//
433//              Structure n = null;
434//
435//              boolean useChainNr = false;
436//              boolean useDomainInfo = false;
437//              String range = null;
438//              int chainNr = -1;
439//
440//
441//              StructureName structureName = new StructureName(name);
442//
443//              String pdbId = null;
444//              String chainId = null;
445//
446//              if (name.length() == 4) {
447//
448//                      pdbId = name;
449//                      Structure s;
450//                      if (useMmCif) {
451//                              s = loadStructureFromCifByPdbId(pdbId);
452//                      } else {
453//                              s = loadStructureFromPdbByPdbId(pdbId);
454//                      }
455//                      return s;
456//              } else if (structureName.isScopName()) {
457//
458//                      // return based on SCOP domain ID
459//                      return getStructureFromSCOPDomain(name);
460//              } else if (structureName.isCathID()) {
461//                      return getStructureForCathDomain(structureName, CathFactory.getCathDatabase());
462//              } else if (name.length() == 6) {
463//                      // name is PDB.CHAINID style (e.g. 4hhb.A)
464//
465//                      pdbId = name.substring(0, 4);
466//                      if (name.substring(4, 5).equals(CHAIN_SPLIT_SYMBOL)) {
467//                              chainId = name.substring(5, 6);
468//                      } else if (name.substring(4, 5).equals(CHAIN_NR_SYMBOL)) {
469//
470//                              useChainNr = true;
471//                              chainNr = Integer.parseInt(name.substring(5, 6));
472//                      }
473//
474//              } else if (name.startsWith("file:/") || name.startsWith("http:/")) {
475//                      // this is a URL
476//
477//                      URL url = new URL(name);
478//                      return getStructureFromURL(url);
479//
480//
481//              } else if (structureName.isPDPDomain()) {
482//
483//                      // this is a PDP domain definition
484//
485//                      return getPDPStructure(name);
486//
487//              } else if (name.startsWith(BIOL_ASSEMBLY_IDENTIFIER)) {
488//
489//                      return getBioAssembly(name);
490//
491//              } else if (name.length() > 6 && !name.startsWith(PDP_DOMAIN_IDENTIFIER)
492//                              && (name.contains(CHAIN_NR_SYMBOL) || name.contains(UNDERSCORE))
493//                              && !(name.startsWith("file:/") || name.startsWith("http:/"))
494//
495//                              ) {
496//
497//                      // this is a name + range
498//
499//                      pdbId = name.substring(0, 4);
500//                      // this ID has domain split information...
501//                      useDomainInfo = true;
502//                      range = name.substring(5);
503//
504//              }
505//
506//              // System.out.println("got: >" + name + "< " + pdbId + " " + chainId + " useChainNr:" + useChainNr + " "
507//              // +chainNr + " useDomainInfo:" + useDomainInfo + " " + range);
508//
509//              if (pdbId == null) {
510//
511//                      return null;
512//              }
513//
514//              while (checkLoading(pdbId)) {
515//                      // waiting for loading to be finished...
516//
517//                      try {
518//                              Thread.sleep(100);
519//                      } catch (InterruptedException e) {
520//                              logger.error(e.getMessage());
521//                      }
522//
523//              }
524//
525//              // long start = System.currentTimeMillis();
526//
527//              Structure s;
528//              if (useMmCif) {
529//                      s = loadStructureFromCifByPdbId(pdbId);
530//              } else {
531//                      s = loadStructureFromPdbByPdbId(pdbId);
532//              }
533//
534//              // long end = System.currentTimeMillis();
535//              // System.out.println("time to load " + pdbId + " " + (end-start) + "\t  size :" +
536//              // StructureTools.getNrAtoms(s) + "\t cached: " + cache.size());
537//
538//              if (chainId == null && chainNr < 0 && range == null) {
539//                      // we only want the 1st model in this case
540//                      n = StructureTools.getReducedStructure(s, -1);
541//              } else {
542//
543//                      if (useChainNr) {
544//                              // System.out.println("using ChainNr");
545//                              n = StructureTools.getReducedStructure(s, chainNr);
546//                      } else if (useDomainInfo) {
547//                              // System.out.println("calling getSubRanges");
548//                              n = StructureTools.getSubRanges(s, range);
549//                      } else {
550//                              // System.out.println("reducing Chain Id " + chainId);
551//                              n = StructureTools.getReducedStructure(s, chainId);
552//                      }
553//              }
554//
555//
556//
557//              n.setName(name);
558//              return n;
559
560        }
561
        /**
         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object, using the
         * database returned by {@link ScopFactory#getSCOP()}.
         *
         * @param domain
         *            a SCOP domain
         * @return a Structure object
         * @throws IOException
         * @throws StructureException
         */
        public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException {
                return getStructureForDomain(domain, ScopFactory.getSCOP());
        }
574
        /**
         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. Forwards to
         * {@link #getStructureForDomain(ScopDomain, ScopDatabase, boolean)} with strict ligand handling switched off.
         *
         * @param domain
         *            a SCOP domain
         * @param scopDatabase
         *            A {@link ScopDatabase} to use
         * @return a Structure object
         * @throws IOException
         * @throws StructureException
         */
        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException,
                        StructureException {
                return getStructureForDomain(domain, scopDatabase, false);
        }
590
591        /**
592         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
593         *
594         * @param domain
595         *            a SCOP domain
596         * @param scopDatabase
597         *            A {@link ScopDatabase} to use
598         * @param strictLigandHandling
599         *            If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP
600         *            domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the
601         *            definition (residue numbers) of the SCOP domain
602         * @return a Structure object
603         * @throws IOException
604         * @throws StructureException
605         */
606        public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
607                        throws IOException, StructureException {
608
609                String pdbId = domain.getPdbId();
610                Structure fullStructure = getStructureForPdbId(pdbId);
611                Structure structure = domain.reduce(fullStructure);
612
613                // TODO It would be better to move all of this into the reduce method,
614                // but that would require ligand handling properties in StructureIdentifiers
615
616                // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in
617                // specifically, we add a ligand if and only if it occurs within the domain
618                AtomPositionMap map = null;
619                List<ResidueRangeAndLength> rrs = null;
620                if (strictLigandHandling) {
621                        map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER);
622                        rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map);
623                }
624                for (Chain chain : fullStructure.getChains()) {
625                        if (!structure.hasChain(chain.getChainID())) {
626                                continue; // we can't do anything with a chain our domain
627                        }
628                        // doesn't contain
629                        Chain newChain = structure.getChainByPDB(chain.getChainID());
630                        List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups());
631                        for (Group group : ligands) {
632                                boolean shouldContain = true;
633                                if (strictLigandHandling) {
634                                        shouldContain = false; // whether the ligand occurs within the domain
635                                        for (ResidueRange rr : rrs) {
636                                                if (rr.contains(group.getResidueNumber(), map)) {
637                                                        shouldContain = true;
638                                                }
639                                        }
640                                }
641                                boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate
642                                                                                                                                                                        // ligands
643                                if (shouldContain && !alreadyContains) {
644                                        newChain.addGroup(group);
645                                }
646                        }
647                }
648
649                // build a more meaningful description for the new structure
650                StringBuilder header = new StringBuilder();
651                header.append(domain.getClassificationId());
652                if (scopDatabase != null) {
653                        int sf = domain.getSuperfamilyId();
654                        ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf);
655                        if (description != null) {
656                                header.append(" | ");
657                                header.append(description.getDescription());
658                        }
659                }
660                structure.getPDBHeader().setDescription(header.toString());
661
662                return structure;
663
664        }
665
        /**
         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object, looking the
         * domain up in the database returned by {@link ScopFactory#getSCOP()}.
         *
         * @param scopId
         *            a SCOP Id
         * @return a Structure object
         * @throws IOException
         * @throws StructureException
         */
        public Structure getStructureForDomain(String scopId) throws IOException, StructureException {
                return getStructureForDomain(scopId, ScopFactory.getSCOP());
        }
678
679        /**
680         * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
681         *
682         * @param scopId
683         *            a SCOP Id
684         * @param scopDatabase
685         *            A {@link ScopDatabase} to use
686         * @return a Structure object
687         * @throws IOException
688         * @throws StructureException
689         */
690        public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException,
691                        StructureException {
692                ScopDomain domain = scopDatabase.getDomainByScopID(scopId);
693                return getStructureForDomain(domain, scopDatabase);
694        }
695
        /**
         * Does the cache automatically download files that are missing from the local installation from the PDB FTP site?
         * <p>
         * Answers true for every fetch behavior other than {@link FetchBehavior#LOCAL_ONLY}.
         *
         * @return flag
         * @deprecated Use {@link #getFetchBehavior()}
         */
        @Deprecated
        public boolean isAutoFetch() {
                return fetchBehavior != FetchBehavior.LOCAL_ONLY;
        }
706
        /**
         * Tells whether the obsolete behavior of this cache is {@link ObsoleteBehavior#FETCH_CURRENT}.
         * <p>
         * <b>N.B.</b> This feature won't work unless the structure wasn't found &amp; autoFetch is set to <code>true</code>.
         *
         * @return the fetchCurrent
         * @deprecated Use {@link FileParsingParameters#getObsoleteBehavior()} instead (4.0.0)
         */
        @Deprecated
        public boolean isFetchCurrent() {
                return getObsoleteBehavior() == ObsoleteBehavior.FETCH_CURRENT;
        }
717
718        /**
719         * forces the cache to fetch the file if its status is OBSOLETE. This feature has a higher priority than
720         * {@link #setFetchCurrent(boolean)}.<br>
721         * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>.
722         *
723         * @return the fetchFileEvenIfObsolete
724         * @author Amr AL-Hossary
725         * @see #fetchCurrent
726         * @since 3.0.2
727         * @deprecated Use {@link FileParsingParameters#getObsoleteBehavior()} instead (4.0.0)
728         */
729        @Deprecated
730        public boolean isFetchFileEvenIfObsolete() {
731                return getObsoleteBehavior() == ObsoleteBehavior.FETCH_OBSOLETE;
732        }
733
734
735        /**
736         * Scop handling was changed in 4.2.0. For behaviour equivalent to
737         * strictSCOP==true, use {@link ScopDatabase#getDomainByScopID(String)}.
738         * For strictSCOP==False, create a {@link StructureName} or use
739         * {@link StructureName#guessScopDomain(String, ScopDatabase)} explicitely.
740         *
741         * @return false; ignored
742         * @deprecated since 4.2
743         */
744        @Deprecated
745        public boolean isStrictSCOP() {
746                return false;
747        }
748
749        /**
750         * Send a signal to the cache that the system is shutting down. Notifies underlying SerializableCache instances to
751         * flush themselves...
752         */
753        public void notifyShutdown() {
754                // System.out.println(" AtomCache got notify shutdown..");
755                if (pdpprovider != null) {
756                        if (pdpprovider instanceof RemotePDPProvider) {
757                                RemotePDPProvider remotePDP = (RemotePDPProvider) pdpprovider;
758                                remotePDP.flushCache();
759                        }
760                }
761
762                // todo: use a SCOP implementation that is backed by SerializableCache
763                ScopDatabase scopInstallation = ScopFactory.getSCOP();
764                if (scopInstallation != null) {
765                        if (scopInstallation instanceof CachedRemoteScopInstallation) {
766                                CachedRemoteScopInstallation cacheScop = (CachedRemoteScopInstallation) scopInstallation;
767                                cacheScop.flushCache();
768                        }
769                }
770
771        }
772
773        /**
774         * Does the cache automatically download files that are missing from the local installation from the PDB FTP site?
775         *
776         * @param autoFetch
777         *            flag
778         * @deprecated Use {@link #getFetchBehavior()}
779         */
780        @Deprecated
781        public void setAutoFetch(boolean autoFetch) {
782                if(autoFetch) {
783                        setFetchBehavior(FetchBehavior.DEFAULT);
784                } else {
785                        setFetchBehavior(FetchBehavior.LOCAL_ONLY);
786                }
787        }
788
789        /**
790         * set the location at which utility data should be cached.
791         *
792         * @param cachePath
793         */
794        public void setCachePath(String cachePath) {
795                this.cachePath = cachePath;
796        }
797
798        /**
799         * if enabled, the reader searches for the newest possible PDB ID, if not present in he local installation. The
800         * {@link #setFetchFileEvenIfObsolete(boolean)} function has a higher priority than this function.<br>
801         * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>.
802         *
803         * @param fetchCurrent
804         *            the fetchCurrent to set
805         * @author Amr AL-Hossary
806         * @see #setFetchFileEvenIfObsolete(boolean)
807         * @since 3.0.2
808         * @deprecated Use {@link FileParsingParameters#setObsoleteBehavior()} instead (4.0.0)
809         */
810        @Deprecated
811        public void setFetchCurrent(boolean fetchNewestCurrent) {
812                if(fetchNewestCurrent) {
813                        setObsoleteBehavior(ObsoleteBehavior.FETCH_CURRENT);
814                } else {
815                        if(getObsoleteBehavior() == ObsoleteBehavior.FETCH_CURRENT) {
816                                setObsoleteBehavior(ObsoleteBehavior.DEFAULT);
817                        }
818                }
819        }
820
821        /**
822         * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>.
823         *
824         * @param fetchFileEvenIfObsolete
825         *            the fetchFileEvenIfObsolete to set
826         * @deprecated Use {@link FileParsingParameters#setObsoleteBehavior()} instead (4.0.0)
827         */
828        @Deprecated
829        public void setFetchFileEvenIfObsolete(boolean fetchFileEvenIfObsolete) {
830                if(fetchFileEvenIfObsolete) {
831                        setObsoleteBehavior(ObsoleteBehavior.FETCH_OBSOLETE);
832                } else {
833                        if(getObsoleteBehavior() == ObsoleteBehavior.FETCH_OBSOLETE) {
834                                setObsoleteBehavior(ObsoleteBehavior.DEFAULT);
835                        }
836                }
837        }
838
839        public void setFileParsingParams(FileParsingParameters params) {
840                this.params = params;
841        }
842
843
844        /**
845         * <b>[Optional]</b> This method changes the behavior when obsolete entries
846         * are requested. Current behaviors are:
847         * <ul>
848         * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}
849         *   Throw a {@link StructureException} (the default)
850         * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE}
851         *   Load the requested ID from the PDB's obsolete repository
852         * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT}
853         *   Load the most recent version of the requested structure
854         *
855         * <p>This setting may be silently ignored by implementations which do not have
856         * access to the server to determine whether an entry is obsolete, such as
857         * if {@link #isAutoFetch()} is false. Note that an obsolete entry may still be
858         * returned even this is FETCH_CURRENT if the entry is found locally.
859         *
860         * @param fetchFileEvenIfObsolete Whether to fetch obsolete records
861         * @see #setFetchCurrent(boolean)
862         * @since 4.0.0
863         */
864        public void setObsoleteBehavior(ObsoleteBehavior behavior) {
865                obsoleteBehavior = behavior;
866        }
867
868        /**
869         * Returns how this instance deals with obsolete entries. Note that this
870         * setting may be ignored by some implementations or in some situations,
871         * such as when {@link #isAutoFetch()} is false.
872         *
873         * <p>For most implementations, the default value is
874         * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}.
875         *
876         * @return The ObsoleteBehavior
877         * @since 4.0.0
878         */
879        public ObsoleteBehavior getObsoleteBehavior() {
880                return obsoleteBehavior;
881        }
882
883        /**
884         * Get the behavior for fetching files from the server
885         * @return
886         */
887        public FetchBehavior getFetchBehavior() {
888                return fetchBehavior;
889        }
890        /**
891         * Set the behavior for fetching files from the server
892         * @param fetchBehavior
893         */
894        public void setFetchBehavior(FetchBehavior fetchBehavior) {
895                this.fetchBehavior = fetchBehavior;
896        }
897
898        /**
899         * Set the path that is used to cache PDB files.
900         *
901         * @param path
902         *            to a directory
903         */
904        public void setPath(String path) {
905                this.path = FileDownloadUtils.expandUserHome(path);
906        }
907
908        public void setPdpprovider(PDPProvider pdpprovider) {
909                this.pdpprovider = pdpprovider;
910        }
911
912
913        /**
914         * This method does nothing.
915         *
916         * Scop handling was changed in 4.2.0. For behaviour equivalent to
917         * strictSCOP==true, use {@link ScopDatabase#getDomainByScopID(String)}.
918         * For strictSCOP==False, create a {@link StructureName} or use
919         * {@link StructureName#guessScopDomain(String, ScopDatabase)} explicitely.
920         *
921         * @param strictSCOP Ignored
922         * @deprecated Removed in 4.2.0
923         */
924        @Deprecated
925        public void setStrictSCOP(boolean ignored) {}
926
927        /**
928         * @return the useMmCif
929         */
930        public boolean isUseMmCif() {
931                return useMmCif;
932        }
933
934        /**
935         * @param useMmCif
936         *            the useMmCif to set
937         */
938        public void setUseMmCif(boolean useMmCif) {
939                this.useMmCif = useMmCif;
940
941                if ( useMmCif) {
942                        // get bio assembly from mmcif file
943
944                        BioUnitDataProviderFactory.setBioUnitDataProvider(MmCifBiolAssemblyProvider.class);
945
946                } else {
947
948                        BioUnitDataProviderFactory.setBioUnitDataProvider(PDBBioUnitDataProvider.class);
949
950                }
951        }
952
953        private boolean checkLoading(String name) {
954                return currentlyLoading.contains(name);
955
956        }
957
958        /**
959         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the the {@link CathDatabase}
960         * at {@link CathFactory#getCathDatabase()}.
961         */
962        public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException {
963                return getStructureForCathDomain(structureName, CathFactory.getCathDatabase());
964        }
965
966        /**
967         * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}.
968         */
969        public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException {
970
971                CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier());
972
973                Structure s = getStructureForPdbId(cathDomain.getIdentifier());
974                Structure n = cathDomain.reduce(s);
975
976                // add the ligands of the chain...
977
978                Chain newChain = n.getChainByPDB(structureName.getChainId());
979                Chain origChain = s.getChainByPDB(structureName.getChainId());
980                List<Group> ligands = origChain.getAtomLigands();
981
982                for (Group g : ligands) {
983                        if (!newChain.getAtomGroups().contains(g)) {
984                                newChain.addGroup(g);
985                        }
986                }
987
988                return n;
989        }
990
991        protected void flagLoading(String name) {
992                if (!currentlyLoading.contains(name)) {
993
994                        currentlyLoading.add(name);
995                }
996        }
997
998        protected void flagLoadingFinished(String name) {
999
1000                currentlyLoading.remove(name);
1001        }
1002
1003        /**
1004         * Loads a structure directly by PDB ID
1005         * @param pdbId
1006         * @return
1007         * @throws IOException
1008         * @throws StructureException
1009         */
1010        public Structure getStructureForPdbId(String pdbId) throws IOException, StructureException {
1011                if(pdbId == null)
1012                        return null;
1013                if(pdbId.length() != 4) {
1014                        throw new StructureException("Unrecognized PDB ID: "+pdbId);
1015                }
1016                while (checkLoading(pdbId)) {
1017                        // waiting for loading to be finished...
1018
1019                        try {
1020                                Thread.sleep(100);
1021                        } catch (InterruptedException e) {
1022                                logger.error(e.getMessage());
1023                        }
1024
1025                }
1026
1027                Structure s;
1028                if (useMmCif) {
1029                        s = loadStructureFromCifByPdbId(pdbId);
1030                } else {
1031                        s = loadStructureFromPdbByPdbId(pdbId);
1032                }
1033                return s;
1034        }
1035
1036
1037        protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException, StructureException {
1038
1039                Structure s;
1040                flagLoading(pdbId);
1041                try {
1042                        MMCIFFileReader reader = new MMCIFFileReader(path);
1043                        reader.setFetchBehavior(fetchBehavior);
1044                        reader.setObsoleteBehavior(obsoleteBehavior);
1045
1046                        reader.setFileParsingParameters(params);
1047
1048                        s = reader.getStructureById(pdbId.toLowerCase());
1049
1050                } finally {
1051                        flagLoadingFinished(pdbId);
1052                }
1053
1054                return s;
1055        }
1056
1057        protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException, StructureException {
1058
1059                Structure s;
1060                flagLoading(pdbId);
1061                try {
1062                        PDBFileReader reader = new PDBFileReader(path);
1063                        reader.setFetchBehavior(fetchBehavior);
1064                        reader.setObsoleteBehavior(obsoleteBehavior);
1065
1066                        reader.setFileParsingParameters(params);
1067
1068                        s = reader.getStructureById(pdbId.toLowerCase());
1069
1070                } finally {
1071                        flagLoadingFinished(pdbId);
1072                }
1073
1074                return s;
1075        }
1076
1077}