/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.align.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;

import org.biojava.nbio.core.util.InputStreamProvider;
import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.align.client.StructureName;
import org.biojava.nbio.structure.cath.CathDatabase;
import org.biojava.nbio.structure.cath.CathDomain;
import org.biojava.nbio.structure.cath.CathFactory;
import org.biojava.nbio.structure.domain.PDPProvider;
import org.biojava.nbio.structure.domain.RemotePDPProvider;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior;
import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior;
import org.biojava.nbio.structure.io.MMCIFFileReader;
import org.biojava.nbio.structure.io.MMTFFileReader;
import org.biojava.nbio.structure.io.PDBFileReader;
import org.biojava.nbio.core.util.FileDownloadUtils;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.scop.CachedRemoteScopInstallation;
import org.biojava.nbio.structure.scop.ScopDatabase;
import org.biojava.nbio.structure.scop.ScopDescription;
import org.biojava.nbio.structure.scop.ScopDomain;
import org.biojava.nbio.structure.scop.ScopFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently
 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache
 * is a soft-cache, this means it won't cause out of memory exceptions, but garbage collects the data if the Java
 * virtual machine needs to free up space. The AtomCache is thread-safe.
 *
 * @author Andreas Prlic
 * @author Spencer Bliven
 * @author Peter Rose
 * @since 3.0
 */
public class AtomCache {

    private static final Logger logger = LoggerFactory.getLogger(AtomCache.class);

    /**
     * The default output bioassembly style: if true the bioassemblies are multimodel,
     * if false the bioassemblies are flat with renamed chains for symmetry-partners.
     */
    public static final boolean DEFAULT_BIOASSEMBLY_STYLE = false;

    public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:";
    public static final String CHAIN_NR_SYMBOL = ":";
    public static final String CHAIN_SPLIT_SYMBOL = ".";

    public static final String PDP_DOMAIN_IDENTIFIER = "PDP:";

    public static final String UNDERSCORE = "_";

    private static final String FILE_SEPARATOR = System.getProperty("file.separator");

    protected FileParsingParameters params;
    protected PDPProvider pdpprovider;

    private FetchBehavior fetchBehavior;
    private ObsoleteBehavior obsoleteBehavior;

    private String cachePath;

    // make sure IDs are loaded uniquely
    private final Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<String>());

    private String path;

    private boolean useMmCif;
    private boolean useMmtf;

    /**
     * Default AtomCache constructor.
     *
     * Usually stores files in a temp directory, but this can be overridden by setting the PDB_DIR variable at runtime.
     *
     * @see UserConfiguration#UserConfiguration()
     */
    public AtomCache() {
        this(new UserConfiguration());
    }

    /**
     * Creates an instance of an AtomCache that is pointed to a particular path in the file system. It will use the
     * same value for pdbFilePath and cachePath.
     *
     * @param pdbFilePath
     *            a directory in the file system to use as a location to cache files.
     */
    public AtomCache(String pdbFilePath) {
        this(pdbFilePath, pdbFilePath);
    }

    /**
     * Creates an instance of an AtomCache that is pointed to a particular path in the file system.
     * The new cache uses the default fetch and obsolete behaviors, fresh
     * {@link FileParsingParameters} and the MMTF file format.
     *
     * @param pdbFilePath
     *            a directory in the file system to use as a location to cache files.
     * @param cachePath
     *            the location at which utility data should be cached.
     */
    public AtomCache(String pdbFilePath, String cachePath) {

        logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}", pdbFilePath, cachePath);

        if (!pdbFilePath.endsWith(FILE_SEPARATOR)) {
            pdbFilePath += FILE_SEPARATOR;
        }

        // we are caching the binary files that contain the PDBs gzipped
        // that is the most memory efficient way of caching...
        // set the input stream provider to caching mode
        System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true");

        setPath(pdbFilePath);

        this.cachePath = cachePath;

        fetchBehavior = FetchBehavior.DEFAULT;
        obsoleteBehavior = ObsoleteBehavior.DEFAULT;

        params = new FileParsingParameters();

        setUseMmCif(false);
        setUseMmtf(true);
    }

    /**
     * Creates a new AtomCache object based on the provided UserConfiguration.
     *
     * @param config
     *            the UserConfiguration to use for this cache.
     */
    public AtomCache(UserConfiguration config) {
        this(config.getPdbFilePath(), config.getCacheFilePath());
        fetchBehavior = config.getFetchBehavior();
        obsoleteBehavior = config.getObsoleteBehavior();
        useMmCif = config.getFileFormat().equals(UserConfiguration.MMCIF_FORMAT);

        // NOTE(review): the delegated constructor enables MMTF, and it is only switched off
        // here for mmCIF - any other configured file format still ends up loading via MMTF.
        // Confirm this is intended.
        if (useMmCif) {
            useMmtf = false;
        }
    }

    /**
     * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions.
     * <p>
     * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)}
     * for a more general solution.
     *
     * @param name
     *            the name of the structure to load
     * @return an array of Atoms.
     * @throws IOException
     * @throws StructureException
     */
    public Atom[] getAtoms(String name) throws IOException, StructureException {
        return getAtoms(new StructureName(name));
    }

    /**
     * Returns the CA atoms of the structure obtained through {@link #getStructure(StructureIdentifier)}.
     *
     * @param name
     *            the identifier of the structure to load
     * @return an array of Atoms.
     * @throws IOException
     * @throws StructureException
     */
    public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException {
        Structure s = getStructure(name);
        return StructureTools.getAtomCAArray(s);
    }

    /**
     * Returns the representative atoms for the provided name.
     * See {@link #getStructure(String)} for supported naming conventions.
     *
     * @param name
     *            the name of the structure to load
     * @return an array of Atoms.
     * @throws IOException
     * @throws StructureException
     */
    public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException {
        return getRepresentativeAtoms(new StructureName(name));
    }

    /**
     * Returns the representative atoms of the structure obtained through
     * {@link #getStructure(StructureIdentifier)}.
     *
     * @param name
     *            the identifier of the structure to load
     * @return an array of Atoms.
     * @throws IOException
     * @throws StructureException
     */
    public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException {
        Structure s = getStructure(name);
        return StructureTools.getRepresentativeAtomArray(s);
    }

    /**
     * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the
     * assembly from the biounit annotations found in {@link Structure#getPDBHeader()}
     * <p>
     * Note, the number of available biological unit files
     * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one
     * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies.
     *
     * @param pdbId
     *            the PDB ID
     * @param bioAssemblyId
     *            the 1-based index of the biological assembly (0 gets the asymmetric unit)
     * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
     * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form
     * originalAsymId_transformId and originalAuthId_transformId).
     * @return a structure object
     * @throws IOException
     * @throws StructureException if bioAssemblyId &lt; 0 or other problems while loading structure
     * @author Peter Rose
     * @since 3.2
     */
    public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean multiModel)
            throws StructureException, IOException {

        if (bioAssemblyId < 0) {
            throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId "
                    + bioAssemblyId);
        }

        Structure asymUnit = loadAsymUnitWithBioAssemblyInfo(pdbId);

        if (lacksBioAssemblyInfo(asymUnit)) {
            logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
            return asymUnit;
        }

        // 0 ... asym unit
        if (bioAssemblyId == 0) {
            logger.info("Requested biological assembly 0 for PDB id {}, returning asymmetric unit", pdbId);
            return asymUnit;
        }

        // does it exist?
        if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
            throw new StructureException("No biological assembly available for biological assembly id " + bioAssemblyId + " of " + pdbId);
        }

        List<BiologicalAssemblyTransformation> transformations = transformationsOf(asymUnit, bioAssemblyId);

        if (transformations == null || transformations.isEmpty()) {
            throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
        }

        return rebuildAssembly(asymUnit, transformations, multiModel);
    }

    /**
     * Returns the default biological unit (bioassemblyId=1, known in PDB as pdb1.gz). If it is not available,
     * the asymmetric unit will be returned, e.g. for NMR structures.
     *
     * <p>Biological assemblies can also be accessed using
     * <tt>getStructure("BIO:<i>[pdbId]</i>")</tt>
     * @param pdbId the PDB id
     * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
     * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form
     * originalAsymId_transformId and originalAuthId_transformId).
     * @return a structure object
     * @throws IOException
     * @throws StructureException
     * @since 4.2
     */
    public Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws StructureException, IOException {

        Structure asymUnit = loadAsymUnitWithBioAssemblyInfo(pdbId);

        if (lacksBioAssemblyInfo(asymUnit)) {
            logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId);
            return asymUnit;
        }

        int bioAssemblyId = 1;

        // does it exist? unlike the explicit-id variant, fall back to the asymmetric unit
        if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) {
            return asymUnit;
        }

        List<BiologicalAssemblyTransformation> transformations = transformationsOf(asymUnit, bioAssemblyId);

        if (transformations == null || transformations.isEmpty()) {
            throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId);
        }

        return rebuildAssembly(asymUnit, transformations, multiModel);
    }

    /**
     * Returns all biological assemblies for given PDB id. Assemblies for which no
     * transformations are available are skipped.
     *
     * @param pdbId the PDB id
     * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
     * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form
     * originalAsymId_transformId and originalAuthId_transformId).
     * @return the list of assemblies; it holds only the asymmetric unit if no bioassembly information is found
     * @throws StructureException
     * @throws IOException
     * @since 5.0
     */
    public List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws StructureException, IOException {

        List<Structure> assemblies = new ArrayList<>();

        Structure asymUnit = loadAsymUnitWithBioAssemblyInfo(pdbId);

        if (lacksBioAssemblyInfo(asymUnit)) {
            logger.info("No bioassembly information found for {}, returning asymmetric unit as the only biological assembly", pdbId);
            assemblies.add(asymUnit);
            return assemblies;
        }

        for (int bioAssemblyId : asymUnit.getPDBHeader().getBioAssemblies().keySet()) {
            List<BiologicalAssemblyTransformation> transformations = transformationsOf(asymUnit, bioAssemblyId);

            if (transformations == null || transformations.isEmpty()) {
                logger.info("Could not load transformations to recreate biological assembly id {} of {}. Assembly id will be missing in biological assemblies.",
                        bioAssemblyId, pdbId);
                continue;
            }

            assemblies.add(rebuildAssembly(asymUnit, transformations, multiModel));
        }
        return assemblies;
    }

    /**
     * Loads the asymmetric unit for a PDB id with bioassembly parsing switched on, restoring the
     * previous parsing setting afterwards even if loading fails.
     */
    private Structure loadAsymUnitWithBioAssemblyInfo(String pdbId) throws IOException, StructureException {
        boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly();

        if (!prevIsParseBioAssembly) {
            getFileParsingParams().setParseBioAssembly(true);
        }

        Structure asymUnit;
        try {
            asymUnit = getStructureForPdbId(pdbId);
        } finally {
            // the parameters object is shared with every other load through this cache,
            // so the flag must not stay flipped when loading throws
            getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly);
        }

        // getStructureForPdbId returns null for a null id
        if (asymUnit == null) {
            throw new StructureException("No structure could be loaded for PDB id " + pdbId);
        }
        return asymUnit;
    }

    /** Tells whether the structure carries no header or no bioassembly annotations at all. */
    private static boolean lacksBioAssemblyInfo(Structure asymUnit) {
        return asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null;
    }

    /** Looks up the transformations annotated for one biological assembly of the structure. */
    private static List<BiologicalAssemblyTransformation> transformationsOf(Structure asymUnit, int bioAssemblyId) {
        return asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms();
    }

    /** Applies the transformations to the asymmetric unit to build a biological assembly. */
    private Structure rebuildAssembly(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations,
            boolean multiModel) throws StructureException, IOException {
        BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();

        // if we use mmcif or mmtf, then we need to pass useAsymIds=true
        boolean useAsymIds = useMmCif || useMmtf;
        return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel);
    }

    /**
     * Returns the path that contains the caching file for utility data, such as domain definitions.
     *
     * @return the cache path
     */
    public String getCachePath() {
        return cachePath;
    }

    /**
     * Returns the parsing parameters that are handed to the mmCIF and PDB file readers.
     *
     * @return the file parsing parameters
     */
    public FileParsingParameters getFileParsingParams() {
        return params;
    }

    /**
     * Get the path that is used to cache PDB files.
     *
     * @return path to a directory
     */
    public String getPath() {
        return path;
    }

    public PDPProvider getPdpprovider() {
        return pdpprovider;
    }

    /**
     * Request a Structure based on a <i>name</i>.
     *
     * <pre>
     * Formal specification for how to specify the <i>name</i>:
     *
     * name     := pdbID
     *             | pdbID '.' chainID
     *             | pdbID '.' range
     *             | scopID
     * range    := '('? range (',' range)? ')'?
     *             | chainID
     *             | chainID '_' resNum '-' resNum
     * pdbID    := [0-9][a-zA-Z0-9]{3}
     * chainID  := [a-zA-Z0-9]
     * scopID   := 'd' pdbID [a-z_][0-9_]
     * resNum   := [-+]?[0-9]+[A-Za-z]?
     *
     *
     * Example structures:
     * 1TIM         #whole structure
     * 4HHB.C       #single chain
     * 4GCR.A_1-83  #one domain, by residue number
     * 3AA0.A,B     #two chains treated as one structure
     * d2bq6a1      #scop domain
     * </pre>
     *
     * With the additional set of rules:
     *
     * <ul>
     * <li>If only a PDB code is provided, the whole structure will be returned including ligands, but the first model
     * only (for NMR).</li>
     * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li>
     * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain
     * names.</li>
     * <li>URLs are accepted as well</li>
     * </ul>
     *
     * <p>Note that this method should not be used in StructureIdentifier
     * implementations to avoid circular calls.
     * @param name
     *            the name of the structure, following the rules above
     * @return a Structure object, or null if name appears improperly formatted (eg too short, etc)
     * @throws IOException
     *             The PDB file cannot be cached due to IO errors
     * @throws StructureException
     *             The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon
     *             errors, eg for poorly formatted subranges.
     */
    public Structure getStructure(String name) throws IOException, StructureException {
        StructureName structureName = new StructureName(name);

        return getStructure(structureName);
    }

    /**
     * Get the structure corresponding to the given {@link StructureIdentifier}.
     * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)}
     * followed by {@link StructureIdentifier#reduce(Structure)}.
     *
     * <p>Note that this method should not be used in StructureIdentifier
     * implementations to avoid circular calls.
     * @param strucId
     *            the identifier to resolve
     * @return the reduced structure, tagged with {@code strucId} as its structure identifier
     * @throws IOException
     * @throws StructureException
     */
    public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException {
        Structure s = strucId.loadStructure(this);
        Structure r = strucId.reduce(s);
        r.setStructureIdentifier(strucId);
        return r;
    }

    /**
     * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
     *
     * @param domain
     *            a SCOP domain
     * @return a Structure object
     * @throws IOException
     * @throws StructureException
     */
    public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException {
        return getStructureForDomain(domain, ScopFactory.getSCOP());
    }

    /**
     * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
     *
     * @param domain
     *            a SCOP domain
     * @param scopDatabase
     *            A {@link ScopDatabase} to use
     * @return a Structure object
     * @throws IOException
     * @throws StructureException
     */
    public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException,
            StructureException {
        return getStructureForDomain(domain, scopDatabase, false);
    }

    /**
     * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
     *
     * @param domain
     *            a SCOP domain
     * @param scopDatabase
     *            A {@link ScopDatabase} to use; may be null, in which case the description of the result only
     *            holds the classification id
     * @param strictLigandHandling
     *            If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP
     *            domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the
     *            definition (residue numbers) of the SCOP domain
     * @return a Structure object
     * @throws IOException
     * @throws StructureException
     */
    public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
            throws IOException, StructureException {

        String pdbId = domain.getPdbId();
        Structure fullStructure = getStructureForPdbId(pdbId);
        Structure structure = domain.reduce(fullStructure);

        // TODO It would be better to move all of this into the reduce method,
        // but that would require ligand handling properties in StructureIdentifiers

        // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in
        // specifically, we add a ligand if and only if it occurs within the domain
        AtomPositionMap map = null;
        List<ResidueRangeAndLength> rrs = null;
        if (strictLigandHandling) {
            map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER);
            rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map);
        }
        for (Chain chain : fullStructure.getNonPolyChains()) {

            if (!structure.hasPdbChain(chain.getName())) {
                continue; // we can't do anything with a chain our domain does not contain
            }

            Chain newChain;
            if (!structure.hasNonPolyChain(chain.getId())) {
                newChain = new ChainImpl();
                newChain.setId(chain.getId());
                newChain.setName(chain.getName());
                newChain.setEntityInfo(chain.getEntityInfo());
                structure.addChain(newChain);
            } else {
                newChain = structure.getNonPolyChain(chain.getId());
            }

            List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups());
            for (Group group : ligands) {
                // in strict mode the ligand has to occur within the domain's residue ranges
                boolean shouldContain = !strictLigandHandling || isWithinRanges(group, rrs, map);
                // we don't want to add duplicate ligands
                boolean alreadyContains = newChain.getAtomGroups().contains(group);
                if (shouldContain && !alreadyContains) {
                    newChain.addGroup(group);
                }
            }
        }

        // build a more meaningful description for the new structure
        StringBuilder header = new StringBuilder();
        header.append(domain.getClassificationId());
        if (scopDatabase != null) {
            int sf = domain.getSuperfamilyId();
            ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf);
            if (description != null) {
                header.append(" | ");
                header.append(description.getDescription());
            }
        }
        structure.getPDBHeader().setDescription(header.toString());

        return structure;
    }

    /** Tells whether the group's residue number lies inside at least one of the given ranges. */
    private static boolean isWithinRanges(Group group, List<ResidueRangeAndLength> ranges, AtomPositionMap map) {
        for (ResidueRange rr : ranges) {
            if (rr.contains(group.getResidueNumber(), map)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
     *
     * @param scopId
     *            a SCOP Id
     * @return a Structure object
     * @throws IOException
     * @throws StructureException
     */
    public Structure getStructureForDomain(String scopId) throws IOException, StructureException {
        return getStructureForDomain(scopId, ScopFactory.getSCOP());
    }

    /**
     * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object.
     *
     * @param scopId
     *            a SCOP Id
     * @param scopDatabase
     *            A {@link ScopDatabase} to use
     * @return a Structure object
     * @throws IOException
     * @throws StructureException
     *             if the database lookup yields no domain for {@code scopId}
     */
    public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException,
            StructureException {
        ScopDomain domain = scopDatabase.getDomainByScopID(scopId);
        // guard against a lookup miss instead of failing later with a NullPointerException
        if (domain == null) {
            throw new StructureException("Unknown SCOP domain: " + scopId);
        }
        return getStructureForDomain(domain, scopDatabase);
    }

    /**
     * Send a signal to the cache that the system is shutting down. Notifies underlying SerializableCache instances to
     * flush themselves...
     */
    public void notifyShutdown() {
        // instanceof is false for null, so no separate null checks are needed
        if (pdpprovider instanceof RemotePDPProvider) {
            RemotePDPProvider remotePDP = (RemotePDPProvider) pdpprovider;
            remotePDP.flushCache();
        }

        // todo: use a SCOP implementation that is backed by SerializableCache
        ScopDatabase scopInstallation = ScopFactory.getSCOP();
        if (scopInstallation instanceof CachedRemoteScopInstallation) {
            CachedRemoteScopInstallation cacheScop = (CachedRemoteScopInstallation) scopInstallation;
            cacheScop.flushCache();
        }
    }

    /**
     * set the location at which utility data should be cached.
     *
     * @param cachePath
     *            the new cache location
     */
    public void setCachePath(String cachePath) {
        this.cachePath = cachePath;
    }

    /**
     * Sets the parsing parameters that are handed to the mmCIF and PDB file readers.
     *
     * @param params
     *            the file parsing parameters to use
     */
    public void setFileParsingParams(FileParsingParameters params) {
        this.params = params;
    }

    /**
     * <b>[Optional]</b> This method changes the behavior when obsolete entries
     * are requested. Current behaviors are:
     * <ul>
     * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}
     *   Throw a {@link StructureException} (the default)</li>
     * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE}
     *   Load the requested ID from the PDB's obsolete repository</li>
     * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT}
     *   Load the most recent version of the requested structure</li>
     * </ul>
     *
     * <p>This setting may be silently ignored by implementations which do not have
     * access to the server to determine whether an entry is obsolete, such as
     * when fetching is disabled (see {@link #setFetchBehavior(FetchBehavior)}).
     * Note that an obsolete entry may still be
     * returned even if this is FETCH_CURRENT if the entry is found locally.
     *
     * @param behavior Whether to fetch obsolete records
     * @see #getObsoleteBehavior()
     * @since 4.0.0
     */
    public void setObsoleteBehavior(ObsoleteBehavior behavior) {
        obsoleteBehavior = behavior;
    }

    /**
     * Returns how this instance deals with obsolete entries. Note that this
     * setting may be ignored by some implementations or in some situations,
     * such as when fetching is disabled (see {@link #getFetchBehavior()}).
     *
     * <p>For most implementations, the default value is
     * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}.
     *
     * @return The ObsoleteBehavior
     * @since 4.0.0
     */
    public ObsoleteBehavior getObsoleteBehavior() {
        return obsoleteBehavior;
    }

    /**
     * Get the behavior for fetching files from the server
     * @return the fetch behavior
     */
    public FetchBehavior getFetchBehavior() {
        return fetchBehavior;
    }

    /**
     * Set the behavior for fetching files from the server
     * @param fetchBehavior the fetch behavior to use
     */
    public void setFetchBehavior(FetchBehavior fetchBehavior) {
        this.fetchBehavior = fetchBehavior;
    }

    /**
     * Set the path that is used to cache PDB files.
     *
     * @param path
     *            to a directory
     */
    public void setPath(String path) {
        this.path = FileDownloadUtils.expandUserHome(path);
    }

    public void setPdpprovider(PDPProvider pdpprovider) {
        this.pdpprovider = pdpprovider;
    }

    /**
     * @return the useMmCif
     */
    public boolean isUseMmCif() {
        return useMmCif;
    }

    /**
     * Sets whether to use mmCIF. Calling this always switches MMTF off.
     *
     * @param useMmCif
     *            the useMmCif to set
     */
    public void setUseMmCif(boolean useMmCif) {
        this.useMmCif = useMmCif;
        // Either way the user wants to use PDB or MMCIF
        this.useMmtf = false;
    }

    /**
     * Set whether to use mmtf. Enabling MMTF switches mmCIF off.
     *
     * @param useMmtf the input boolean to set
     */
    public void setUseMmtf(boolean useMmtf) {
        this.useMmtf = useMmtf;
        if (useMmtf) {
            useMmCif = false;
        }
    }

    /**
     * Returns useMmtf flag
     *
     * @return true if will load data via mmtf file format
     */
    public boolean isUseMmtf() {
        return this.useMmtf;
    }

    private boolean checkLoading(String name) {
        return currentlyLoading.contains(name);
    }

    /**
     * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the
     * {@link CathDatabase} at {@link CathFactory#getCathDatabase()}.
     */
    public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException {
        return getStructureForCathDomain(structureName, CathFactory.getCathDatabase());
    }

    /**
     * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the
     * specified {@link CathDatabase}.
     *
     * @throws StructureException
     *             if the database lookup yields no domain for the identifier, or on problems loading the structure
     */
    public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException {

        CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier());
        // guard against a lookup miss instead of failing later with a NullPointerException
        if (cathDomain == null) {
            throw new StructureException("Unknown CATH domain: " + structureName.getIdentifier());
        }

        // NOTE(review): the domain identifier is passed where getStructureForPdbId expects a
        // 4-character PDB id - confirm that CathDomain#getIdentifier() yields one.
        Structure s = getStructureForPdbId(cathDomain.getIdentifier());
        Structure n = cathDomain.reduce(s);

        // add the ligands of the chain...

        Chain newChain = n.getPolyChainByPDB(structureName.getChainId());
        if (newChain == null) {
            // nothing to attach the ligands to
            logger.warn("No polymer chain {} found in CATH domain {}, ligands are not added",
                    structureName.getChainId(), structureName.getIdentifier());
            return n;
        }

        List<Chain> origChains = s.getNonPolyChainsByPDB(structureName.getChainId());
        for (Chain origChain : origChains) {
            List<Group> ligands = origChain.getAtomGroups();

            for (Group g : ligands) {
                if (!newChain.getAtomGroups().contains(g)) {
                    newChain.addGroup(g);
                }
            }
        }

        return n;
    }

    /**
     * Marks the given name as currently being loaded.
     *
     * @param name the name (PDB id) that is about to be loaded
     */
    protected void flagLoading(String name) {
        // the backing TreeSet ignores duplicates, so a single add is enough and avoids
        // a separate check-then-act step
        currentlyLoading.add(name);
    }

    /**
     * Removes the loading mark set by {@link #flagLoading(String)}.
     *
     * @param name the name (PDB id) that finished loading
     */
    protected void flagLoadingFinished(String name) {
        currentlyLoading.remove(name);
    }

    /**
     * Loads a structure directly by PDB ID. If the id is flagged as currently being loaded,
     * this method waits until the flag is cleared before loading.
     *
     * @param pdbId the PDB id
     * @return the structure, or null if {@code pdbId} is null
     * @throws IOException if reading fails, or if the thread is interrupted while waiting for a
     *             load of the same id to finish (the interrupt flag is set again in that case)
     * @throws StructureException if {@code pdbId} is not 4 characters long
     */
    public Structure getStructureForPdbId(String pdbId) throws IOException, StructureException {
        if (pdbId == null) {
            return null;
        }
        if (pdbId.length() != 4) {
            throw new StructureException("Unrecognized PDB ID: " + pdbId);
        }
        while (checkLoading(pdbId)) {
            // waiting for loading to be finished...
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                // keep the interrupt visible to callers and stop waiting
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while waiting for " + pdbId + " to finish loading", e);
            }
        }

        Structure s;
        if (useMmtf) {
            logger.debug("loading from mmtf");
            s = loadStructureFromMmtfByPdbId(pdbId);
        } else if (useMmCif) {
            logger.debug("loading from mmcif");
            s = loadStructureFromCifByPdbId(pdbId);
        } else {
            logger.debug("loading from pdb");
            s = loadStructureFromPdbByPdbId(pdbId);
        }
        return s;
    }

    /**
     * Load a {@link Structure} from MMTF, using the configured fetch and obsolete behaviors.
     *
     * @param pdbId the input PDB id
     * @return the {@link Structure} object of the parsed structure
     * @throws IOException error reading from Web or file system
     */
    private Structure loadStructureFromMmtfByPdbId(String pdbId) throws IOException {
        logger.debug("Loading structure {} from mmtf file.", pdbId);
        // NOTE(review): unlike the mmCIF and PDB loaders, this reader is given neither the
        // cache path nor the parsing parameters, and the id is not flagged as loading - confirm.
        MMTFFileReader reader = new MMTFFileReader();
        reader.setFetchBehavior(fetchBehavior);
        reader.setObsoleteBehavior(obsoleteBehavior);
        return reader.getStructureById(pdbId.toLowerCase());
    }

    /**
     * Loads a structure from an mmCIF file under {@link #getPath()}, flagging the id as loading
     * for the duration of the read.
     *
     * @param pdbId the PDB id
     * @return the parsed structure
     * @throws IOException
     * @throws StructureException
     */
    protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException, StructureException {

        logger.debug("Loading structure {} from mmCIF file {}.", pdbId, path);
        Structure s;
        flagLoading(pdbId);
        try {
            MMCIFFileReader reader = new MMCIFFileReader(path);
            reader.setFetchBehavior(fetchBehavior);
            reader.setObsoleteBehavior(obsoleteBehavior);
            reader.setFileParsingParameters(params);
            s = reader.getStructureById(pdbId.toLowerCase());
        } finally {
            flagLoadingFinished(pdbId);
        }

        return s;
    }

    /**
     * Loads a structure from a PDB file under {@link #getPath()}, flagging the id as loading
     * for the duration of the read.
     *
     * @param pdbId the PDB id
     * @return the parsed structure
     * @throws IOException
     * @throws StructureException
     */
    protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException, StructureException {

        logger.debug("Loading structure {} from PDB file {}.", pdbId, path);
        Structure s;
        flagLoading(pdbId);
        try {
            PDBFileReader reader = new PDBFileReader(path);
            reader.setFetchBehavior(fetchBehavior);
            reader.setObsoleteBehavior(obsoleteBehavior);
            reader.setFileParsingParameters(params);
            s = reader.getStructureById(pdbId.toLowerCase());
        } finally {
            flagLoadingFinished(pdbId);
        }

        return s;
    }

}