001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.util; 022 023import java.io.IOException; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.List; 027import java.util.TreeSet; 028 029import org.biojava.nbio.core.util.InputStreamProvider; 030import org.biojava.nbio.structure.Atom; 031import org.biojava.nbio.structure.AtomPositionMap; 032import org.biojava.nbio.structure.Chain; 033import org.biojava.nbio.structure.Group; 034import org.biojava.nbio.structure.ResidueRange; 035import org.biojava.nbio.structure.ResidueRangeAndLength; 036import org.biojava.nbio.structure.Structure; 037import org.biojava.nbio.structure.StructureException; 038import org.biojava.nbio.structure.StructureIO; 039import org.biojava.nbio.structure.StructureIdentifier; 040import org.biojava.nbio.structure.StructureTools; 041import org.biojava.nbio.structure.align.client.StructureName; 042import org.biojava.nbio.structure.cath.CathDatabase; 043import org.biojava.nbio.structure.cath.CathDomain; 044import org.biojava.nbio.structure.cath.CathFactory; 045import org.biojava.nbio.structure.domain.PDPProvider; 046import org.biojava.nbio.structure.domain.RemotePDPProvider; 047import org.biojava.nbio.structure.io.FileParsingParameters; 048import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; 049import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; 050import org.biojava.nbio.structure.io.MMCIFFileReader; 051import org.biojava.nbio.structure.io.PDBFileReader; 052import org.biojava.nbio.structure.io.util.FileDownloadUtils; 053import org.biojava.nbio.structure.quaternary.io.BioUnitDataProviderFactory; 054import org.biojava.nbio.structure.quaternary.io.MmCifBiolAssemblyProvider; 055import org.biojava.nbio.structure.quaternary.io.PDBBioUnitDataProvider; 056import org.biojava.nbio.structure.scop.CachedRemoteScopInstallation; 057import org.biojava.nbio.structure.scop.ScopDatabase; 058import org.biojava.nbio.structure.scop.ScopDescription; 059import org.biojava.nbio.structure.scop.ScopDomain; 060import org.biojava.nbio.structure.scop.ScopFactory; 061import org.slf4j.Logger; 062import org.slf4j.LoggerFactory; 063 064/** 065 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently 066 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache 067 * is a soft-cache, this means it won't cause out of memory exceptions, but garbage collects the data if the Java 068 * virtual machine needs to free up space. The AtomCache is thread-safe. 069 * 070 * @author Andreas Prlic 071 * @author Spencer Bliven 072 * @author Peter Rose 073 * @since 3.0 074 */ 075public class AtomCache { 076 077 private static final Logger logger = LoggerFactory.getLogger(AtomCache.class); 078 079 public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:"; 080 public static final String CHAIN_NR_SYMBOL = ":"; 081 public static final String CHAIN_SPLIT_SYMBOL = "."; 082 083 public static final String PDP_DOMAIN_IDENTIFIER = "PDP:"; 084 085 public static final String UNDERSCORE = "_"; 086 087 private static final String FILE_SEPARATOR = System.getProperty("file.separator"); 088 089 protected FileParsingParameters params; 090 protected PDPProvider pdpprovider; 091 092 private FetchBehavior fetchBehavior; 093 private ObsoleteBehavior obsoleteBehavior; 094 095 private String cachePath; 096 097 // make sure IDs are loaded uniquely 098 private Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<String>()); 099 100 private String path; 101 102 private boolean useMmCif; 103 104 /** 105 * Default AtomCache constructor. 106 * 107 * Usually stores files in a temp directory, but this can be overriden by setting the PDB_DIR variable at runtime. 108 * 109 * @see UserConfiguration#UserConfiguration() 110 */ 111 public AtomCache() { 112 this(new UserConfiguration()); 113 } 114 115 /** 116 * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. It will use the same value for pdbFilePath and cachePath. 117 * 118 * @param pdbFilePath 119 * a directory in the file system to use as a location to cache files. 120 */ 121 public AtomCache(String pdbFilePath) { 122 this(pdbFilePath,pdbFilePath); 123 } 124 125 /** 126 * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. 127 * 128 * @param pdbFilePath 129 * a directory in the file system to use as a location to cache files. 130 * @param cachePath 131 */ 132 public AtomCache(String pdbFilePath, String cachePath) { 133 134 logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}",pdbFilePath, cachePath); 135 136 if (!pdbFilePath.endsWith(FILE_SEPARATOR)) { 137 pdbFilePath += FILE_SEPARATOR; 138 } 139 140 // we are caching the binary files that contain the PDBs gzipped 141 // that is the most memory efficient way of caching... 142 // set the input stream provider to caching mode 143 System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true"); 144 145 setPath(pdbFilePath); 146 147 this.cachePath = cachePath; 148 149 fetchBehavior = FetchBehavior.DEFAULT; 150 obsoleteBehavior = ObsoleteBehavior.DEFAULT; 151 152 currentlyLoading.clear(); 153 params = new FileParsingParameters(); 154 155 setUseMmCif(true); 156 157 } 158 159 /** 160 * @param isSplit Ignored 161 * @deprecated isSplit parameter is ignored (4.0.0) 162 */ 163 @Deprecated 164 public AtomCache(String pdbFilePath,boolean isSplit) { 165 this(pdbFilePath); 166 } 167 /** 168 * @param isSplit Ignored 169 * @deprecated isSplit parameter is ignored (4.0.0) 170 */ 171 @Deprecated 172 public AtomCache(String pdbFilePath, String cachePath,boolean isSplit) { 173 this(pdbFilePath,cachePath); 174 } 175 176 /** 177 * Creates a new AtomCache object based on the provided UserConfiguration. 178 * 179 * @param config 180 * the UserConfiguration to use for this cache. 181 */ 182 public AtomCache(UserConfiguration config) { 183 this(config.getPdbFilePath(), config.getCacheFilePath()); 184 fetchBehavior = config.getFetchBehavior(); 185 obsoleteBehavior = config.getObsoleteBehavior(); 186 useMmCif = config.getFileFormat().equals( UserConfiguration.MMCIF_FORMAT ); 187 } 188 189 /** 190 * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions. 191 * <p> 192 * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)} 193 * for a more general solution. 194 * @param name 195 * @return an array of Atoms. 196 * @throws IOException 197 * @throws StructureException 198 * @see 199 */ 200 public Atom[] getAtoms(String name) throws IOException, StructureException { 201 return getAtoms(new StructureName(name)); 202 } 203 public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException { 204 205 Atom[] atoms = null; 206 207 // System.out.println("loading " + name); 208 Structure s = getStructure(name); 209 210 atoms = StructureTools.getAtomCAArray(s); 211 212 /* 213 * synchronized (cache){ cache.put(name, atoms); } 214 */ 215 216 return atoms; 217 } 218 /** 219 * Returns the representative atoms for the provided name. 220 * See {@link #getStructure(String)} for supported naming conventions. 221 * 222 * @param name 223 * @return an array of Atoms. 224 * @throws IOException 225 * @throws StructureException 226 * @see 227 */ 228 public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException { 229 return getRepresentativeAtoms(new StructureName(name)); 230 } 231 public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException { 232 233 Atom[] atoms = null; 234 235 Structure s = getStructure(name); 236 237 atoms = StructureTools.getRepresentativeAtomArray(s); 238 239 /* 240 * synchronized (cache){ cache.put(name, atoms); } 241 */ 242 243 return atoms; 244 } 245 /** 246 * Loads the biological assembly for a given PDB ID and bioAssemblyId. If a bioAssemblyId > 0 is specified, the 247 * corresponding biological assembly file will be loaded. Note, the number of available biological unit files 248 * varies. Many entries don't have a biological assembly specified (i.e. NMR structures), many entries have only one 249 * biological assembly (bioAssemblyId=1), and a few structures have multiple biological assemblies. Set 250 * bioAssemblyFallback to true, to download the original PDB file in cases that a biological assembly file is not 251 * available. 252 * 253 * @param pdbId 254 * the PDB ID 255 * @param bioAssemblyId 256 * the 1-based index of the biological assembly (0 gets the asymmetric unit) 257 * @param bioAssemblyFallback 258 * if true, try reading original PDB file in case the biological assembly file is not available 259 * @return a structure object 260 * @throws IOException 261 * @throws StructureException 262 * @author Peter Rose 263 * @since 3.2 264 */ 265 public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean bioAssemblyFallback) 266 throws StructureException, IOException { 267 268 if (bioAssemblyId < 0) { 269 throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId " 270 + bioAssemblyId); 271 } 272 Structure s = StructureIO.getBiologicalAssembly(pdbId, bioAssemblyId,this); 273 274 if ( s == null && bioAssemblyFallback) 275 return StructureIO.getBiologicalAssembly(pdbId, 0,this); 276 277 return s; 278 } 279 280 /** 281 * Loads the default biological unit (e.g. *.pdb1.gz). If it is not available, 282 * the asymmetric unit will be loaded, i.e. for NMR structures. 283 * 284 * <p>Biological assemblies can also be accessed using 285 * <tt>getStructure("BIO:<i>[pdbId]</i>")</tt> 286 * @param pdbId 287 * the PDB ID 288 * @return a structure object 289 * @throws IOException 290 * @throws StructureException 291 * @since 4.2 292 */ 293 public Structure getBiologicalAssembly(String pdbId) throws StructureException, IOException { 294 int bioAssemblyId = 1; 295 return getBiologicalAssembly(pdbId, bioAssemblyId); 296 } 297 /** 298 * Loads the default biological unit (e.g. *.pdb1.gz). If it is not available, 299 * the asymmetric unit will be loaded, i.e. for NMR structures. 300 * 301 * @param pdbId 302 * the PDB ID 303 * @return a structure object 304 * @throws IOException 305 * @throws StructureException 306 * @since 3.2 307 * @deprecated Renamed to {@link #getBiologicalAssembly(String)} in 4.2 308 */ 309 @Deprecated 310 public Structure getBiologicalUnit(String pdbId) throws StructureException, IOException { 311 return getBiologicalAssembly(pdbId); 312 } 313 /** 314 * Loads the default biological unit (e.g. *.pdb1.gz). If it is not available, 315 * the asymmetric unit will be loaded, i.e. for NMR structures. 316 * 317 * @param pdbId 318 * the PDB ID 319 * @param bioAssemblyId 320 * the 1-based index of the biological assembly (0 gets the asymmetric unit) 321 * @return a structure object 322 * @throws IOException 323 * @throws StructureException 324 * @since 4.2 325 */ 326 public Structure getBiologicalAssembly(String pdbId,int bioAssemblyId) throws StructureException, IOException { 327 boolean bioAssemblyFallback = true; 328 return getBiologicalAssembly(pdbId, bioAssemblyId, bioAssemblyFallback); 329 } 330 331 /** 332 * Returns the path that contains the caching file for utility data, such as domain definitions. 333 * 334 * @return 335 */ 336 public String getCachePath() { 337 return cachePath; 338 } 339 340 public FileParsingParameters getFileParsingParams() { 341 return params; 342 } 343 344 /** 345 * Get the path that is used to cache PDB files. 346 * 347 * @return path to a directory 348 */ 349 public String getPath() { 350 return path; 351 } 352 353 public PDPProvider getPdpprovider() { 354 return pdpprovider; 355 } 356 357 /** 358 * Request a Structure based on a <i>name</i>. 359 * 360 * <pre> 361 * Formal specification for how to specify the <i>name</i>: 362 * 363 * name := pdbID 364 * | pdbID '.' chainID 365 * | pdbID '.' range 366 * | scopID 367 * range := '('? range (',' range)? ')'? 368 * | chainID 369 * | chainID '_' resNum '-' resNum 370 * pdbID := [0-9][a-zA-Z0-9]{3} 371 * chainID := [a-zA-Z0-9] 372 * scopID := 'd' pdbID [a-z_][0-9_] 373 * resNum := [-+]?[0-9]+[A-Za-z]? 374 * 375 * 376 * Example structures: 377 * 1TIM #whole structure 378 * 4HHB.C #single chain 379 * 4GCR.A_1-83 #one domain, by residue number 380 * 3AA0.A,B #two chains treated as one structure 381 * d2bq6a1 #scop domain 382 * </pre> 383 * 384 * With the additional set of rules: 385 * 386 * <ul> 387 * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model 388 * only (for NMR). 389 * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li> 390 * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, 391 * see {@link #setStrictSCOP(boolean)}</li> 392 * <li>URLs are accepted as well</li> 393 * </ul> 394 * 395 * <p>Note that this method should not be used in StructureIdentifier 396 * implementations to avoid circular calls. 397 * @param name 398 * @return a Structure object, or null if name appears improperly formated (eg too short, etc) 399 * @throws IOException 400 * The PDB file cannot be cached due to IO errors 401 * @throws StructureException 402 * The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon 403 * errors, eg for poorly formatted subranges. 404 */ 405 public Structure getStructure(String name) throws IOException, StructureException { 406 StructureName structureName = new StructureName(name); 407 408 return getStructure(structureName); 409 } 410 411 /** 412 * Get the structure corresponding to the given {@link StructureIdentifier}. 413 * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)} 414 * followed by {@link StructureIdentifier#reduce(Structure)}. 415 * 416 * <p>Note that this method should not be used in StructureIdentifier 417 * implementations to avoid circular calls. 418 * @param strucId 419 * @return 420 * @throws IOException 421 * @throws StructureException 422 */ 423 public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException { 424 Structure s = strucId.loadStructure(this); 425 Structure r = strucId.reduce(s); 426 r.setStructureIdentifier(strucId); 427 return r; 428 429// if (name.length() < 4) { 430// throw new IllegalArgumentException("Can't interpret IDs that are shorter than 4 characters!"); 431// } 432// 433// Structure n = null; 434// 435// boolean useChainNr = false; 436// boolean useDomainInfo = false; 437// String range = null; 438// int chainNr = -1; 439// 440// 441// StructureName structureName = new StructureName(name); 442// 443// String pdbId = null; 444// String chainId = null; 445// 446// if (name.length() == 4) { 447// 448// pdbId = name; 449// Structure s; 450// if (useMmCif) { 451// s = loadStructureFromCifByPdbId(pdbId); 452// } else { 453// s = loadStructureFromPdbByPdbId(pdbId); 454// } 455// return s; 456// } else if (structureName.isScopName()) { 457// 458// // return based on SCOP domain ID 459// return getStructureFromSCOPDomain(name); 460// } else if (structureName.isCathID()) { 461// return getStructureForCathDomain(structureName, CathFactory.getCathDatabase()); 462// } else if (name.length() == 6) { 463// // name is PDB.CHAINID style (e.g. 4hhb.A) 464// 465// pdbId = name.substring(0, 4); 466// if (name.substring(4, 5).equals(CHAIN_SPLIT_SYMBOL)) { 467// chainId = name.substring(5, 6); 468// } else if (name.substring(4, 5).equals(CHAIN_NR_SYMBOL)) { 469// 470// useChainNr = true; 471// chainNr = Integer.parseInt(name.substring(5, 6)); 472// } 473// 474// } else if (name.startsWith("file:/") || name.startsWith("http:/")) { 475// // this is a URL 476// 477// URL url = new URL(name); 478// return getStructureFromURL(url); 479// 480// 481// } else if (structureName.isPDPDomain()) { 482// 483// // this is a PDP domain definition 484// 485// return getPDPStructure(name); 486// 487// } else if (name.startsWith(BIOL_ASSEMBLY_IDENTIFIER)) { 488// 489// return getBioAssembly(name); 490// 491// } else if (name.length() > 6 && !name.startsWith(PDP_DOMAIN_IDENTIFIER) 492// && (name.contains(CHAIN_NR_SYMBOL) || name.contains(UNDERSCORE)) 493// && !(name.startsWith("file:/") || name.startsWith("http:/")) 494// 495// ) { 496// 497// // this is a name + range 498// 499// pdbId = name.substring(0, 4); 500// // this ID has domain split information... 501// useDomainInfo = true; 502// range = name.substring(5); 503// 504// } 505// 506// // System.out.println("got: >" + name + "< " + pdbId + " " + chainId + " useChainNr:" + useChainNr + " " 507// // +chainNr + " useDomainInfo:" + useDomainInfo + " " + range); 508// 509// if (pdbId == null) { 510// 511// return null; 512// } 513// 514// while (checkLoading(pdbId)) { 515// // waiting for loading to be finished... 516// 517// try { 518// Thread.sleep(100); 519// } catch (InterruptedException e) { 520// logger.error(e.getMessage()); 521// } 522// 523// } 524// 525// // long start = System.currentTimeMillis(); 526// 527// Structure s; 528// if (useMmCif) { 529// s = loadStructureFromCifByPdbId(pdbId); 530// } else { 531// s = loadStructureFromPdbByPdbId(pdbId); 532// } 533// 534// // long end = System.currentTimeMillis(); 535// // System.out.println("time to load " + pdbId + " " + (end-start) + "\t size :" + 536// // StructureTools.getNrAtoms(s) + "\t cached: " + cache.size()); 537// 538// if (chainId == null && chainNr < 0 && range == null) { 539// // we only want the 1st model in this case 540// n = StructureTools.getReducedStructure(s, -1); 541// } else { 542// 543// if (useChainNr) { 544// // System.out.println("using ChainNr"); 545// n = StructureTools.getReducedStructure(s, chainNr); 546// } else if (useDomainInfo) { 547// // System.out.println("calling getSubRanges"); 548// n = StructureTools.getSubRanges(s, range); 549// } else { 550// // System.out.println("reducing Chain Id " + chainId); 551// n = StructureTools.getReducedStructure(s, chainId); 552// } 553// } 554// 555// 556// 557// n.setName(name); 558// return n; 559 560 } 561 562 /** 563 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 564 * 565 * @param domain 566 * a SCOP domain 567 * @return a Structure object 568 * @throws IOException 569 * @throws StructureException 570 */ 571 public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException { 572 return getStructureForDomain(domain, ScopFactory.getSCOP()); 573 } 574 575 /** 576 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 577 * 578 * @param domain 579 * a SCOP domain 580 * @param scopDatabase 581 * A {@link ScopDatabase} to use 582 * @return a Structure object 583 * @throws IOException 584 * @throws StructureException 585 */ 586 public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException, 587 StructureException { 588 return getStructureForDomain(domain, scopDatabase, false); 589 } 590 591 /** 592 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 593 * 594 * @param domain 595 * a SCOP domain 596 * @param scopDatabase 597 * A {@link ScopDatabase} to use 598 * @param strictLigandHandling 599 * If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP 600 * domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the 601 * definition (residue numbers) of the SCOP domain 602 * @return a Structure object 603 * @throws IOException 604 * @throws StructureException 605 */ 606 public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling) 607 throws IOException, StructureException { 608 609 String pdbId = domain.getPdbId(); 610 Structure fullStructure = getStructureForPdbId(pdbId); 611 Structure structure = domain.reduce(fullStructure); 612 613 // TODO It would be better to move all of this into the reduce method, 614 // but that would require ligand handling properties in StructureIdentifiers 615 616 // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in 617 // specifically, we add a ligand if and only if it occurs within the domain 618 AtomPositionMap map = null; 619 List<ResidueRangeAndLength> rrs = null; 620 if (strictLigandHandling) { 621 map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER); 622 rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map); 623 } 624 for (Chain chain : fullStructure.getChains()) { 625 if (!structure.hasChain(chain.getChainID())) { 626 continue; // we can't do anything with a chain our domain 627 } 628 // doesn't contain 629 Chain newChain = structure.getChainByPDB(chain.getChainID()); 630 List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups()); 631 for (Group group : ligands) { 632 boolean shouldContain = true; 633 if (strictLigandHandling) { 634 shouldContain = false; // whether the ligand occurs within the domain 635 for (ResidueRange rr : rrs) { 636 if (rr.contains(group.getResidueNumber(), map)) { 637 shouldContain = true; 638 } 639 } 640 } 641 boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate 642 // ligands 643 if (shouldContain && !alreadyContains) { 644 newChain.addGroup(group); 645 } 646 } 647 } 648 649 // build a more meaningful description for the new structure 650 StringBuilder header = new StringBuilder(); 651 header.append(domain.getClassificationId()); 652 if (scopDatabase != null) { 653 int sf = domain.getSuperfamilyId(); 654 ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf); 655 if (description != null) { 656 header.append(" | "); 657 header.append(description.getDescription()); 658 } 659 } 660 structure.getPDBHeader().setDescription(header.toString()); 661 662 return structure; 663 664 } 665 666 /** 667 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 668 * 669 * @param scopId 670 * a SCOP Id 671 * @return a Structure object 672 * @throws IOException 673 * @throws StructureException 674 */ 675 public Structure getStructureForDomain(String scopId) throws IOException, StructureException { 676 return getStructureForDomain(scopId, ScopFactory.getSCOP()); 677 } 678 679 /** 680 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 681 * 682 * @param scopId 683 * a SCOP Id 684 * @param scopDatabase 685 * A {@link ScopDatabase} to use 686 * @return a Structure object 687 * @throws IOException 688 * @throws StructureException 689 */ 690 public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException, 691 StructureException { 692 ScopDomain domain = scopDatabase.getDomainByScopID(scopId); 693 return getStructureForDomain(domain, scopDatabase); 694 } 695 696 /** 697 * Does the cache automatically download files that are missing from the local installation from the PDB FTP site? 698 * 699 * @return flag 700 * @deprecated Use {@link #getFetchBehavior()} 701 */ 702 @Deprecated 703 public boolean isAutoFetch() { 704 return fetchBehavior != FetchBehavior.LOCAL_ONLY; 705 } 706 707 /** 708 * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>. 709 * 710 * @return the fetchCurrent 711 * @deprecated Use {@link FileParsingParameters#getObsoleteBehavior()} instead (4.0.0) 712 */ 713 @Deprecated 714 public boolean isFetchCurrent() { 715 return getObsoleteBehavior() == ObsoleteBehavior.FETCH_CURRENT; 716 } 717 718 /** 719 * forces the cache to fetch the file if its status is OBSOLETE. This feature has a higher priority than 720 * {@link #setFetchCurrent(boolean)}.<br> 721 * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>. 722 * 723 * @return the fetchFileEvenIfObsolete 724 * @author Amr AL-Hossary 725 * @see #fetchCurrent 726 * @since 3.0.2 727 * @deprecated Use {@link FileParsingParameters#getObsoleteBehavior()} instead (4.0.0) 728 */ 729 @Deprecated 730 public boolean isFetchFileEvenIfObsolete() { 731 return getObsoleteBehavior() == ObsoleteBehavior.FETCH_OBSOLETE; 732 } 733 734 735 /** 736 * Scop handling was changed in 4.2.0. For behaviour equivalent to 737 * strictSCOP==true, use {@link ScopDatabase#getDomainByScopID(String)}. 738 * For strictSCOP==False, create a {@link StructureName} or use 739 * {@link StructureName#guessScopDomain(String, ScopDatabase)} explicitely. 740 * 741 * @return false; ignored 742 * @deprecated since 4.2 743 */ 744 @Deprecated 745 public boolean isStrictSCOP() { 746 return false; 747 } 748 749 /** 750 * Send a signal to the cache that the system is shutting down. Notifies underlying SerializableCache instances to 751 * flush themselves... 752 */ 753 public void notifyShutdown() { 754 // System.out.println(" AtomCache got notify shutdown.."); 755 if (pdpprovider != null) { 756 if (pdpprovider instanceof RemotePDPProvider) { 757 RemotePDPProvider remotePDP = (RemotePDPProvider) pdpprovider; 758 remotePDP.flushCache(); 759 } 760 } 761 762 // todo: use a SCOP implementation that is backed by SerializableCache 763 ScopDatabase scopInstallation = ScopFactory.getSCOP(); 764 if (scopInstallation != null) { 765 if (scopInstallation instanceof CachedRemoteScopInstallation) { 766 CachedRemoteScopInstallation cacheScop = (CachedRemoteScopInstallation) scopInstallation; 767 cacheScop.flushCache(); 768 } 769 } 770 771 } 772 773 /** 774 * Does the cache automatically download files that are missing from the local installation from the PDB FTP site? 775 * 776 * @param autoFetch 777 * flag 778 * @deprecated Use {@link #getFetchBehavior()} 779 */ 780 @Deprecated 781 public void setAutoFetch(boolean autoFetch) { 782 if(autoFetch) { 783 setFetchBehavior(FetchBehavior.DEFAULT); 784 } else { 785 setFetchBehavior(FetchBehavior.LOCAL_ONLY); 786 } 787 } 788 789 /** 790 * set the location at which utility data should be cached. 791 * 792 * @param cachePath 793 */ 794 public void setCachePath(String cachePath) { 795 this.cachePath = cachePath; 796 } 797 798 /** 799 * if enabled, the reader searches for the newest possible PDB ID, if not present in he local installation. The 800 * {@link #setFetchFileEvenIfObsolete(boolean)} function has a higher priority than this function.<br> 801 * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>. 802 * 803 * @param fetchCurrent 804 * the fetchCurrent to set 805 * @author Amr AL-Hossary 806 * @see #setFetchFileEvenIfObsolete(boolean) 807 * @since 3.0.2 808 * @deprecated Use {@link FileParsingParameters#setObsoleteBehavior()} instead (4.0.0) 809 */ 810 @Deprecated 811 public void setFetchCurrent(boolean fetchNewestCurrent) { 812 if(fetchNewestCurrent) { 813 setObsoleteBehavior(ObsoleteBehavior.FETCH_CURRENT); 814 } else { 815 if(getObsoleteBehavior() == ObsoleteBehavior.FETCH_CURRENT) { 816 setObsoleteBehavior(ObsoleteBehavior.DEFAULT); 817 } 818 } 819 } 820 821 /** 822 * <b>N.B.</b> This feature won't work unless the structure wasn't found & autoFetch is set to <code>true</code>. 823 * 824 * @param fetchFileEvenIfObsolete 825 * the fetchFileEvenIfObsolete to set 826 * @deprecated Use {@link FileParsingParameters#setObsoleteBehavior()} instead (4.0.0) 827 */ 828 @Deprecated 829 public void setFetchFileEvenIfObsolete(boolean fetchFileEvenIfObsolete) { 830 if(fetchFileEvenIfObsolete) { 831 setObsoleteBehavior(ObsoleteBehavior.FETCH_OBSOLETE); 832 } else { 833 if(getObsoleteBehavior() == ObsoleteBehavior.FETCH_OBSOLETE) { 834 setObsoleteBehavior(ObsoleteBehavior.DEFAULT); 835 } 836 } 837 } 838 839 public void setFileParsingParams(FileParsingParameters params) { 840 this.params = params; 841 } 842 843 844 /** 845 * <b>[Optional]</b> This method changes the behavior when obsolete entries 846 * are requested. Current behaviors are: 847 * <ul> 848 * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION} 849 * Throw a {@link StructureException} (the default) 850 * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE} 851 * Load the requested ID from the PDB's obsolete repository 852 * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT} 853 * Load the most recent version of the requested structure 854 * 855 * <p>This setting may be silently ignored by implementations which do not have 856 * access to the server to determine whether an entry is obsolete, such as 857 * if {@link #isAutoFetch()} is false. Note that an obsolete entry may still be 858 * returned even this is FETCH_CURRENT if the entry is found locally. 859 * 860 * @param fetchFileEvenIfObsolete Whether to fetch obsolete records 861 * @see #setFetchCurrent(boolean) 862 * @since 4.0.0 863 */ 864 public void setObsoleteBehavior(ObsoleteBehavior behavior) { 865 obsoleteBehavior = behavior; 866 } 867 868 /** 869 * Returns how this instance deals with obsolete entries. Note that this 870 * setting may be ignored by some implementations or in some situations, 871 * such as when {@link #isAutoFetch()} is false. 872 * 873 * <p>For most implementations, the default value is 874 * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}. 875 * 876 * @return The ObsoleteBehavior 877 * @since 4.0.0 878 */ 879 public ObsoleteBehavior getObsoleteBehavior() { 880 return obsoleteBehavior; 881 } 882 883 /** 884 * Get the behavior for fetching files from the server 885 * @return 886 */ 887 public FetchBehavior getFetchBehavior() { 888 return fetchBehavior; 889 } 890 /** 891 * Set the behavior for fetching files from the server 892 * @param fetchBehavior 893 */ 894 public void setFetchBehavior(FetchBehavior fetchBehavior) { 895 this.fetchBehavior = fetchBehavior; 896 } 897 898 /** 899 * Set the path that is used to cache PDB files. 900 * 901 * @param path 902 * to a directory 903 */ 904 public void setPath(String path) { 905 this.path = FileDownloadUtils.expandUserHome(path); 906 } 907 908 public void setPdpprovider(PDPProvider pdpprovider) { 909 this.pdpprovider = pdpprovider; 910 } 911 912 913 /** 914 * This method does nothing. 915 * 916 * Scop handling was changed in 4.2.0. For behaviour equivalent to 917 * strictSCOP==true, use {@link ScopDatabase#getDomainByScopID(String)}. 918 * For strictSCOP==False, create a {@link StructureName} or use 919 * {@link StructureName#guessScopDomain(String, ScopDatabase)} explicitely. 920 * 921 * @param strictSCOP Ignored 922 * @deprecated Removed in 4.2.0 923 */ 924 @Deprecated 925 public void setStrictSCOP(boolean ignored) {} 926 927 /** 928 * @return the useMmCif 929 */ 930 public boolean isUseMmCif() { 931 return useMmCif; 932 } 933 934 /** 935 * @param useMmCif 936 * the useMmCif to set 937 */ 938 public void setUseMmCif(boolean useMmCif) { 939 this.useMmCif = useMmCif; 940 941 if ( useMmCif) { 942 // get bio assembly from mmcif file 943 944 BioUnitDataProviderFactory.setBioUnitDataProvider(MmCifBiolAssemblyProvider.class); 945 946 } else { 947 948 BioUnitDataProviderFactory.setBioUnitDataProvider(PDBBioUnitDataProvider.class); 949 950 } 951 } 952 953 private boolean checkLoading(String name) { 954 return currentlyLoading.contains(name); 955 956 } 957 958 /** 959 * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the the {@link CathDatabase} 960 * at {@link CathFactory#getCathDatabase()}. 961 */ 962 public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException { 963 return getStructureForCathDomain(structureName, CathFactory.getCathDatabase()); 964 } 965 966 /** 967 * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}. 968 */ 969 public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException { 970 971 CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier()); 972 973 Structure s = getStructureForPdbId(cathDomain.getIdentifier()); 974 Structure n = cathDomain.reduce(s); 975 976 // add the ligands of the chain... 977 978 Chain newChain = n.getChainByPDB(structureName.getChainId()); 979 Chain origChain = s.getChainByPDB(structureName.getChainId()); 980 List<Group> ligands = origChain.getAtomLigands(); 981 982 for (Group g : ligands) { 983 if (!newChain.getAtomGroups().contains(g)) { 984 newChain.addGroup(g); 985 } 986 } 987 988 return n; 989 } 990 991 protected void flagLoading(String name) { 992 if (!currentlyLoading.contains(name)) { 993 994 currentlyLoading.add(name); 995 } 996 } 997 998 protected void flagLoadingFinished(String name) { 999 1000 currentlyLoading.remove(name); 1001 } 1002 1003 /** 1004 * Loads a structure directly by PDB ID 1005 * @param pdbId 1006 * @return 1007 * @throws IOException 1008 * @throws StructureException 1009 */ 1010 public Structure getStructureForPdbId(String pdbId) throws IOException, StructureException { 1011 if(pdbId == null) 1012 return null; 1013 if(pdbId.length() != 4) { 1014 throw new StructureException("Unrecognized PDB ID: "+pdbId); 1015 } 1016 while (checkLoading(pdbId)) { 1017 // waiting for loading to be finished... 1018 1019 try { 1020 Thread.sleep(100); 1021 } catch (InterruptedException e) { 1022 logger.error(e.getMessage()); 1023 } 1024 1025 } 1026 1027 Structure s; 1028 if (useMmCif) { 1029 s = loadStructureFromCifByPdbId(pdbId); 1030 } else { 1031 s = loadStructureFromPdbByPdbId(pdbId); 1032 } 1033 return s; 1034 } 1035 1036 1037 protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException, StructureException { 1038 1039 Structure s; 1040 flagLoading(pdbId); 1041 try { 1042 MMCIFFileReader reader = new MMCIFFileReader(path); 1043 reader.setFetchBehavior(fetchBehavior); 1044 reader.setObsoleteBehavior(obsoleteBehavior); 1045 1046 reader.setFileParsingParameters(params); 1047 1048 s = reader.getStructureById(pdbId.toLowerCase()); 1049 1050 } finally { 1051 flagLoadingFinished(pdbId); 1052 } 1053 1054 return s; 1055 } 1056 1057 protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException, StructureException { 1058 1059 Structure s; 1060 flagLoading(pdbId); 1061 try { 1062 PDBFileReader reader = new PDBFileReader(path); 1063 reader.setFetchBehavior(fetchBehavior); 1064 reader.setObsoleteBehavior(obsoleteBehavior); 1065 1066 reader.setFileParsingParameters(params); 1067 1068 s = reader.getStructureById(pdbId.toLowerCase()); 1069 1070 } finally { 1071 flagLoadingFinished(pdbId); 1072 } 1073 1074 return s; 1075 } 1076 1077}