001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.util; 022 023import java.io.IOException; 024import java.util.ArrayList; 025import java.util.Collection; 026import java.util.Collections; 027import java.util.List; 028import java.util.TreeSet; 029 030import org.biojava.nbio.core.util.InputStreamProvider; 031import org.biojava.nbio.structure.*; 032import org.biojava.nbio.structure.align.client.StructureName; 033import org.biojava.nbio.structure.cath.CathDatabase; 034import org.biojava.nbio.structure.cath.CathDomain; 035import org.biojava.nbio.structure.cath.CathFactory; 036import org.biojava.nbio.structure.io.BcifFileReader; 037import org.biojava.nbio.structure.io.CifFileReader; 038import org.biojava.nbio.structure.io.FileParsingParameters; 039import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; 040import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; 041import org.biojava.nbio.structure.io.PDBFileReader; 042import org.biojava.nbio.core.util.FileDownloadUtils; 043import org.biojava.nbio.structure.io.StructureFiletype; 044import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; 045import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 046import org.biojava.nbio.structure.scop.ScopDatabase; 047import org.biojava.nbio.structure.scop.ScopDescription; 048import org.biojava.nbio.structure.scop.ScopDomain; 049import org.biojava.nbio.structure.scop.ScopFactory; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053/** 054 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently 055 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache 056 * is a soft-cache, this means it won't cause out of memory exceptions, but garbage collects the data if the Java 057 * virtual machine needs to free up space. The AtomCache is thread-safe. 058 * 059 * @author Andreas Prlic 060 * @author Spencer Bliven 061 * @author Peter Rose 062 * @since 3.0 063 */ 064public class AtomCache { 065 private static final Logger logger = LoggerFactory.getLogger(AtomCache.class); 066 067 /** 068 * The default output bioassembly style: if true the bioassemblies are multimodel, 069 * if false the bioassemblies are flat with renamed chains for symmetry-partners. 070 */ 071 public static final boolean DEFAULT_BIOASSEMBLY_STYLE = false; 072 073 public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:"; 074 public static final String CHAIN_NR_SYMBOL = ":"; 075 public static final String CHAIN_SPLIT_SYMBOL = "."; 076 public static final String UNDERSCORE = "_"; 077 078 private static final String FILE_SEPARATOR = System.getProperty("file.separator"); 079 080 protected FileParsingParameters params; 081 private FetchBehavior fetchBehavior; 082 private ObsoleteBehavior obsoleteBehavior; 083 private String cachePath; 084 085 // make sure IDs are loaded uniquely 086 private final Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<>()); 087 088 private String path; 089 private StructureFiletype filetype = StructureFiletype.BCIF; 090 091 /** 092 * Default AtomCache constructor. 093 * 094 * Usually stores files in a temp directory, but this can be overriden by setting the PDB_DIR variable at runtime. 095 * 096 * @see UserConfiguration#UserConfiguration() 097 */ 098 public AtomCache() { 099 this(new UserConfiguration()); 100 } 101 102 /** 103 * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. It will use the same value for pdbFilePath and cachePath. 104 * 105 * @param pdbFilePath 106 * a directory in the file system to use as a location to cache files. 107 */ 108 public AtomCache(String pdbFilePath) { 109 this(pdbFilePath,pdbFilePath); 110 } 111 112 /** 113 * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. 114 * 115 * @param pdbFilePath 116 * a directory in the file system to use as a location to cache files. 117 * @param cachePath 118 */ 119 public AtomCache(String pdbFilePath, String cachePath) { 120 logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}", pdbFilePath, cachePath); 121 if (!pdbFilePath.endsWith(FILE_SEPARATOR)) { 122 pdbFilePath += FILE_SEPARATOR; 123 } 124 125 // we are caching the binary files that contain the PDBs gzipped 126 // that is the most memory efficient way of caching... 127 // set the input stream provider to caching mode 128 System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true"); 129 130 setPath(pdbFilePath); 131 132 this.cachePath = cachePath; 133 134 fetchBehavior = FetchBehavior.DEFAULT; 135 obsoleteBehavior = ObsoleteBehavior.DEFAULT; 136 137 currentlyLoading.clear(); 138 params = new FileParsingParameters(); 139 140 setFiletype(StructureFiletype.BCIF); 141 } 142 143 /** 144 * Creates a new AtomCache object based on the provided UserConfiguration. 145 * 146 * @param config 147 * the UserConfiguration to use for this cache. 148 */ 149 public AtomCache(UserConfiguration config) { 150 this(config.getPdbFilePath(), config.getCacheFilePath()); 151 fetchBehavior = config.getFetchBehavior(); 152 obsoleteBehavior = config.getObsoleteBehavior(); 153 filetype = config.getStructureFiletype(); 154 } 155 156 /** 157 * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions. 158 * <p> 159 * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)} 160 * for a more general solution. 161 * @param name 162 * @return an array of Atoms. 163 * @throws IOException 164 * @throws StructureException 165 */ 166 public Atom[] getAtoms(String name) throws IOException, StructureException { 167 return getAtoms(new StructureName(name)); 168 } 169 170 public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException { 171 Atom[] atoms; 172 173 // System.out.println("loading " + name); 174 Structure s = getStructure(name); 175 atoms = StructureTools.getAtomCAArray(s); 176 177 /* 178 * synchronized (cache){ cache.put(name, atoms); } 179 */ 180 return atoms; 181 } 182 183 /** 184 * Returns the representative atoms for the provided name. 185 * See {@link #getStructure(String)} for supported naming conventions. 186 * 187 * @param name 188 * @return an array of Atoms. 189 * @throws IOException 190 * @throws StructureException 191 */ 192 public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException { 193 return getRepresentativeAtoms(new StructureName(name)); 194 } 195 196 public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException { 197 Atom[] atoms; 198 199 Structure s = getStructure(name); 200 atoms = StructureTools.getRepresentativeAtomArray(s); 201 202 /* 203 * synchronized (cache){ cache.put(name, atoms); } 204 */ 205 return atoms; 206 } 207 208 /** 209 * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the 210 * assembly from the biounit annotations found in {@link Structure#getPDBHeader()} 211 * <p> 212 * Note, the number of available biological unit files 213 * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one 214 * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies. 215 * 216 * @param pdbId 217 * the PDB ID 218 * @param bioAssemblyId 219 * the 1-based index of the biological assembly (0 gets the asymmetric unit) 220 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 221 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 222 * @return a structure object 223 * @throws IOException 224 * @throws StructureException if biassemblyId < 0 or other problems while loading structure 225 * @since 3.2 226 */ 227 public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean multiModel) 228 throws StructureException, IOException { 229 return getBiologicalAssembly(new PdbId(pdbId), bioAssemblyId, multiModel); 230 } 231 232 /** 233 * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the 234 * assembly from the biounit annotations found in {@link Structure#getPDBHeader()} 235 * <p> 236 * Note, the number of available biological unit files 237 * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one 238 * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies. 239 * 240 * @param pdbId 241 * the PDB ID 242 * @param bioAssemblyId 243 * the 1-based index of the biological assembly (0 gets the asymmetric unit) 244 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 245 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 246 * @return a structure object 247 * @throws IOException 248 * @throws StructureException if biassemblyId < 0 or other problems while loading structure 249 * @since 6.0.0 250 */ 251 public Structure getBiologicalAssembly(PdbId pdbId, int bioAssemblyId, boolean multiModel) 252 throws StructureException, IOException { 253 if (bioAssemblyId < 0) { 254 throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId " 255 + bioAssemblyId); 256 } 257 258 boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly(); 259 260 if (!getFileParsingParams().isParseBioAssembly()) { 261 getFileParsingParams().setParseBioAssembly(true); 262 } 263 264 Structure asymUnit = getStructureForPdbId(pdbId); 265 266 getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly); 267 268 if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) { 269 logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId); 270 return asymUnit; 271 } 272 273 // 0 ... asym unit 274 if (bioAssemblyId == 0) { 275 logger.info("Requested biological assembly 0 for PDB id {}, returning asymmetric unit", pdbId); 276 return asymUnit; 277 } 278 // does it exist? 279 if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) { 280 throw new StructureException("No biological assembly available for biological assembly id " + bioAssemblyId + " of " + pdbId); 281 } 282 283 List<BiologicalAssemblyTransformation> transformations = 284 asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms(); 285 286 287 if (transformations == null || transformations.size() == 0) { 288 throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId); 289 } 290 291 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 292 293 // if we use mmcif or mmtf, then we need to pass useAsymIds=true 294 boolean useAsymIds = false; 295 if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) { 296 useAsymIds = true; 297 } 298 return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel); 299 } 300 301 /** 302 * Returns the default biological unit (bioassemblyId=1, known in PDB as pdb1.gz). If it is not available, 303 * the asymmetric unit will be returned, e.g. for NMR structures. 304 * 305 * <p>Biological assemblies can also be accessed using 306 * <code>getStructure("BIO:<i>[pdbId]</i>")</code> 307 * @param pdbId the PDB id 308 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 309 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 310 * @return a structure object 311 * @throws IOException 312 * @throws StructureException 313 * @since 4.2 314 */ 315 public Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws StructureException, IOException { 316 boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly(); 317 318 if (!getFileParsingParams().isParseBioAssembly()) { 319 getFileParsingParams().setParseBioAssembly(true); 320 } 321 322 Structure asymUnit = getStructureForPdbId(pdbId); 323 getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly); 324 325 326 if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) { 327 logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId); 328 return asymUnit; 329 } 330 331 int bioAssemblyId = 1; 332 333 // does it exist? 334 if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) { 335 return asymUnit; 336 } 337 338 List<BiologicalAssemblyTransformation> transformations = 339 asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms(); 340 341 342 if (transformations == null || transformations.size() == 0) { 343 throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId); 344 } 345 346 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 347 348 // if we use mmcif or mmtf, then we need to pass useAsymIds=true 349 boolean useAsymIds = false; 350 if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) { 351 useAsymIds = true; 352 } 353 return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel); 354 } 355 356 /** 357 * Returns all biological assemblies for given PDB id. 358 * @param pdbId 359 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 360 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 361 * @return 362 * @throws StructureException 363 * @throws IOException 364 * @since 5.0 365 */ 366 public List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws StructureException, IOException { 367 List<Structure> assemblies = new ArrayList<>(); 368 369 boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly(); 370 371 if (!getFileParsingParams().isParseBioAssembly()) { 372 getFileParsingParams().setParseBioAssembly(true); 373 } 374 375 Structure asymUnit = getStructureForPdbId(pdbId); 376 getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly); 377 378 if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) { 379 logger.info("No bioassembly information found for {}, returning asymmetric unit as the only biological assembly", pdbId); 380 assemblies.add(asymUnit); 381 return assemblies; 382 } 383 384 for (int bioAssemblyId : asymUnit.getPDBHeader().getBioAssemblies().keySet()) { 385 List<BiologicalAssemblyTransformation> transformations = 386 asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms(); 387 388 if (transformations == null || transformations.size() == 0) { 389 logger.info("Could not load transformations to recreate biological assembly id {} of {}. Assembly " + 390 "id will be missing in biological assemblies.", bioAssemblyId, pdbId); 391 continue; 392 } 393 394 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 395 396 // if we use mmcif or mmtf, then we need to pass useAsymIds=true 397 boolean useAsymIds = false; 398 if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) { 399 useAsymIds = true; 400 } 401 Structure s = builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel); 402 assemblies.add(s); 403 } 404 return assemblies; 405 } 406 407 /** 408 * Returns the path that contains the caching file for utility data, such as domain definitions. 409 * 410 * @return 411 */ 412 public String getCachePath() { 413 return cachePath; 414 } 415 416 public FileParsingParameters getFileParsingParams() { 417 return params; 418 } 419 420 /** 421 * Get the path that is used to cache PDB files. 422 * 423 * @return path to a directory 424 */ 425 public String getPath() { 426 return path; 427 } 428 429 /** 430 * Request a Structure based on a <i>name</i>. 431 * 432 * <pre> 433 * Formal specification for how to specify the <i>name</i>: 434 * 435 * name := pdbID 436 * | pdbID '.' chainID 437 * | pdbID '.' range 438 * | scopID 439 * range := '('? range (',' range)? ')'? 440 * | chainID 441 * | chainID '_' resNum '-' resNum 442 * pdbID := [1-9][a-zA-Z0-9]{3} 443 * | PDB_[a-zA-Z0-9]{8} 444 * chainID := [a-zA-Z0-9] 445 * scopID := 'd' pdbID [a-z_][0-9_] 446 * resNum := [-+]?[0-9]+[A-Za-z]? 447 * 448 * 449 * Example structures: 450 * 1TIM #whole structure 451 * 4HHB.C #single chain 452 * 4GCR.A_1-83 #one domain, by residue number 453 * 3AA0.A,B #two chains treated as one structure 454 * PDB_00001TIM #whole structure (extended format) 455 * PDB_00004HHB.C #single chain (extended format) 456 * PDB_00004GCR.A_1-83 #one domain, by residue number (extended format) 457 * PDB_00003AA0.A,B #two chains treated as one structure (extended format) 458 * d2bq6a1 #scop domain 459 * </pre> 460 * 461 * With the additional set of rules: 462 * 463 * <ul> 464 * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model 465 * only (for NMR). 466 * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li> 467 * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. </li> 468 * <li>URLs are accepted as well</li> 469 * </ul> 470 * 471 * <p>Note that this method should not be used in StructureIdentifier 472 * implementations to avoid circular calls. 473 * @param name 474 * @return a Structure object, or null if name appears improperly formated (eg too short, etc) 475 * @throws IOException 476 * The PDB file cannot be cached due to IO errors 477 * @throws StructureException 478 * The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon 479 * errors, eg for poorly formatted subranges. 480 */ 481 public Structure getStructure(String name) throws IOException, StructureException { 482 StructureName structureName = new StructureName(name); 483 return getStructure(structureName); 484 } 485 486 /** 487 * Get the structure corresponding to the given {@link StructureIdentifier}. 488 * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)} 489 * followed by {@link StructureIdentifier#reduce(Structure)}. 490 * 491 * <p>Note that this method should not be used in StructureIdentifier 492 * implementations to avoid circular calls. 493 * @param strucId 494 * @return 495 * @throws IOException 496 * @throws StructureException 497 */ 498 public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException { 499 Structure s = strucId.loadStructure(this); 500 Structure r = strucId.reduce(s); 501 r.setStructureIdentifier(strucId); 502 return r; 503 } 504 505 /** 506 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 507 * 508 * @param domain 509 * a SCOP domain 510 * @return a Structure object 511 * @throws IOException 512 * @throws StructureException 513 */ 514 public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException { 515 return getStructureForDomain(domain, ScopFactory.getSCOP()); 516 } 517 518 /** 519 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 520 * 521 * @param domain 522 * a SCOP domain 523 * @param scopDatabase 524 * A {@link ScopDatabase} to use 525 * @return a Structure object 526 * @throws IOException 527 * @throws StructureException 528 */ 529 public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException, 530 StructureException { 531 return getStructureForDomain(domain, scopDatabase, false); 532 } 533 534 /** 535 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 536 * 537 * @param domain 538 * a SCOP domain 539 * @param scopDatabase 540 * A {@link ScopDatabase} to use 541 * @param strictLigandHandling 542 * If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP 543 * domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the 544 * definition (residue numbers) of the SCOP domain 545 * @return a Structure object 546 * @throws IOException 547 * @throws StructureException 548 */ 549 public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling) 550 throws IOException, StructureException { 551 PdbId pdbId = domain.getPdbId(); 552 Structure fullStructure = getStructureForPdbId(pdbId); 553 Structure structure = domain.reduce(fullStructure); 554 555 // TODO It would be better to move all of this into the reduce method, 556 // but that would require ligand handling properties in StructureIdentifiers 557 558 // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in 559 // specifically, we add a ligand if and only if it occurs within the domain 560 AtomPositionMap map = null; 561 List<ResidueRangeAndLength> rrs = null; 562 if (strictLigandHandling) { 563 map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER); 564 rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map); 565 } 566 for (Chain chain : fullStructure.getNonPolyChains()) { 567 if (!structure.hasPdbChain(chain.getName())) { 568 continue; // we can't do anything with a chain our domain 569 } 570 571 Chain newChain; 572 if (!structure.hasNonPolyChain(chain.getId())) { 573 newChain = new ChainImpl(); 574 newChain.setId(chain.getId()); 575 newChain.setName(chain.getName()); 576 newChain.setEntityInfo(chain.getEntityInfo()); 577 structure.addChain(newChain); 578 } else { 579 newChain = structure.getNonPolyChain(chain.getId()); 580 } 581 582 List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups()); 583 for (Group group : ligands) { 584 boolean shouldContain = true; 585 if (strictLigandHandling) { 586 shouldContain = false; // whether the ligand occurs within the domain 587 for (ResidueRange rr : rrs) { 588 if (rr.contains(group.getResidueNumber(), map)) { 589 shouldContain = true; 590 } 591 } 592 } 593 boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate 594 // ligands 595 if (shouldContain && !alreadyContains) { 596 newChain.addGroup(group); 597 } 598 } 599 } 600 601 // build a more meaningful description for the new structure 602 StringBuilder header = new StringBuilder(); 603 header.append(domain.getClassificationId()); 604 if (scopDatabase != null) { 605 int sf = domain.getSuperfamilyId(); 606 ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf); 607 if (description != null) { 608 header.append(" | "); 609 header.append(description.getDescription()); 610 } 611 } 612 structure.getPDBHeader().setDescription(header.toString()); 613 614 return structure; 615 } 616 617 /** 618 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 619 * 620 * @param scopId 621 * a SCOP Id 622 * @return a Structure object 623 * @throws IOException 624 * @throws StructureException 625 */ 626 public Structure getStructureForDomain(String scopId) throws IOException, StructureException { 627 return getStructureForDomain(scopId, ScopFactory.getSCOP()); 628 } 629 630 /** 631 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 632 * 633 * @param scopId 634 * a SCOP Id 635 * @param scopDatabase 636 * A {@link ScopDatabase} to use 637 * @return a Structure object 638 * @throws IOException 639 * @throws StructureException 640 */ 641 public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException, 642 StructureException { 643 ScopDomain domain = scopDatabase.getDomainByScopID(scopId); 644 return getStructureForDomain(domain, scopDatabase); 645 } 646 647 /** 648 * set the location at which utility data should be cached. 649 * 650 * @param cachePath 651 */ 652 public void setCachePath(String cachePath) { 653 this.cachePath = cachePath; 654 } 655 656 public void setFileParsingParams(FileParsingParameters params) { 657 this.params = params; 658 } 659 660 /** 661 * <b>[Optional]</b> This method changes the behavior when obsolete entries 662 * are requested. Current behaviors are: 663 * <ul> 664 * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION} 665 * Throw a {@link StructureException} (the default) 666 * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE} 667 * Load the requested ID from the PDB's obsolete repository 668 * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT} 669 * Load the most recent version of the requested structure 670 * </ul> 671 * 672 * <p>This setting may be silently ignored by implementations which do not have 673 * access to the server to determine whether an entry is obsolete, such as 674 * certain {@link FetchBehavior}s. Note that an obsolete entry may still be 675 * returned even this is FETCH_CURRENT if the entry is found locally. 676 * 677 * @param behavior Whether to fetch obsolete records 678 * @since 4.0.0 679 */ 680 public void setObsoleteBehavior(ObsoleteBehavior behavior) { 681 obsoleteBehavior = behavior; 682 } 683 684 /** 685 * Returns how this instance deals with obsolete entries. Note that this 686 * setting may be ignored by some implementations or in some situations, 687 * such as certain {@link FetchBehavior}s. 688 * 689 * <p>For most implementations, the default value is 690 * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}. 691 * 692 * @return The ObsoleteBehavior 693 * @since 4.0.0 694 */ 695 public ObsoleteBehavior getObsoleteBehavior() { 696 return obsoleteBehavior; 697 } 698 699 /** 700 * Get the behavior for fetching files from the server 701 * @return 702 */ 703 public FetchBehavior getFetchBehavior() { 704 return fetchBehavior; 705 } 706 707 /** 708 * Set the behavior for fetching files from the server 709 * @param fetchBehavior 710 */ 711 public void setFetchBehavior(FetchBehavior fetchBehavior) { 712 this.fetchBehavior = fetchBehavior; 713 } 714 715 /** 716 * Set the path that is used to cache PDB files. 717 * 718 * @param path 719 * to a directory 720 */ 721 public void setPath(String path) { 722 this.path = FileDownloadUtils.expandUserHome(path); 723 } 724 725 /** 726 * Returns the currently active file type that will be parsed. 727 * @return a StructureFiletype 728 */ 729 public StructureFiletype getFiletype() { 730 return filetype; 731 } 732 733 /** 734 * Set the file type that will be parsed. 735 * @param filetype a StructureFiletype 736 */ 737 public void setFiletype(StructureFiletype filetype) { 738 this.filetype = filetype; 739 } 740 741 private boolean checkLoading(PdbId pdbId) { 742 return currentlyLoading.contains(pdbId.getId()); 743 } 744 745 /** 746 * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the the {@link CathDatabase} 747 * at {@link CathFactory#getCathDatabase()}. 748 */ 749 public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException { 750 return getStructureForCathDomain(structureName, CathFactory.getCathDatabase()); 751 } 752 753 /** 754 * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}. 755 */ 756 public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException { 757 CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier()); 758 759 Structure s = getStructureForPdbId(cathDomain.getIdentifier()); 760 Structure n = cathDomain.reduce(s); 761 762 // add the ligands of the chain... 763 Chain newChain = n.getPolyChainByPDB(structureName.getChainId()); 764 List<Chain> origChains = s.getNonPolyChainsByPDB(structureName.getChainId()); 765 for (Chain origChain : origChains) { 766 List<Group> ligands = origChain.getAtomGroups(); 767 768 for (Group g : ligands) { 769 if (!newChain.getAtomGroups().contains(g)) { 770 newChain.addGroup(g); 771 } 772 } 773 } 774 775 return n; 776 } 777 778 protected void flagLoading(PdbId pdbId) { 779 String id = pdbId.getId(); 780 if (!currentlyLoading.contains(id)) { 781 currentlyLoading.add(id); 782 } 783 } 784 785 protected void flagLoadingFinished(PdbId pdbId) { 786 currentlyLoading.remove(pdbId.getId()); 787 } 788 789 /** 790 * Loads a structure directly by PDB ID 791 * @param id 792 * @return 793 * @throws IOException 794 * @throws StructureException 795 */ 796 public Structure getStructureForPdbId(String id) throws IOException, StructureException { 797 if (id == null) 798 return null; 799 return getStructureForPdbId(new PdbId(id)); 800 } 801 /** 802 * Loads a structure directly by PDB ID 803 * @param pdbId 804 * @return 805 * @throws IOException 806 */ 807 public Structure getStructureForPdbId(PdbId pdbId) throws IOException { 808 if (pdbId == null) 809 return null; 810 811 while (checkLoading(pdbId)) { 812 // waiting for loading to be finished... 813 try { 814 Thread.sleep(100); 815 } catch (InterruptedException e) { 816 logger.error(e.getMessage()); 817 } 818 } 819 820 switch (filetype) { 821 case CIF: 822 logger.debug("loading from mmcif"); 823 return loadStructureFromCifByPdbId(pdbId); 824 case BCIF: 825 logger.debug("loading from bcif"); 826 return loadStructureFromBcifByPdbId(pdbId); 827 case PDB: default: 828 logger.debug("loading from pdb"); 829 return loadStructureFromPdbByPdbId(pdbId); 830 } 831 } 832 833 protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException { 834 return loadStructureFromCifByPdbId(new PdbId(pdbId)); 835 } 836 837 protected Structure loadStructureFromCifByPdbId(PdbId pdbId) throws IOException { 838 logger.debug("Loading structure {} from mmCIF file {}.", pdbId, path); 839 Structure s; 840 flagLoading(pdbId); 841 try { 842 CifFileReader reader = new CifFileReader(path); 843 reader.setFetchBehavior(fetchBehavior); 844 reader.setObsoleteBehavior(obsoleteBehavior); 845 reader.setFileParsingParameters(params); 846 s = reader.getStructureById(pdbId); 847 } finally { 848 flagLoadingFinished(pdbId); 849 } 850 851 return s; 852 } 853 854 protected Structure loadStructureFromBcifByPdbId(String pdbId) throws IOException { 855 return loadStructureFromBcifByPdbId(new PdbId(pdbId)); 856 } 857 protected Structure loadStructureFromBcifByPdbId(PdbId pdbId) throws IOException { 858 logger.debug("Loading structure {} from BinaryCIF file {}.", pdbId, path); 859 Structure s; 860 flagLoading(pdbId); 861 try { 862 BcifFileReader reader = new BcifFileReader(path); 863 reader.setFetchBehavior(fetchBehavior); 864 reader.setObsoleteBehavior(obsoleteBehavior); 865 reader.setFileParsingParameters(params); 866 s = reader.getStructureById(pdbId); 867 } finally { 868 flagLoadingFinished(pdbId); 869 } 870 871 return s; 872 } 873 874 protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException { 875 return loadStructureFromPdbByPdbId(new PdbId(pdbId)); 876 } 877 878 protected Structure loadStructureFromPdbByPdbId(PdbId pdbId) throws IOException { 879 logger.debug("Loading structure {} from PDB file {}.", pdbId, path); 880 Structure s; 881 flagLoading(pdbId); 882 try { 883 PDBFileReader reader = new PDBFileReader(path); 884 reader.setFetchBehavior(fetchBehavior); 885 reader.setObsoleteBehavior(obsoleteBehavior); 886 887 reader.setFileParsingParameters(params); 888 889 s = reader.getStructureById(pdbId); 890 } finally { 891 flagLoadingFinished(pdbId); 892 } 893 894 return s; 895 } 896}