001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.util; 022 023import java.io.IOException; 024import java.util.ArrayList; 025import java.util.Collection; 026import java.util.Collections; 027import java.util.List; 028import java.util.TreeSet; 029 030import org.biojava.nbio.core.util.InputStreamProvider; 031import org.biojava.nbio.structure.*; 032import org.biojava.nbio.structure.align.client.StructureName; 033import org.biojava.nbio.structure.cath.CathDatabase; 034import org.biojava.nbio.structure.cath.CathDomain; 035import org.biojava.nbio.structure.cath.CathFactory; 036import org.biojava.nbio.structure.io.BcifFileReader; 037import org.biojava.nbio.structure.io.CifFileReader; 038import org.biojava.nbio.structure.io.FileParsingParameters; 039import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; 040import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; 041import org.biojava.nbio.structure.io.MMTFFileReader; 042import org.biojava.nbio.structure.io.PDBFileReader; 043import org.biojava.nbio.core.util.FileDownloadUtils; 044import org.biojava.nbio.structure.io.StructureFiletype; 045import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; 046import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 047import org.biojava.nbio.structure.scop.ScopDatabase; 048import org.biojava.nbio.structure.scop.ScopDescription; 049import org.biojava.nbio.structure.scop.ScopDomain; 050import org.biojava.nbio.structure.scop.ScopFactory; 051import org.slf4j.Logger; 052import org.slf4j.LoggerFactory; 053 054/** 055 * A utility class that provides easy access to Structure objects. If you are running a script that is frequently 056 * re-using the same PDB structures, the AtomCache keeps an in-memory cache of the files for quicker access. The cache 057 * is a soft-cache, this means it won't cause out of memory exceptions, but garbage collects the data if the Java 058 * virtual machine needs to free up space. The AtomCache is thread-safe. 059 * 060 * @author Andreas Prlic 061 * @author Spencer Bliven 062 * @author Peter Rose 063 * @since 3.0 064 */ 065public class AtomCache { 066 private static final Logger logger = LoggerFactory.getLogger(AtomCache.class); 067 068 /** 069 * The default output bioassembly style: if true the bioassemblies are multimodel, 070 * if false the bioassemblies are flat with renamed chains for symmetry-partners. 071 */ 072 public static final boolean DEFAULT_BIOASSEMBLY_STYLE = false; 073 074 public static final String BIOL_ASSEMBLY_IDENTIFIER = "BIO:"; 075 public static final String CHAIN_NR_SYMBOL = ":"; 076 public static final String CHAIN_SPLIT_SYMBOL = "."; 077 public static final String UNDERSCORE = "_"; 078 079 private static final String FILE_SEPARATOR = System.getProperty("file.separator"); 080 081 protected FileParsingParameters params; 082 private FetchBehavior fetchBehavior; 083 private ObsoleteBehavior obsoleteBehavior; 084 private String cachePath; 085 086 // make sure IDs are loaded uniquely 087 private final Collection<String> currentlyLoading = Collections.synchronizedCollection(new TreeSet<>()); 088 089 private String path; 090 private StructureFiletype filetype = StructureFiletype.BCIF; 091 092 /** 093 * Default AtomCache constructor. 094 * 095 * Usually stores files in a temp directory, but this can be overriden by setting the PDB_DIR variable at runtime. 096 * 097 * @see UserConfiguration#UserConfiguration() 098 */ 099 public AtomCache() { 100 this(new UserConfiguration()); 101 } 102 103 /** 104 * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. It will use the same value for pdbFilePath and cachePath. 105 * 106 * @param pdbFilePath 107 * a directory in the file system to use as a location to cache files. 108 */ 109 public AtomCache(String pdbFilePath) { 110 this(pdbFilePath,pdbFilePath); 111 } 112 113 /** 114 * Creates an instance of an AtomCache that is pointed to the a particular path in the file system. 115 * 116 * @param pdbFilePath 117 * a directory in the file system to use as a location to cache files. 118 * @param cachePath 119 */ 120 public AtomCache(String pdbFilePath, String cachePath) { 121 logger.debug("Initialising AtomCache with pdbFilePath={}, cachePath={}", pdbFilePath, cachePath); 122 if (!pdbFilePath.endsWith(FILE_SEPARATOR)) { 123 pdbFilePath += FILE_SEPARATOR; 124 } 125 126 // we are caching the binary files that contain the PDBs gzipped 127 // that is the most memory efficient way of caching... 128 // set the input stream provider to caching mode 129 System.setProperty(InputStreamProvider.CACHE_PROPERTY, "true"); 130 131 setPath(pdbFilePath); 132 133 this.cachePath = cachePath; 134 135 fetchBehavior = FetchBehavior.DEFAULT; 136 obsoleteBehavior = ObsoleteBehavior.DEFAULT; 137 138 currentlyLoading.clear(); 139 params = new FileParsingParameters(); 140 141 setFiletype(StructureFiletype.BCIF); 142 } 143 144 /** 145 * Creates a new AtomCache object based on the provided UserConfiguration. 146 * 147 * @param config 148 * the UserConfiguration to use for this cache. 149 */ 150 public AtomCache(UserConfiguration config) { 151 this(config.getPdbFilePath(), config.getCacheFilePath()); 152 fetchBehavior = config.getFetchBehavior(); 153 obsoleteBehavior = config.getObsoleteBehavior(); 154 filetype = config.getStructureFiletype(); 155 } 156 157 /** 158 * Returns the CA atoms for the provided name. See {@link #getStructure(String)} for supported naming conventions. 159 * <p> 160 * This method only works with protein chains. Use {@link #getRepresentativeAtoms(String)} 161 * for a more general solution. 162 * @param name 163 * @return an array of Atoms. 164 * @throws IOException 165 * @throws StructureException 166 * @see 167 */ 168 public Atom[] getAtoms(String name) throws IOException, StructureException { 169 return getAtoms(new StructureName(name)); 170 } 171 172 public Atom[] getAtoms(StructureIdentifier name) throws IOException, StructureException { 173 Atom[] atoms; 174 175 // System.out.println("loading " + name); 176 Structure s = getStructure(name); 177 atoms = StructureTools.getAtomCAArray(s); 178 179 /* 180 * synchronized (cache){ cache.put(name, atoms); } 181 */ 182 return atoms; 183 } 184 185 /** 186 * Returns the representative atoms for the provided name. 187 * See {@link #getStructure(String)} for supported naming conventions. 188 * 189 * @param name 190 * @return an array of Atoms. 191 * @throws IOException 192 * @throws StructureException 193 * @see 194 */ 195 public Atom[] getRepresentativeAtoms(String name) throws IOException, StructureException { 196 return getRepresentativeAtoms(new StructureName(name)); 197 } 198 199 public Atom[] getRepresentativeAtoms(StructureIdentifier name) throws IOException, StructureException { 200 Atom[] atoms; 201 202 Structure s = getStructure(name); 203 atoms = StructureTools.getRepresentativeAtomArray(s); 204 205 /* 206 * synchronized (cache){ cache.put(name, atoms); } 207 */ 208 return atoms; 209 } 210 211 /** 212 * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the 213 * assembly from the biounit annotations found in {@link Structure#getPDBHeader()} 214 * <p> 215 * Note, the number of available biological unit files 216 * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one 217 * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies. 218 * 219 * @param pdbId 220 * the PDB ID 221 * @param bioAssemblyId 222 * the 1-based index of the biological assembly (0 gets the asymmetric unit) 223 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 224 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 225 * @return a structure object 226 * @throws IOException 227 * @throws StructureException if biassemblyId < 0 or other problems while loading structure 228 * @since 3.2 229 */ 230 public Structure getBiologicalAssembly(String pdbId, int bioAssemblyId, boolean multiModel) 231 throws StructureException, IOException { 232 return getBiologicalAssembly(new PdbId(pdbId), bioAssemblyId, multiModel); 233 } 234 235 /** 236 * Returns the biological assembly for a given PDB ID and bioAssemblyId, by building the 237 * assembly from the biounit annotations found in {@link Structure#getPDBHeader()} 238 * <p> 239 * Note, the number of available biological unit files 240 * varies. Many entries don't have a biological assembly specified (e.g. NMR structures), many entries have only one 241 * biological assembly (bioAssemblyId=1), and some structures have multiple biological assemblies. 242 * 243 * @param pdbId 244 * the PDB ID 245 * @param bioAssemblyId 246 * the 1-based index of the biological assembly (0 gets the asymmetric unit) 247 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 248 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 249 * @return a structure object 250 * @throws IOException 251 * @throws StructureException if biassemblyId < 0 or other problems while loading structure 252 * @since 6.0.0 253 */ 254 public Structure getBiologicalAssembly(PdbId pdbId, int bioAssemblyId, boolean multiModel) 255 throws StructureException, IOException { 256 if (bioAssemblyId < 0) { 257 throw new StructureException("bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId " 258 + bioAssemblyId); 259 } 260 261 boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly(); 262 263 if (!getFileParsingParams().isParseBioAssembly()) { 264 getFileParsingParams().setParseBioAssembly(true); 265 } 266 267 Structure asymUnit = getStructureForPdbId(pdbId); 268 269 getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly); 270 271 if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) { 272 logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId); 273 return asymUnit; 274 } 275 276 // 0 ... asym unit 277 if (bioAssemblyId == 0) { 278 logger.info("Requested biological assembly 0 for PDB id {}, returning asymmetric unit", pdbId); 279 return asymUnit; 280 } 281 // does it exist? 282 if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) { 283 throw new StructureException("No biological assembly available for biological assembly id " + bioAssemblyId + " of " + pdbId); 284 } 285 286 List<BiologicalAssemblyTransformation> transformations = 287 asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms(); 288 289 290 if (transformations == null || transformations.size() == 0) { 291 throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId); 292 } 293 294 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 295 296 // if we use mmcif or mmtf, then we need to pass useAsymIds=true 297 boolean useAsymIds = false; 298 if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) { 299 useAsymIds = true; 300 } 301 return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel); 302 } 303 304 /** 305 * Returns the default biological unit (bioassemblyId=1, known in PDB as pdb1.gz). If it is not available, 306 * the asymmetric unit will be returned, e.g. for NMR structures. 307 * 308 * <p>Biological assemblies can also be accessed using 309 * <tt>getStructure("BIO:<i>[pdbId]</i>")</tt> 310 * @param pdbId the PDB id 311 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 312 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 313 * @return a structure object 314 * @throws IOException 315 * @throws StructureException 316 * @since 4.2 317 */ 318 public Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws StructureException, IOException { 319 boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly(); 320 321 if (!getFileParsingParams().isParseBioAssembly()) { 322 getFileParsingParams().setParseBioAssembly(true); 323 } 324 325 Structure asymUnit = getStructureForPdbId(pdbId); 326 getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly); 327 328 329 if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) { 330 logger.info("No bioassembly information found for {}, returning asymmetric unit as biological assembly", pdbId); 331 return asymUnit; 332 } 333 334 int bioAssemblyId = 1; 335 336 // does it exist? 337 if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(bioAssemblyId)) { 338 return asymUnit; 339 } 340 341 List<BiologicalAssemblyTransformation> transformations = 342 asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms(); 343 344 345 if (transformations == null || transformations.size() == 0) { 346 throw new StructureException("Could not load transformations to recreate biological assembly id " + bioAssemblyId + " of " + pdbId); 347 } 348 349 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 350 351 // if we use mmcif or mmtf, then we need to pass useAsymIds=true 352 boolean useAsymIds = false; 353 if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) { 354 useAsymIds = true; 355 } 356 return builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel); 357 } 358 359 /** 360 * Returns all biological assemblies for given PDB id. 361 * @param pdbId 362 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 363 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 364 * @return 365 * @throws StructureException 366 * @throws IOException 367 * @since 5.0 368 */ 369 public List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws StructureException, IOException { 370 List<Structure> assemblies = new ArrayList<>(); 371 372 boolean prevIsParseBioAssembly = getFileParsingParams().isParseBioAssembly(); 373 374 if (!getFileParsingParams().isParseBioAssembly()) { 375 getFileParsingParams().setParseBioAssembly(true); 376 } 377 378 Structure asymUnit = getStructureForPdbId(pdbId); 379 getFileParsingParams().setParseBioAssembly(prevIsParseBioAssembly); 380 381 if (asymUnit.getPDBHeader() == null || asymUnit.getPDBHeader().getBioAssemblies() == null) { 382 logger.info("No bioassembly information found for {}, returning asymmetric unit as the only biological assembly", pdbId); 383 assemblies.add(asymUnit); 384 return assemblies; 385 } 386 387 for (int bioAssemblyId : asymUnit.getPDBHeader().getBioAssemblies().keySet()) { 388 List<BiologicalAssemblyTransformation> transformations = 389 asymUnit.getPDBHeader().getBioAssemblies().get(bioAssemblyId).getTransforms(); 390 391 if (transformations == null || transformations.size() == 0) { 392 logger.info("Could not load transformations to recreate biological assembly id {} of {}. Assembly " + 393 "id will be missing in biological assemblies.", bioAssemblyId, pdbId); 394 continue; 395 } 396 397 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 398 399 // if we use mmcif or mmtf, then we need to pass useAsymIds=true 400 boolean useAsymIds = false; 401 if (filetype == StructureFiletype.CIF || filetype == StructureFiletype.BCIF || filetype == StructureFiletype.MMTF) { 402 useAsymIds = true; 403 } 404 Structure s = builder.rebuildQuaternaryStructure(asymUnit, transformations, useAsymIds, multiModel); 405 assemblies.add(s); 406 } 407 return assemblies; 408 } 409 410 /** 411 * Returns the path that contains the caching file for utility data, such as domain definitions. 412 * 413 * @return 414 */ 415 public String getCachePath() { 416 return cachePath; 417 } 418 419 public FileParsingParameters getFileParsingParams() { 420 return params; 421 } 422 423 /** 424 * Get the path that is used to cache PDB files. 425 * 426 * @return path to a directory 427 */ 428 public String getPath() { 429 return path; 430 } 431 432 /** 433 * Request a Structure based on a <i>name</i>. 434 * 435 * <pre> 436 * Formal specification for how to specify the <i>name</i>: 437 * 438 * name := pdbID 439 * | pdbID '.' chainID 440 * | pdbID '.' range 441 * | scopID 442 * range := '('? range (',' range)? ')'? 443 * | chainID 444 * | chainID '_' resNum '-' resNum 445 * pdbID := [1-9][a-zA-Z0-9]{3} 446 * | PDB_[a-zA-Z0-9]{8} 447 * chainID := [a-zA-Z0-9] 448 * scopID := 'd' pdbID [a-z_][0-9_] 449 * resNum := [-+]?[0-9]+[A-Za-z]? 450 * 451 * 452 * Example structures: 453 * 1TIM #whole structure 454 * 4HHB.C #single chain 455 * 4GCR.A_1-83 #one domain, by residue number 456 * 3AA0.A,B #two chains treated as one structure 457 * PDB_00001TIM #whole structure (extended format) 458 * PDB_00004HHB.C #single chain (extended format) 459 * PDB_00004GCR.A_1-83 #one domain, by residue number (extended format) 460 * PDB_00003AA0.A,B #two chains treated as one structure (extended format) 461 * d2bq6a1 #scop domain 462 * </pre> 463 * 464 * With the additional set of rules: 465 * 466 * <ul> 467 * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model 468 * only (for NMR). 469 * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A</li> 470 * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, 471 * see {@link #setStrictSCOP(boolean)}</li> 472 * <li>URLs are accepted as well</li> 473 * </ul> 474 * 475 * <p>Note that this method should not be used in StructureIdentifier 476 * implementations to avoid circular calls. 477 * @param name 478 * @return a Structure object, or null if name appears improperly formated (eg too short, etc) 479 * @throws IOException 480 * The PDB file cannot be cached due to IO errors 481 * @throws StructureException 482 * The name appeared valid but did not correspond to a structure. Also thrown by some submethods upon 483 * errors, eg for poorly formatted subranges. 484 */ 485 public Structure getStructure(String name) throws IOException, StructureException { 486 StructureName structureName = new StructureName(name); 487 return getStructure(structureName); 488 } 489 490 /** 491 * Get the structure corresponding to the given {@link StructureIdentifier}. 492 * Equivalent to calling {@link StructureIdentifier#loadStructure(AtomCache)} 493 * followed by {@link StructureIdentifier#reduce(Structure)}. 494 * 495 * <p>Note that this method should not be used in StructureIdentifier 496 * implementations to avoid circular calls. 497 * @param strucId 498 * @return 499 * @throws IOException 500 * @throws StructureException 501 */ 502 public Structure getStructure(StructureIdentifier strucId) throws IOException, StructureException { 503 Structure s = strucId.loadStructure(this); 504 Structure r = strucId.reduce(s); 505 r.setStructureIdentifier(strucId); 506 return r; 507 } 508 509 /** 510 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 511 * 512 * @param domain 513 * a SCOP domain 514 * @return a Structure object 515 * @throws IOException 516 * @throws StructureException 517 */ 518 public Structure getStructureForDomain(ScopDomain domain) throws IOException, StructureException { 519 return getStructureForDomain(domain, ScopFactory.getSCOP()); 520 } 521 522 /** 523 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 524 * 525 * @param domain 526 * a SCOP domain 527 * @param scopDatabase 528 * A {@link ScopDatabase} to use 529 * @return a Structure object 530 * @throws IOException 531 * @throws StructureException 532 */ 533 public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase) throws IOException, 534 StructureException { 535 return getStructureForDomain(domain, scopDatabase, false); 536 } 537 538 /** 539 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 540 * 541 * @param domain 542 * a SCOP domain 543 * @param scopDatabase 544 * A {@link ScopDatabase} to use 545 * @param strictLigandHandling 546 * If set to false, hetero-atoms are included if and only if they belong to a chain to which the SCOP 547 * domain belongs; if set to true, hetero-atoms are included if and only if they are strictly within the 548 * definition (residue numbers) of the SCOP domain 549 * @return a Structure object 550 * @throws IOException 551 * @throws StructureException 552 */ 553 public Structure getStructureForDomain(ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling) 554 throws IOException, StructureException { 555 PdbId pdbId = domain.getPdbId(); 556 Structure fullStructure = getStructureForPdbId(pdbId); 557 Structure structure = domain.reduce(fullStructure); 558 559 // TODO It would be better to move all of this into the reduce method, 560 // but that would require ligand handling properties in StructureIdentifiers 561 562 // because ligands sometimes occur after TER records in PDB files, we may need to add some ligands back in 563 // specifically, we add a ligand if and only if it occurs within the domain 564 AtomPositionMap map = null; 565 List<ResidueRangeAndLength> rrs = null; 566 if (strictLigandHandling) { 567 map = new AtomPositionMap(StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER); 568 rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map); 569 } 570 for (Chain chain : fullStructure.getNonPolyChains()) { 571 if (!structure.hasPdbChain(chain.getName())) { 572 continue; // we can't do anything with a chain our domain 573 } 574 575 Chain newChain; 576 if (!structure.hasNonPolyChain(chain.getId())) { 577 newChain = new ChainImpl(); 578 newChain.setId(chain.getId()); 579 newChain.setName(chain.getName()); 580 newChain.setEntityInfo(chain.getEntityInfo()); 581 structure.addChain(newChain); 582 } else { 583 newChain = structure.getNonPolyChain(chain.getId()); 584 } 585 586 List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups()); 587 for (Group group : ligands) { 588 boolean shouldContain = true; 589 if (strictLigandHandling) { 590 shouldContain = false; // whether the ligand occurs within the domain 591 for (ResidueRange rr : rrs) { 592 if (rr.contains(group.getResidueNumber(), map)) { 593 shouldContain = true; 594 } 595 } 596 } 597 boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate 598 // ligands 599 if (shouldContain && !alreadyContains) { 600 newChain.addGroup(group); 601 } 602 } 603 } 604 605 // build a more meaningful description for the new structure 606 StringBuilder header = new StringBuilder(); 607 header.append(domain.getClassificationId()); 608 if (scopDatabase != null) { 609 int sf = domain.getSuperfamilyId(); 610 ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf); 611 if (description != null) { 612 header.append(" | "); 613 header.append(description.getDescription()); 614 } 615 } 616 structure.getPDBHeader().setDescription(header.toString()); 617 618 return structure; 619 } 620 621 /** 622 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 623 * 624 * @param scopId 625 * a SCOP Id 626 * @return a Structure object 627 * @throws IOException 628 * @throws StructureException 629 */ 630 public Structure getStructureForDomain(String scopId) throws IOException, StructureException { 631 return getStructureForDomain(scopId, ScopFactory.getSCOP()); 632 } 633 634 /** 635 * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. 636 * 637 * @param scopId 638 * a SCOP Id 639 * @param scopDatabase 640 * A {@link ScopDatabase} to use 641 * @return a Structure object 642 * @throws IOException 643 * @throws StructureException 644 */ 645 public Structure getStructureForDomain(String scopId, ScopDatabase scopDatabase) throws IOException, 646 StructureException { 647 ScopDomain domain = scopDatabase.getDomainByScopID(scopId); 648 return getStructureForDomain(domain, scopDatabase); 649 } 650 651 /** 652 * set the location at which utility data should be cached. 653 * 654 * @param cachePath 655 */ 656 public void setCachePath(String cachePath) { 657 this.cachePath = cachePath; 658 } 659 660 public void setFileParsingParams(FileParsingParameters params) { 661 this.params = params; 662 } 663 664 /** 665 * <b>[Optional]</b> This method changes the behavior when obsolete entries 666 * are requested. Current behaviors are: 667 * <ul> 668 * <li>{@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION} 669 * Throw a {@link StructureException} (the default) 670 * <li>{@link ObsoleteBehavior#FETCH_OBSOLETE FETCH_OBSOLETE} 671 * Load the requested ID from the PDB's obsolete repository 672 * <li>{@link ObsoleteBehavior#FETCH_CURRENT FETCH_CURRENT} 673 * Load the most recent version of the requested structure 674 * 675 * <p>This setting may be silently ignored by implementations which do not have 676 * access to the server to determine whether an entry is obsolete, such as 677 * if {@link #isAutoFetch()} is false. Note that an obsolete entry may still be 678 * returned even this is FETCH_CURRENT if the entry is found locally. 679 * 680 * @param fetchFileEvenIfObsolete Whether to fetch obsolete records 681 * @see #setFetchCurrent(boolean) 682 * @since 4.0.0 683 */ 684 public void setObsoleteBehavior(ObsoleteBehavior behavior) { 685 obsoleteBehavior = behavior; 686 } 687 688 /** 689 * Returns how this instance deals with obsolete entries. Note that this 690 * setting may be ignored by some implementations or in some situations, 691 * such as when {@link #isAutoFetch()} is false. 692 * 693 * <p>For most implementations, the default value is 694 * {@link ObsoleteBehavior#THROW_EXCEPTION THROW_EXCEPTION}. 695 * 696 * @return The ObsoleteBehavior 697 * @since 4.0.0 698 */ 699 public ObsoleteBehavior getObsoleteBehavior() { 700 return obsoleteBehavior; 701 } 702 703 /** 704 * Get the behavior for fetching files from the server 705 * @return 706 */ 707 public FetchBehavior getFetchBehavior() { 708 return fetchBehavior; 709 } 710 711 /** 712 * Set the behavior for fetching files from the server 713 * @param fetchBehavior 714 */ 715 public void setFetchBehavior(FetchBehavior fetchBehavior) { 716 this.fetchBehavior = fetchBehavior; 717 } 718 719 /** 720 * Set the path that is used to cache PDB files. 721 * 722 * @param path 723 * to a directory 724 */ 725 public void setPath(String path) { 726 this.path = FileDownloadUtils.expandUserHome(path); 727 } 728 729 /** 730 * Returns the currently active file type that will be parsed. 731 * @return a StructureFiletype 732 */ 733 public StructureFiletype getFiletype() { 734 return filetype; 735 } 736 737 /** 738 * Set the file type that will be parsed. 739 * @param filetype a StructureFiletype 740 */ 741 public void setFiletype(StructureFiletype filetype) { 742 this.filetype = filetype; 743 } 744 745 private boolean checkLoading(PdbId pdbId) { 746 return currentlyLoading.contains(pdbId.getId()); 747 } 748 749 /** 750 * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the the {@link CathDatabase} 751 * at {@link CathFactory#getCathDatabase()}. 752 */ 753 public Structure getStructureForCathDomain(StructureName structureName) throws IOException, StructureException { 754 return getStructureForCathDomain(structureName, CathFactory.getCathDatabase()); 755 } 756 757 /** 758 * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code structureName}, using the specified {@link CathDatabase}. 759 */ 760 public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException { 761 CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier()); 762 763 Structure s = getStructureForPdbId(cathDomain.getIdentifier()); 764 Structure n = cathDomain.reduce(s); 765 766 // add the ligands of the chain... 767 Chain newChain = n.getPolyChainByPDB(structureName.getChainId()); 768 List<Chain> origChains = s.getNonPolyChainsByPDB(structureName.getChainId()); 769 for (Chain origChain : origChains) { 770 List<Group> ligands = origChain.getAtomGroups(); 771 772 for (Group g : ligands) { 773 if (!newChain.getAtomGroups().contains(g)) { 774 newChain.addGroup(g); 775 } 776 } 777 } 778 779 return n; 780 } 781 782 protected void flagLoading(PdbId pdbId) { 783 String id = pdbId.getId(); 784 if (!currentlyLoading.contains(id)) { 785 currentlyLoading.add(id); 786 } 787 } 788 789 protected void flagLoadingFinished(PdbId pdbId) { 790 currentlyLoading.remove(pdbId.getId()); 791 } 792 793 /** 794 * Loads a structure directly by PDB ID 795 * @param pdbId 796 * @return 797 * @throws IOException 798 * @throws StructureException 799 */ 800 public Structure getStructureForPdbId(String id) throws IOException, StructureException { 801 if (id == null) 802 return null; 803 return getStructureForPdbId(new PdbId(id)); 804 } 805 /** 806 * Loads a structure directly by PDB ID 807 * @param pdbId 808 * @return 809 * @throws IOException 810 * @throws StructureException 811 */ 812 public Structure getStructureForPdbId(PdbId pdbId) throws IOException { 813 if (pdbId == null) 814 return null; 815 816 while (checkLoading(pdbId)) { 817 // waiting for loading to be finished... 818 try { 819 Thread.sleep(100); 820 } catch (InterruptedException e) { 821 logger.error(e.getMessage()); 822 } 823 } 824 825 switch (filetype) { 826 case CIF: 827 logger.debug("loading from mmcif"); 828 return loadStructureFromCifByPdbId(pdbId); 829 case BCIF: 830 logger.debug("loading from bcif"); 831 return loadStructureFromBcifByPdbId(pdbId); 832 case MMTF: 833 logger.debug("loading from mmtf"); 834 return loadStructureFromMmtfByPdbId(pdbId); 835 case PDB: default: 836 logger.debug("loading from pdb"); 837 return loadStructureFromPdbByPdbId(pdbId); 838 } 839 } 840 841 842 protected Structure loadStructureFromMmtfByPdbId(String pdbId) throws IOException { 843 return loadStructureFromMmtfByPdbId(new PdbId(pdbId)); 844 } 845 846 /** 847 * Load a {@link Structure} from MMTF either from the local file system. 848 * @param pdbId the input PDB id 849 * @return the {@link Structure} object of the parsed structure 850 * @throws IOException error reading from Web or file system 851 */ 852 protected Structure loadStructureFromMmtfByPdbId(PdbId pdbId) throws IOException { 853 logger.debug("Loading structure {} from mmtf file.", pdbId); 854 MMTFFileReader reader = new MMTFFileReader(); 855 reader.setFetchBehavior(fetchBehavior); 856 reader.setObsoleteBehavior(obsoleteBehavior); 857 return reader.getStructureById(pdbId); 858 } 859 860 protected Structure loadStructureFromCifByPdbId(String pdbId) throws IOException { 861 return loadStructureFromCifByPdbId(new PdbId(pdbId)); 862 } 863 864 protected Structure loadStructureFromCifByPdbId(PdbId pdbId) throws IOException { 865 logger.debug("Loading structure {} from mmCIF file {}.", pdbId, path); 866 Structure s; 867 flagLoading(pdbId); 868 try { 869 CifFileReader reader = new CifFileReader(path); 870 reader.setFetchBehavior(fetchBehavior); 871 reader.setObsoleteBehavior(obsoleteBehavior); 872 reader.setFileParsingParameters(params); 873 s = reader.getStructureById(pdbId); 874 } finally { 875 flagLoadingFinished(pdbId); 876 } 877 878 return s; 879 } 880 881 protected Structure loadStructureFromBcifByPdbId(String pdbId) throws IOException { 882 return loadStructureFromBcifByPdbId(new PdbId(pdbId)); 883 } 884 protected Structure loadStructureFromBcifByPdbId(PdbId pdbId) throws IOException { 885 logger.debug("Loading structure {} from BinaryCIF file {}.", pdbId, path); 886 Structure s; 887 flagLoading(pdbId); 888 try { 889 BcifFileReader reader = new BcifFileReader(path); 890 reader.setFetchBehavior(fetchBehavior); 891 reader.setObsoleteBehavior(obsoleteBehavior); 892 reader.setFileParsingParameters(params); 893 s = reader.getStructureById(pdbId); 894 } finally { 895 flagLoadingFinished(pdbId); 896 } 897 898 return s; 899 } 900 901 protected Structure loadStructureFromPdbByPdbId(String pdbId) throws IOException { 902 return loadStructureFromPdbByPdbId(new PdbId(pdbId)); 903 } 904 905 protected Structure loadStructureFromPdbByPdbId(PdbId pdbId) throws IOException { 906 logger.debug("Loading structure {} from PDB file {}.", pdbId, path); 907 Structure s; 908 flagLoading(pdbId); 909 try { 910 PDBFileReader reader = new PDBFileReader(path); 911 reader.setFetchBehavior(fetchBehavior); 912 reader.setObsoleteBehavior(obsoleteBehavior); 913 914 reader.setFileParsingParameters(params); 915 916 s = reader.getStructureById(pdbId); 917 } finally { 918 flagLoadingFinished(pdbId); 919 } 920 921 return s; 922 } 923}