/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.MMCIFFileReader;
import org.biojava.nbio.structure.io.PDBFileReader;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.quaternary.io.BioUnitDataProvider;
import org.biojava.nbio.structure.quaternary.io.BioUnitDataProviderFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A class that provides static access methods for easy lookup of protein structure related components.
 *
 * <p>All lookups go through a single static {@link AtomCache}, which is created lazily on first use
 * (without synchronization) unless one has been supplied through {@link #setAtomCache(AtomCache)}.
 *
 * @author Andreas Prlic
 *
 * @since 3.0.5
 */
public class StructureIO {

	private static final Logger logger = LoggerFactory.getLogger(StructureIO.class);

	private static final String FILE_SEPARATOR = System.getProperty("file.separator");

	/** Shared cache used by all static lookups; {@code null} until first needed or explicitly set. */
	private static AtomCache cache;


	/**
	 * Loads a structure based on a name. The lookup is delegated to the shared {@link AtomCache}.
	 * Supported naming conventions are:
	 *
	 * <pre>
		Formal specification for how to specify the <i>name</i>:

		name     := pdbID
		               | pdbID '.' chainID
		               | pdbID '.' range
		               | scopID
		               | biol
		               | pdp
		range         := '('? range (',' range)? ')'?
		               | chainID
		               | chainID '_' resNum '-' resNum
		pdbID         := [0-9][a-zA-Z0-9]{3}
		chainID       := [a-zA-Z0-9]
		scopID        := 'd' pdbID [a-z_][0-9_]
		biol		  := 'BIOL:' pdbID [:]? [0-9]+
		pdp           := 'PDP:' pdbID[A-Za-z0-9_]+
		resNum        := [-+]?[0-9]+[A-Za-z]?


		Example structures:
		1TIM     	#whole structure - asym unit
		4HHB.C     	#single chain
		4GCR.A_1-83 #one domain, by residue number
		3AA0.A,B    #two chains treated as one structure
		d2bq6a1     #scop domain
		BIOL:1fah   #biological assembly nr 1 for 1fah
		BIOL:1fah:0 #asym unit for 1fah
		BIOL:1fah:1 #biological assembly nr 1 for 1fah
		BIOL:1fah:2 #biological assembly nr 2 for 1fah

	 * </pre>
	 *
	 * With the additional set of rules:
	 *
	 * <ul>
	 * <li>If only a PDB code is provided, the whole structure will be returned including ligands, but the first model only (for NMR).</li>
	 * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A </li>
	 * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, see {@code AtomCache.setStrictSCOP(boolean)}</li>
	 * <li>URLs are accepted as well</li>
	 * </ul>
	 *
	 * @param name the structure name, following the specification above
	 * @return a Structure object, or null if name appears improperly formatted (eg too short, etc)
	 * @throws IOException The PDB file cannot be cached due to IO errors
	 * @throws StructureException The name appeared valid but did not correspond to a structure.
	 * 	Also thrown by some submethods upon errors, eg for poorly formatted subranges.
	 */
	public static Structure getStructure(String name) throws IOException, StructureException {

		checkInitAtomCache();

		// delegate this functionality to AtomCache...
		return cache.getStructure(name);
	}


	/**
	 * Creates the shared {@link AtomCache} with its default constructor if none has been set yet.
	 */
	private static void checkInitAtomCache() {
		if (cache == null) {
			cache = new AtomCache();
		}
	}

	/**
	 * Replaces the shared {@link AtomCache} used by the static lookup methods of this class.
	 *
	 * @param c the cache to use from now on; if {@code null}, a default cache is created on the next lookup
	 */
	public static void setAtomCache(AtomCache c) {
		cache = c;
	}

	/**
	 * Returns the first biological assembly that is available for a protein structure. For more documentation on quaternary structures see:
	 * <a href="http://www.pdb.org/pdb/101/static101.do?p=education_discussion/Looking-at-Structures/bioassembly_tutorial.html">the PDB biological assembly tutorial</a>.
	 *
	 * <p>Equivalent to {@code getBiologicalAssembly(pdbId, 1)}.
	 *
	 * @param pdbId the PDB identifier
	 * @return a Structure object
	 * @throws StructureException if biological assembly nr 1 is not available or some other problem is encountered while loading it
	 * @throws IOException
	 */
	public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException {

		return getBiologicalAssembly(pdbId, 1);
	}

	/**
	 * By default the getStructure method loads asym units. This access method allows to recreate the quaternary structure for a protein if it is available.
	 * The shared {@link AtomCache} of this class is used.
	 *
	 * @param pdbId the PDB identifier
	 * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
	 * @return a Structure object
	 * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
	 * @throws IOException
	 */
	public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException {
		checkInitAtomCache();
		return getBiologicalAssembly(pdbId, biolAssemblyNr, StructureIO.cache);
	}

	/**
	 * Recreates a biological assembly using a {@link BioUnitDataProvider} obtained from the
	 * {@link BioUnitDataProviderFactory} and configured with the given cache.
	 *
	 * <p>The provider's asym unit and atom cache references are cleared before returning,
	 * whether or not loading succeeded.
	 *
	 * @param pdbId the PDB identifier
	 * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
	 * @param cache the cache handed to the provider
	 * @return a Structure object
	 * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
	 * @throws IOException
	 */
	public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, AtomCache cache) throws IOException, StructureException {

		BioUnitDataProvider provider = null;
		try {
			provider = BioUnitDataProviderFactory.getBioUnitDataProvider();
			provider.setAtomCache(cache);
			// Use the provider configured above. Asking the factory for a second one here
			// would bypass the cache that was just set and would escape the cleanup below.
			return getBiologicalAssembly(pdbId, biolAssemblyNr, cache, provider);
		} finally {
			if (provider != null) {
				//cleanup to avoid memory leaks
				provider.setAsymUnit(null);
				provider.setAtomCache(null);
			}
		}
	}

	/**
	 * Recreates a biological assembly from the asym unit delivered by the given provider.
	 *
	 * <p>The {@code cache} argument is not read by this method itself; the provider is used as passed in.
	 *
	 * @param pdbId the PDB identifier; it is lower-cased before being passed to the provider
	 * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
	 * @param cache not used directly by this method
	 * @param provider the source of the asym unit
	 * @return the asym unit as returned by the provider if {@code biolAssemblyNr} is 0, otherwise the
	 * 	structure rebuilt by {@link BiologicalAssemblyBuilder} from the assembly's transformations
	 * @throws StructureException if the asym unit's header has no entry for {@code biolAssemblyNr},
	 * 	or if that entry has no transformations
	 * @throws IOException
	 */
	public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, AtomCache cache, BioUnitDataProvider provider) throws IOException, StructureException {

		// Locale.ROOT keeps the conversion independent of the JVM's default locale
		pdbId = pdbId.toLowerCase(Locale.ROOT);

		Structure asymUnit = provider.getAsymUnit(pdbId);

		// 0 ... asym unit
		if (biolAssemblyNr == 0) {
			logger.info("Requested biological assembly 0 for PDB id {}, returning asymmetric unit", pdbId);
			return asymUnit;
		}

		// does it exist?
		if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(biolAssemblyNr)) {
			throw new StructureException("No biological assembly available for biological assembly nr " + biolAssemblyNr + " of " + pdbId);
		}

		List<BiologicalAssemblyTransformation> transformations =
				asymUnit.getPDBHeader().getBioAssemblies().get(biolAssemblyNr).getTransforms();

		if (transformations == null || transformations.isEmpty()) {
			throw new StructureException("Could not load transformations to recreate biological assembly nr " + biolAssemblyNr + " of " + pdbId);
		}

		BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();

		return builder.rebuildQuaternaryStructure(asymUnit, transformations);
	}

	/**
	 * Does the provider PDB ID have a biological assembly?
	 *
	 * @param pdbId the PDB identifier; it is lower-cased before being passed to the provider
	 * @return flag if one or more biological assemblies are available
	 */
	public static boolean hasBiologicalAssembly(String pdbId) {

		pdbId = pdbId.toLowerCase(Locale.ROOT);

		BioUnitDataProvider provider = BioUnitDataProviderFactory.getBioUnitDataProvider();
		checkInitAtomCache();
		provider.setAtomCache(cache);
		return provider.hasBiolAssembly(pdbId);
	}

	/**
	 * Asks a {@link BioUnitDataProvider} for the number of biological assemblies of a PDB ID.
	 *
	 * @param pdbId the PDB identifier; it is lower-cased before being passed to the provider
	 * @return the value reported by the provider's {@code getNrBiolAssemblies}
	 */
	public static int getNrBiologicalAssemblies(String pdbId) {

		pdbId = pdbId.toLowerCase(Locale.ROOT);

		BioUnitDataProvider provider = BioUnitDataProviderFactory.getBioUnitDataProvider();
		checkInitAtomCache();
		provider.setAtomCache(cache);
		return provider.getNrBiolAssemblies(pdbId);
	}

	/**
	 * Utility method to set the location where PDB files can be found
	 *
	 * <p>NOTE(review): as written this method has no effect. It only appends a trailing file
	 * separator to its own parameter and never stores or forwards the result. Presumably the
	 * path was meant to be applied to the shared cache - confirm the intended target before
	 * relying on this method.
	 *
	 * @param pathToPDBFiles directory containing the PDB files
	 */
	public static void setPdbPath(String pathToPDBFiles) {

		if (!pathToPDBFiles.endsWith(FILE_SEPARATOR)) {
			pathToPDBFiles += FILE_SEPARATOR;
		}
	}


	/**
	 * Structure file formats, each with the file extensions reported by its reader.
	 */
	public enum StructureFiletype {
		PDB(new PDBFileReader().getExtensions()),
		CIF(new MMCIFFileReader().getExtensions()),
		UNKNOWN(Collections.<String>emptyList());

		private final List<String> extensions;

		/**
		 * @param extensions List of supported extensions, including leading period
		 */
		private StructureFiletype(List<String> extensions) {
			this.extensions = extensions;
		}

		/**
		 * @return a list of file extensions associated with this type
		 */
		public List<String> getExtensions() {
			return extensions;
		}
	}

	/**
	 * Attempts to guess the type of a structure file based on the extension.
	 * The comparison ignores case; types are tried in declaration order and the first
	 * type with a matching extension wins.
	 *
	 * @param filename the file name to inspect
	 * @return the first matching type, or {@link StructureFiletype#UNKNOWN} if no extension matches
	 */
	public static StructureFiletype guessFiletype(String filename) {
		// Locale.ROOT: default-locale case mapping (e.g. Turkish dotless i) would
		// otherwise prevent upper-case extensions such as ".CIF" from matching.
		String lower = filename.toLowerCase(Locale.ROOT);
		for (StructureFiletype type : StructureFiletype.values()) {
			for (String ext : type.getExtensions()) {
				if (lower.endsWith(ext.toLowerCase(Locale.ROOT))) {
					return type;
				}
			}
		}
		return StructureFiletype.UNKNOWN;
	}
}