001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure; 022 023import java.io.IOException; 024import java.util.Collections; 025import java.util.List; 026 027import org.biojava.nbio.structure.align.util.AtomCache; 028import org.biojava.nbio.structure.io.MMCIFFileReader; 029import org.biojava.nbio.structure.io.PDBFileReader; 030 031/** 032 * A class that provides static access methods for easy lookup of protein structure related components 033 * 034 * @author Andreas Prlic 035 * 036 * @since 3.0.5 037 */ 038public class StructureIO { 039 040 //private static final Logger logger = LoggerFactory.getLogger(StructureIO.class); 041 042 private static AtomCache cache ; 043 044 045 /** 046 * Loads a structure based on a name. Supported naming conventions are: 047 * 048 * <pre> 049 Formal specification for how to specify the <i>name</i>: 050 051 name := pdbID 052 | pdbID '.' chainID 053 | pdbID '.' range 054 | scopID 055 | biol 056 | pdp 057 range := '('? range (',' range)? ')'? 058 | chainID 059 | chainID '_' resNum '-' resNum 060 pdbID := [0-9][a-zA-Z0-9]{3} 061 chainID := [a-zA-Z0-9] 062 scopID := 'd' pdbID [a-z_][0-9_] 063 biol := 'BIO:' pdbID [:]? [0-9]+ 064 pdp := 'PDP:' pdbID[A-Za-z0-9_]+ 065 resNum := [-+]?[0-9]+[A-Za-z]? 066 067 068 Example structures: 069 1TIM #whole structure - asym unit 070 4HHB.C #single chain 071 4GCR.A_1-83 #one domain, by residue number 072 3AA0.A,B #two chains treated as one structure 073 d2bq6a1 #scop domain 074 BIO:1fah #biological assembly nr 1 for 1fah 075 BIO:1fah:0 #asym unit for 1fah 076 BIO:1fah:1 #biological assembly nr 1 for 1fah 077 BIO:1fah:2 #biological assembly nr 2 for 1fah 078 079 * </pre> 080 * 081 * With the additional set of rules: 082 * 083 * <ul> 084 * <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model only (for NMR). 085 * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A </li> 086 * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, see {@link #setStrictSCOP(boolean)}</li> 087 * <li>URLs are accepted as well</li> 088 * </ul> 089 * 090 * @param name 091 * @return a Structure object, or null if name appears improperly formated (eg too short, etc) 092 * @throws IOException The PDB file cannot be cached due to IO errors 093 * @throws StructureException The name appeared valid but did not correspond to a structure. 094 * Also thrown by some submethods upon errors, eg for poorly formatted subranges. 095 */ 096 public static Structure getStructure(String name) throws IOException, StructureException{ 097 098 checkInitAtomCache(); 099 100 // delegate this functionality to AtomCache... 101 102 return cache.getStructure(name); 103 104 } 105 106 107 private static void checkInitAtomCache() { 108 if ( cache == null){ 109 cache = new AtomCache(); 110 } 111 112 } 113 114 public static void setAtomCache(AtomCache c){ 115 cache = c; 116 } 117 118 public static AtomCache getAtomCache() { 119 checkInitAtomCache(); 120 return cache; 121 } 122 123 124 /** 125 * Returns the first biological assembly that is available for the given PDB id. 126 * <p> 127 * The output Structure will be different depending on the multiModel parameter: 128 * <li> 129 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 130 * the first one are discarded. 131 * </li> 132 * <li> 133 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 134 * originalAsymId_transformId and originalAuthId_transformId) 135 * </li> 136 * <p> 137 * For more documentation on quaternary structures see: 138 * {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies} 139 * 140 * 141 * @param pdbId 142 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 143 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 144 * @return a Structure object or null if that assembly is not available 145 * @throws StructureException 146 * @throws IOException 147 */ 148 public static Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws IOException, StructureException{ 149 150 checkInitAtomCache(); 151 152 pdbId = pdbId.toLowerCase(); 153 154 Structure s = cache.getBiologicalAssembly(pdbId, multiModel); 155 156 return s; 157 } 158 159 /** 160 * Returns the first biological assembly that is available for the given PDB id, 161 * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE} 162 * <p> 163 * For more documentation on quaternary structures see: 164 * {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies} 165 * 166 * 167 * @param pdbId 168 * @return a Structure object or null if that assembly is not available 169 * @throws StructureException 170 * @throws IOException 171 */ 172 public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException{ 173 return getBiologicalAssembly(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE); 174 } 175 176 /** 177 * Returns the biological assembly for the given PDB id and bioassembly identifier. 178 * <p> 179 * The output Structure will be different depending on the multiModel parameter: 180 * <li> 181 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 182 * the first one are discarded. 183 * </li> 184 * <li> 185 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 186 * originalAsymId_transformId and originalAuthId_transformId) 187 * </li> 188 * @param pdbId 189 * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit). 190 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 191 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 192 * @return a Structure object or null if that assembly is not available 193 * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it 194 * @throws IOException 195 */ 196 public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, boolean multiModel) throws IOException, StructureException { 197 198 checkInitAtomCache(); 199 200 pdbId = pdbId.toLowerCase(); 201 202 Structure s = cache.getBiologicalAssembly(pdbId, biolAssemblyNr, multiModel); 203 204 return s; 205 } 206 207 /** 208 * Returns the biological assembly for the given PDB id and bioassembly identifier, 209 * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE} 210 * @param pdbId 211 * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit). 212 * @return a Structure object or null if that assembly is not available 213 * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it 214 * @throws IOException 215 */ 216 public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException { 217 return getBiologicalAssembly(pdbId, biolAssemblyNr, AtomCache.DEFAULT_BIOASSEMBLY_STYLE); 218 } 219 220 221 /** 222 * Returns all biological assemblies for the given PDB id. 223 * <p> 224 * The output Structure will be different depending on the multiModel parameter: 225 * <li> 226 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 227 * the first one are discarded. 228 * </li> 229 * <li> 230 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 231 * originalAsymId_transformId and originalAuthId_transformId) 232 * </li> 233 * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead. 234 * @param pdbId 235 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 236 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 237 * @return 238 * @throws IOException 239 * @throws StructureException 240 * @since 5.0 241 */ 242 public static List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws IOException, StructureException { 243 244 checkInitAtomCache(); 245 246 pdbId = pdbId.toLowerCase(); 247 248 List<Structure> s = cache.getBiologicalAssemblies(pdbId, multiModel); 249 250 return s; 251 252 } 253 254 /** 255 * Returns all biological assemblies for the given PDB id, 256 * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE} 257 * <p> 258 * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead. 259 * @param pdbId 260 * @return 261 * @throws IOException 262 * @throws StructureException 263 * @since 5.0 264 */ 265 public static List<Structure> getBiologicalAssemblies(String pdbId) throws IOException, StructureException { 266 return getBiologicalAssemblies(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE); 267 } 268 269 270 private static final String FILE_SEPARATOR = System.getProperty("file.separator"); 271 272 /** 273 * Utility method to set the location where PDB files can be found 274 * 275 * @param pathToPDBFiles 276 */ 277 public static void setPdbPath(String pathToPDBFiles){ 278 279 if ( ! pathToPDBFiles.endsWith(FILE_SEPARATOR)) 280 pathToPDBFiles += FILE_SEPARATOR; 281 } 282 283 284 public static enum StructureFiletype { 285 PDB( (new PDBFileReader()).getExtensions()), 286 CIF( new MMCIFFileReader().getExtensions()), 287 UNKNOWN(Collections.<String>emptyList()); 288 289 private List<String> extensions; 290 /** 291 * @param extensions List of supported extensions, including leading period 292 */ 293 private StructureFiletype(List<String> extensions) { 294 this.extensions = extensions; 295 } 296 /** 297 * @return a list of file extensions associated with this type 298 */ 299 public List<String> getExtensions() { 300 return extensions; 301 } 302 } 303 304 /** 305 * Attempts to guess the type of a structure file based on the extension 306 * @param filename 307 * @return 308 */ 309 public static StructureFiletype guessFiletype(String filename) { 310 String lower = filename.toLowerCase(); 311 for(StructureFiletype type : StructureFiletype.values()) { 312 for(String ext : type.getExtensions()) { 313 if(lower.endsWith(ext.toLowerCase())) { 314 return type; 315 } 316 } 317 } 318 return StructureFiletype.UNKNOWN; 319 } 320}