/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.StructureFiletype;

import java.io.IOException;
import java.util.List;
import java.util.Locale;

/**
 * A class that provides static access methods for easy lookup of protein structure related components.
 * <p>
 * All lookups are delegated to a single shared {@link AtomCache}, which is created lazily on first use
 * (see {@link #getAtomCache()}) or can be supplied explicitly via {@link #setAtomCache(AtomCache)}.
 * Note that the lazy creation of the shared cache is not synchronized.
 *
 * @author Andreas Prlic
 *
 * @since 3.0.5
 */
public class StructureIO {

    /** Shared cache all lookups are delegated to; {@code null} until first use or until set explicitly. */
    private static AtomCache cache;

    /**
     * Loads a structure based on a name. Supported naming conventions are:
     *
     * <pre>
        Formal specification for how to specify the <i>name</i>:

        name     := pdbID
                       | pdbID '.' chainID
                       | pdbID '.' range
                       | scopID
                       | biol
                       | pdp
        range    := '('? range (',' range)? ')'?
                       | chainID
                       | chainID '_' resNum '-' resNum
        pdbID    := [1-9][a-zA-Z0-9]{3}
                       | PDB_[a-zA-Z0-9]{8}
        chainID  := [a-zA-Z0-9]
        scopID   := 'd' pdbID [a-z_][0-9_]
        biol     := 'BIO:' pdbID [:]? [0-9]+
        resNum   := [-+]?[0-9]+[A-Za-z]?


        Example structures:
        1TIM                 #whole structure - asym unit (short format)
        4HHB.C               #single chain
        4GCR.A_1-83          #one domain, by residue number
        3AA0.A,B             #two chains treated as one structure
        PDB_00001TIM         #whole structure - asym unit (extended format)
        PDB_00004HHB.C       #single chain
        PDB_00004GCR.A_1-83  #one domain, by residue number
        PDB_00003AA0.A,B     #two chains treated as one structure
        d2bq6a1              #scop domain
        BIO:1fah             #biological assembly nr 1 for 1fah
        BIO:1fah:0           #asym unit for 1fah
        BIO:1fah:1           #biological assembly nr 1 for 1fah
        BIO:1fah:2           #biological assembly nr 2 for 1fah

     * </pre>
     *
     * With the additional set of rules:
     *
     * <ul>
     * <li>If only a PDB code is provided, the whole structure will be returned including ligands, but the first model only (for NMR).</li>
     * <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A </li>
     * <li>To specify a SCOP domain write a scopId e.g. d2bq6a1</li>
     * <li>URLs are accepted as well</li>
     * </ul>
     *
     * @param name the structure name, following the specification above
     * @return a Structure object, or null if name appears improperly formatted (eg too short, etc)
     * @throws IOException The PDB file cannot be cached due to IO errors
     * @throws StructureException The name appeared valid but did not correspond to a structure.
     * 	Also thrown by some submethods upon errors, eg for poorly formatted subranges.
     */
    public static Structure getStructure(String name) throws IOException, StructureException {
        checkInitAtomCache();
        // delegate this functionality to AtomCache...
        return cache.getStructure(name);
    }

    /**
     * Creates the shared {@link AtomCache} with its no-argument constructor if none has been set yet.
     */
    private static void checkInitAtomCache() {
        if (cache == null) {
            cache = new AtomCache();
        }
    }

    /**
     * Lower-cases a PDB id before it is handed to the {@link AtomCache}.
     * <p>
     * {@link Locale#ROOT} is used so that the result does not depend on the JVM default locale
     * (with a Turkish default locale, for instance, {@code "1TIM".toLowerCase()} yields a dotless i).
     *
     * @param pdbId the PDB id as given by the caller
     * @return the lower-cased PDB id
     */
    private static String normalizePdbId(String pdbId) {
        return pdbId.toLowerCase(Locale.ROOT);
    }

    /**
     * Replaces the shared {@link AtomCache} used by all static methods of this class.
     *
     * @param c the cache to use from now on; if null, a new default cache is created on next use
     */
    public static void setAtomCache(AtomCache c) {
        cache = c;
    }

    /**
     * Returns the shared {@link AtomCache}, creating a default one first if none has been set.
     *
     * @return the cache used by the static methods of this class
     */
    public static AtomCache getAtomCache() {
        checkInitAtomCache();
        return cache;
    }

    /**
     * Returns the first biological assembly that is available for the given PDB id.
     * <p>
     * The output Structure will be different depending on the multiModel parameter:
     * <ul>
     * <li>
     * if true: the symmetry-expanded chains are added as new models, one per transformId. All original models but
     * the first one are discarded.
     * </li>
     * <li>
     * if false: as original with symmetry-expanded chains added with renamed chain ids and names (in the form
     * originalAsymId_transformId and originalAuthId_transformId)
     * </li>
     * </ul>
     * <p>
     * For more documentation on quaternary structures see:
     * <a href="http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies">http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies</a>
     *
     *
     * @param pdbId the PDB id (case insensitive; it is lower-cased before the lookup)
     * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
     * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
     * @return a Structure object or null if that assembly is not available
     * @throws StructureException if thrown by the underlying {@link AtomCache} lookup
     * @throws IOException if thrown by the underlying {@link AtomCache} lookup
     */
    public static Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws IOException, StructureException {
        checkInitAtomCache();
        return cache.getBiologicalAssembly(normalizePdbId(pdbId), multiModel);
    }

    /**
     * Returns the first biological assembly that is available for the given PDB id,
     * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
     * <p>
     * For more documentation on quaternary structures see:
     * <a href="http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies">http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies</a>
     *
     *
     * @param pdbId the PDB id (case insensitive; it is lower-cased before the lookup)
     * @return a Structure object or null if that assembly is not available
     * @throws StructureException if thrown by the underlying {@link AtomCache} lookup
     * @throws IOException if thrown by the underlying {@link AtomCache} lookup
     */
    public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException {
        return getBiologicalAssembly(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
    }

    /**
     * Returns the biological assembly for the given PDB id and bioassembly identifier.
     * <p>
     * The output Structure will be different depending on the multiModel parameter:
     * <ul>
     * <li>
     * if true: the symmetry-expanded chains are added as new models, one per transformId. All original models but
     * the first one are discarded.
     * </li>
     * <li>
     * if false: as original with symmetry-expanded chains added with renamed chain ids and names (in the form
     * originalAsymId_transformId and originalAuthId_transformId)
     * </li>
     * </ul>
     * @param pdbId the PDB id (case insensitive; it is lower-cased before the lookup)
     * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
     * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
     * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
     * @return a Structure object or null if that assembly is not available
     * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
     * @throws IOException if thrown by the underlying {@link AtomCache} lookup
     */
    public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, boolean multiModel) throws IOException, StructureException {
        checkInitAtomCache();
        return cache.getBiologicalAssembly(normalizePdbId(pdbId), biolAssemblyNr, multiModel);
    }

    /**
     * Returns the biological assembly for the given PDB id and bioassembly identifier,
     * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
     * @param pdbId the PDB id (case insensitive; it is lower-cased before the lookup)
     * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
     * @return a Structure object or null if that assembly is not available
     * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
     * @throws IOException if thrown by the underlying {@link AtomCache} lookup
     */
    public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException {
        return getBiologicalAssembly(pdbId, biolAssemblyNr, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
    }

    /**
     * Returns all biological assemblies for the given PDB id.
     * <p>
     * The output Structure will be different depending on the multiModel parameter:
     * <ul>
     * <li>
     * if true: the symmetry-expanded chains are added as new models, one per transformId. All original models but
     * the first one are discarded.
     * </li>
     * <li>
     * if false: as original with symmetry-expanded chains added with renamed chain ids and names (in the form
     * originalAsymId_transformId and originalAuthId_transformId)
     * </li>
     * </ul>
     * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
     * @param pdbId the PDB id (case insensitive; it is lower-cased before the lookup)
     * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
     * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
     * @return the biological assemblies for the given PDB id
     * @throws IOException if thrown by the underlying {@link AtomCache} lookup
     * @throws StructureException if thrown by the underlying {@link AtomCache} lookup
     * @since 5.0
     */
    public static List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws IOException, StructureException {
        checkInitAtomCache();
        return cache.getBiologicalAssemblies(normalizePdbId(pdbId), multiModel);
    }

    /**
     * Returns all biological assemblies for the given PDB id,
     * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
     * <p>
     * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
     * @param pdbId the PDB id (case insensitive; it is lower-cased before the lookup)
     * @return the biological assemblies for the given PDB id
     * @throws IOException if thrown by the underlying {@link AtomCache} lookup
     * @throws StructureException if thrown by the underlying {@link AtomCache} lookup
     * @since 5.0
     */
    public static List<Structure> getBiologicalAssemblies(String pdbId) throws IOException, StructureException {
        return getBiologicalAssemblies(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
    }

    /**
     * Attempts to guess the type of a structure file based on the extension.
     * <p>
     * The file name is compared case-insensitively against every extension of every
     * {@link StructureFiletype}, in the order returned by {@code StructureFiletype.values()};
     * the first type with a matching extension wins.
     *
     * @param filename the file name (or path) to inspect
     * @return the first file type one of whose extensions the file name ends with,
     * or {@link StructureFiletype#UNKNOWN} if none matches
     */
    public static StructureFiletype guessFiletype(String filename) {
        // Locale.ROOT keeps the comparison independent of the JVM default locale
        // (e.g. ".CIF" must lower-case to ".cif" even under a Turkish locale).
        String lower = filename.toLowerCase(Locale.ROOT);
        for (StructureFiletype type : StructureFiletype.values()) {
            for (String ext : type.getExtensions()) {
                if (lower.endsWith(ext.toLowerCase(Locale.ROOT))) {
                    return type;
                }
            }
        }
        return StructureFiletype.UNKNOWN;
    }
}