001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure;
022
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.StructureFiletype;

import java.io.IOException;
import java.util.List;
import java.util.Locale;
028
029/**
030 * A class that provides static access methods for easy lookup of protein structure related components
031 *
032 * @author Andreas Prlic
033 *
034 * @since 3.0.5
035 */
036public class StructureIO {
037        private static AtomCache cache ;
038
039        /**
040         * Loads a structure based on a name. Supported naming conventions are:
041         *
042         *  <pre>
043                Formal specification for how to specify the <i>name</i>:
044
045                name     := pdbID
046                                           | pdbID '.' chainID
047                                           | pdbID '.' range
048                                           | scopID
049                                           | biol
050                                           | pdp
051                range         := '('? range (',' range)? ')'?
052                                           | chainID
053                                           | chainID '_' resNum '-' resNum
054                pdbID         := [1-9][a-zA-Z0-9]{3}
055                                           | PDB_[a-zA-Z0-9]{8}
056                chainID       := [a-zA-Z0-9]
057                scopID        := 'd' pdbID [a-z_][0-9_]
058                biol              := 'BIO:' pdbID [:]? [0-9]+
059                resNum        := [-+]?[0-9]+[A-Za-z]?
060
061
062                Example structures:
063                1TIM                #whole structure - asym unit (short format)
064                4HHB.C              #single chain
065                4GCR.A_1-83         #one domain, by residue number
066                3AA0.A,B            #two chains treated as one structure
067                PDB_00001TIM        #whole structure - asym unit (extended format)
068                PDB_00004HHB.C      #single chain
069                PDB_00004GCR.A_1-83 #one domain, by residue number
070                PDB_00003AA0.A,B    #two chains treated as one structure
071                d2bq6a1     #scop domain
072                BIO:1fah   #biological assembly nr 1 for 1fah
073                BIO:1fah:0 #asym unit for 1fah
074                BIO:1fah:1 #biological assembly nr 1 for 1fah
075                BIO:1fah:2 #biological assembly nr 2 for 1fah
076
077         * </pre>
078         *
079         * With the additional set of rules:
080         *
081         *  <ul>
082         *  <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model only (for NMR).
083         *      <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A </li>
084         *  <li>To specify a SCOP domain write a scopId e.g. d2bq6a1</li>
085         *  <li>URLs are accepted as well</li>
086         *  </ul>
087         *
088         * @param name
089         * @return a Structure object, or null if name appears improperly formated (eg too short, etc)
090         * @throws IOException The PDB file cannot be cached due to IO errors
091         * @throws StructureException The name appeared valid but did not correspond to a structure.
092         *      Also thrown by some submethods upon errors, eg for poorly formatted subranges.
093         */
094        public static Structure getStructure(String name) throws IOException, StructureException {
095                checkInitAtomCache();
096                // delegate this functionality to AtomCache...
097                return cache.getStructure(name);
098        }
099
100        private static void checkInitAtomCache() {
101                if (cache == null) {
102                        cache = new AtomCache();
103                }
104        }
105
106        public static void setAtomCache(AtomCache c){
107                cache = c;
108        }
109
110        public static AtomCache getAtomCache() {
111                checkInitAtomCache();
112                return cache;
113        }
114
115        /**
116         * Returns the first biological assembly that is available for the given PDB id.
117         * <p>
118         * The output Structure will be different depending on the multiModel parameter:
119         * <ul>
120         * <li>
121         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
122         * the first one are discarded.
123         * </li>
124         * <li>
125         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
126         * originalAsymId_transformId and originalAuthId_transformId)
127         * </li>
128         * </ul>
129         * <p>
130         * For more documentation on quaternary structures see:
131         * <a href="http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies">http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies</a>
132         *
133         *
134         * @param pdbId
135         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
136         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
137         * @return a Structure object or null if that assembly is not available
138         * @throws StructureException
139         * @throws IOException
140         */
141        public static Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws IOException, StructureException {
142                checkInitAtomCache();
143                pdbId = pdbId.toLowerCase();
144                return cache.getBiologicalAssembly(pdbId, multiModel);
145        }
146
147        /**
148         * Returns the first biological assembly that is available for the given PDB id,
149         * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
150         * <p>
151         * For more documentation on quaternary structures see:
152         * <a href="http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies">http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies</a>
153         *
154         *
155         * @param pdbId
156         * @return a Structure object or null if that assembly is not available
157         * @throws StructureException
158         * @throws IOException
159         */
160        public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException {
161                return getBiologicalAssembly(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
162        }
163
164        /**
165         * Returns the biological assembly for the given PDB id and bioassembly identifier.
166         * <p>
167         * The output Structure will be different depending on the multiModel parameter:
168         * <ul>
169         * <li>
170         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
171         * the first one are discarded.
172         * </li>
173         * <li>
174         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
175         * originalAsymId_transformId and originalAuthId_transformId)
176         * </li>
177         * </ul>
178         * @param pdbId
179         * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
180         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
181         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
182         * @return a Structure object or null if that assembly is not available
183         * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
184         * @throws IOException
185         */
186        public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, boolean multiModel) throws IOException, StructureException {
187                checkInitAtomCache();
188                pdbId = pdbId.toLowerCase();
189                return cache.getBiologicalAssembly(pdbId, biolAssemblyNr, multiModel);
190        }
191
192        /**
193         * Returns the biological assembly for the given PDB id and bioassembly identifier,
194         * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
195         * @param pdbId
196         * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
197         * @return a Structure object or null if that assembly is not available
198         * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
199         * @throws IOException
200         */
201        public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException {
202                return getBiologicalAssembly(pdbId, biolAssemblyNr, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
203        }
204
205        /**
206         * Returns all biological assemblies for the given PDB id.
207         * <p>
208         * The output Structure will be different depending on the multiModel parameter:
209         * <ul>
210         * <li>
211         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
212         * the first one are discarded.
213         * </li>
214         * <li>
215         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
216         * originalAsymId_transformId and originalAuthId_transformId)
217         * </li>
218         * </ul>
219         * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
220         * @param pdbId
221         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
222         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
223         * @return
224         * @throws IOException
225         * @throws StructureException
226         * @since 5.0
227         */
228        public static List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws IOException, StructureException {
229                checkInitAtomCache();
230                pdbId = pdbId.toLowerCase();
231                return cache.getBiologicalAssemblies(pdbId, multiModel);
232        }
233
234        /**
235         * Returns all biological assemblies for the given PDB id,
236         * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
237         * <p>
238         * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
239         * @param pdbId
240         * @return
241         * @throws IOException
242         * @throws StructureException
243         * @since 5.0
244         */
245        public static List<Structure> getBiologicalAssemblies(String pdbId) throws IOException, StructureException {
246                return getBiologicalAssemblies(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
247        }
248
249        /**
250         * Attempts to guess the type of a structure file based on the extension
251         * @param filename
252         * @return
253         */
254        public static StructureFiletype guessFiletype(String filename) {
255                String lower = filename.toLowerCase();
256                for (StructureFiletype type : StructureFiletype.values()) {
257                        for (String ext : type.getExtensions()) {
258                                if (lower.endsWith(ext.toLowerCase())) {
259                                        return type;
260                                }
261                        }
262                }
263                return StructureFiletype.UNKNOWN;
264        }
265}