001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure;
022
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.MMCIFFileReader;
import org.biojava.nbio.structure.io.PDBFileReader;
030
031/**
032 * A class that provides static access methods for easy lookup of protein structure related components
033 *
034 * @author Andreas Prlic
035 *
036 * @since 3.0.5
037 */
038public class StructureIO {
039
040        //private static final Logger logger = LoggerFactory.getLogger(StructureIO.class);
041
042        private static AtomCache cache ;
043
044
045        /** 
046         * Loads a structure based on a name. Supported naming conventions are:
047         *
048         *  <pre>
049                Formal specification for how to specify the <i>name</i>:
050
051                name     := pdbID
052                               | pdbID '.' chainID
053                               | pdbID '.' range
054                               | scopID
055                               | biol
056                               | pdp
057                range         := '('? range (',' range)? ')'?
058                               | chainID
059                               | chainID '_' resNum '-' resNum
060                pdbID         := [0-9][a-zA-Z0-9]{3}
061                chainID       := [a-zA-Z0-9]
062                scopID        := 'd' pdbID [a-z_][0-9_]
063                biol              := 'BIO:' pdbID [:]? [0-9]+
064                pdp                       := 'PDP:' pdbID[A-Za-z0-9_]+
065                resNum        := [-+]?[0-9]+[A-Za-z]?
066
067
068                Example structures:
069                1TIM            #whole structure - asym unit
070                4HHB.C          #single chain
071                4GCR.A_1-83 #one domain, by residue number
072                3AA0.A,B    #two chains treated as one structure
073                d2bq6a1     #scop domain
074                BIO:1fah   #biological assembly nr 1 for 1fah
075                BIO:1fah:0 #asym unit for 1fah
076                BIO:1fah:1 #biological assembly nr 1 for 1fah
077                BIO:1fah:2 #biological assembly nr 2 for 1fah
078
079     * </pre>
080         *
081         * With the additional set of rules:
082         *
083         *  <ul>
084         *  <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model only (for NMR).
085         *      <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A </li>
086         *  <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, see {@link #setStrictSCOP(boolean)}</li>
087         *  <li>URLs are accepted as well</li>
088         *  </ul>
089         *
090         * @param name
091         * @return a Structure object, or null if name appears improperly formated (eg too short, etc)
092         * @throws IOException The PDB file cannot be cached due to IO errors
093         * @throws StructureException The name appeared valid but did not correspond to a structure.
094         *      Also thrown by some submethods upon errors, eg for poorly formatted subranges.
095         */
096        public static Structure getStructure(String name) throws IOException, StructureException{
097
098                checkInitAtomCache();
099
100                // delegate this functionality to AtomCache...
101
102                return cache.getStructure(name);
103
104        }
105
106
107        private static void checkInitAtomCache() {
108                if ( cache == null){
109                        cache = new AtomCache();
110                }
111
112        }
113
114        public static void setAtomCache(AtomCache c){
115                cache = c;
116        }
117
118        public static AtomCache getAtomCache() {
119                return cache;
120        }
121
122
123        /**
124         * Returns the first biological assembly that is available for the given PDB id.
125         * <p>
126         * The output Structure will be different depending on the multiModel parameter:
127         * <li>
128         * the symmetry-expanded chains are added as new models, one per transformId. All original models but 
129         * the first one are discarded.
130         * </li>
131         * <li>
132         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 
133         * originalAsymId_transformId and originalAuthId_transformId)
134         * </li> 
135         * <p> 
136         * For more documentation on quaternary structures see:
137         * {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies}
138         *
139         *
140         * @param pdbId
141         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 
142         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).              
143         * @return a Structure object or null if that assembly is not available
144         * @throws StructureException
145         * @throws IOException
146         */
147        public static Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws IOException, StructureException{
148
149                checkInitAtomCache();           
150
151                pdbId = pdbId.toLowerCase();
152                
153                Structure s = cache.getBiologicalAssembly(pdbId, multiModel); 
154                
155                return s;
156        }
157        
158        /**
159         * Returns the first biological assembly that is available for the given PDB id, 
160         * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
161         * <p> 
162         * For more documentation on quaternary structures see:
163         * {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies}
164         *
165         *
166         * @param pdbId
167         * @return a Structure object or null if that assembly is not available
168         * @throws StructureException
169         * @throws IOException
170         */
171        public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException{
172                return getBiologicalAssembly(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
173        }       
174
175        /**
176         * Returns the biological assembly for the given PDB id and bioassembly identifier.
177         * <p>
178         * The output Structure will be different depending on the multiModel parameter:
179         * <li>
180         * the symmetry-expanded chains are added as new models, one per transformId. All original models but 
181         * the first one are discarded.
182         * </li>
183         * <li>
184         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 
185         * originalAsymId_transformId and originalAuthId_transformId)
186         * </li>  
187         * @param pdbId
188         * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
189         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 
190         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).              
191         * @return a Structure object or null if that assembly is not available
192         * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
193         * @throws IOException
194         */
195        public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, boolean multiModel) throws IOException, StructureException {
196                
197                checkInitAtomCache();           
198
199                pdbId = pdbId.toLowerCase();
200                
201                Structure s = cache.getBiologicalAssembly(pdbId, biolAssemblyNr, multiModel); 
202                
203                return s;
204        }
205        
206        /**
207         * Returns the biological assembly for the given PDB id and bioassembly identifier,
208         * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
209         * @param pdbId
210         * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
211         * @return a Structure object or null if that assembly is not available
212         * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
213         * @throws IOException
214         */
215        public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException {
216                return getBiologicalAssembly(pdbId, biolAssemblyNr, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
217        }
218                
219        
220        /**
221         * Returns all biological assemblies for the given PDB id.
222         * <p>
223         * The output Structure will be different depending on the multiModel parameter:
224         * <li>
225         * the symmetry-expanded chains are added as new models, one per transformId. All original models but 
226         * the first one are discarded.
227         * </li>
228         * <li>
229         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 
230         * originalAsymId_transformId and originalAuthId_transformId)
231         * </li>  
232         * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
233         * @param pdbId
234         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 
235         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).              
236         * @return
237         * @throws IOException
238         * @throws StructureException
239         * @since 5.0
240         */
241        public static List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws IOException, StructureException {
242
243                checkInitAtomCache();           
244
245                pdbId = pdbId.toLowerCase();            
246                
247                List<Structure> s = cache.getBiologicalAssemblies(pdbId, multiModel); 
248                
249                return s;
250                
251        }
252
253        /**
254         * Returns all biological assemblies for the given PDB id,
255         * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
256         * <p>
257         * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
258         * @param pdbId
259         * @return
260         * @throws IOException
261         * @throws StructureException
262         * @since 5.0
263         */
264        public static List<Structure> getBiologicalAssemblies(String pdbId) throws IOException, StructureException {
265                return getBiologicalAssemblies(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
266        }
267        
268
269        private static final String FILE_SEPARATOR = System.getProperty("file.separator");
270
271        /**
272         * Utility method to set the location where PDB files can be found
273         *
274         * @param pathToPDBFiles
275         */
276        public static void setPdbPath(String pathToPDBFiles){
277
278                if ( ! pathToPDBFiles.endsWith(FILE_SEPARATOR))
279                        pathToPDBFiles += FILE_SEPARATOR;
280        }
281
282
283        public static enum StructureFiletype {
284                PDB( (new PDBFileReader()).getExtensions()),
285                CIF( new MMCIFFileReader().getExtensions()),
286                UNKNOWN(Collections.<String>emptyList());
287
288                private List<String> extensions;
289                /**
290                 * @param extensions List of supported extensions, including leading period
291                 */
292                private StructureFiletype(List<String> extensions) {
293                        this.extensions = extensions;
294                }
295                /**
296                 * @return a list of file extensions associated with this type
297                 */
298                public List<String> getExtensions() {
299                        return extensions;
300                }
301        }
302
303        /**
304         * Attempts to guess the type of a structure file based on the extension
305         * @param filename
306         * @return
307         */
308        public static StructureFiletype guessFiletype(String filename) {
309                String lower = filename.toLowerCase();
310                for(StructureFiletype type : StructureFiletype.values()) {
311                        for(String ext : type.getExtensions()) {
312                                if(lower.endsWith(ext.toLowerCase())) {
313                                        return type;
314                                }
315                        }
316                }
317                return StructureFiletype.UNKNOWN;
318        }
319}