001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jun 16, 2010
021 * Author: ap3
022 *
023 */
024
025package org.biojava.nbio.structure.io;
026
027import java.io.Serializable;
028
029import org.biojava.nbio.structure.AminoAcid;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033/**
034 * A class that configures parameters that can be sent to the PDB file parsers
035 *
036 * <ul>
037 * <li> {@link #setParseCAOnly(boolean)} - parse only the Atom records for C-alpha atoms</li>
038 * <li> {@link #setParseSecStruc(boolean)} - a flag if the secondary structure information from the PDB file (author's assignment) should be parsed.
039 *      If true the assignment can be accessed through {@link AminoAcid}.getSecStruc(); </li>
040 * <li> {@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES
041 *      and ATOM records of a PDB file be aligned? (default:yes)</li>
042 * <li> {@link #setHeaderOnly(boolean)} - parse only the PDB/mmCIF file header, ignoring coordinates
043 * </li>
044 * <li> {@link #setCreateAtomBonds(boolean)} - create atom bonds from parsed bonds in PDB/mmCIF files and chemical component files
045 * </li>
046 * </ul>
047 *
048 * @author Andreas Prlic
049 *
050 */
051public class FileParsingParameters implements Serializable
052{
053
054        private static final Logger logger = LoggerFactory.getLogger(FileParsingParameters.class);
055
056
057        private static final long serialVersionUID = 5878292315163939027L;
058
059
060
061        /**
062         * Flag to detect if the secondary structure info should be read
063         *
064         */
065        private boolean parseSecStruc;
066
067        /**
068         * Flag to control if SEQRES and ATOM records should be aligned
069         */
070        private boolean alignSeqRes;
071
072        /**
073         * Flag to control reading in only Calpha atoms - this is useful for parsing large structures like 1htq.
074         */
075        private boolean parseCAOnly;
076
077        /**
078         * Flag to parse header only
079         */
080        private boolean headerOnly;
081
082
083        /**
084         * Update locally cached files to the latest version of remediated files
085         */
086        private boolean updateRemediatedFiles;
087
088        /**
089         * The maximum number of atoms that will be parsed before the parser switches to a CA-only
090         * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
091         * ignored.
092         */
093        public static final int ATOM_CA_THRESHOLD = 500000;
094
095        private int atomCaThreshold;
096
097
098        /**
099         * Should we parse the biological assembly information from a file?
100         */
101        private boolean parseBioAssembly;
102
103        /**
104         * Should we create bonds between atoms when parsing a file?
105         */
106        private boolean createAtomBonds;
107
108        /**
109         * Should we create charges on atoms when parsing a file?
110         */
111        private boolean createAtomCharges;
112
113        /**
114         * Should we use internal (asym_id) or public facing (author) chain ids
115         */
116        private boolean useInternalChainId;
117        /**
118         * The maximum number of atoms we will add to a structure,
119         * this protects from memory overflows in the few really big protein structures.
120         */
121        public static final int MAX_ATOMS = Integer.MAX_VALUE; // no limit, we don't want to truncate molecules, but the user should make sure there is more memory available
122        //public static final int MAX_ATOMS = 700000; // tested with java -Xmx300M
123
124        int maxAtoms ;
125
126        String[] fullAtomNames;
127
128        public FileParsingParameters(){
129                setDefault();
130        }
131
132        public void setDefault(){
133
134                parseSecStruc = false;
135                // Default is to align / when false the unaligned SEQRES is stored.
136                alignSeqRes   = true;
137                parseCAOnly = false;
138
139                headerOnly = false;
140
141                updateRemediatedFiles = false;
142                fullAtomNames = null;
143
144                maxAtoms = MAX_ATOMS;
145
146                atomCaThreshold = ATOM_CA_THRESHOLD;
147
148                parseBioAssembly = false;
149
150                createAtomBonds = false;
151
152                createAtomCharges = true;
153
154                useInternalChainId = false;
155        }
156
157        /**
158         * Is secondary structure assignment being parsed from the file?
159         * default is null
160         * @return boolean if HELIX STRAND and TURN fields are being parsed
161         */
162        public boolean isParseSecStruc() {
163                return parseSecStruc;
164        }
165
166        /**
167         * A flag to tell the parser to parse the Author's secondary structure assignment from the file
168         * default is set to false, i.e. do NOT parse.
169         * @param parseSecStruc if HELIX STRAND and TURN fields are being parsed
170         */
171        public void setParseSecStruc(boolean parseSecStruc) {
172                this.parseSecStruc = parseSecStruc;
173        }
174
175        /** Parse only the PDB file header out of the files
176         *
177         * @return flag
178         */
179        public boolean isHeaderOnly()
180        {
181                return headerOnly;
182        }
183
184        /** Parse only the PDB file header out of the files
185         *
186         * @param headerOnly flag
187         */
188        public void setHeaderOnly(boolean headerOnly)
189        {
190                this.headerOnly = headerOnly;
191        }
192
193        /**
194         * The flag if only the C-alpha atoms of the structure should be parsed.
195         *
196         * @return the flag
197         */
198        public boolean isParseCAOnly() {
199                return parseCAOnly;
200        }
201        /**
202         * Flag if only the C-alpha atoms of the structure should be parsed.
203         *
204         * @param parseCAOnly boolean flag to enable or disable C-alpha only parsing
205         */
206        public void setParseCAOnly(boolean parseCAOnly) {
207                this.parseCAOnly = parseCAOnly;
208        }
209
210
211
212        /** Flag if the SEQRES amino acids should be aligned with the ATOM amino acids.
213         *
214         * @return flag if SEQRES - ATOM amino acids alignment is enabled
215         */
216        public boolean isAlignSeqRes() {
217                return alignSeqRes;
218        }
219
220
221
222        /**
223         * Define if the SEQRES in the structure should be aligned with the ATOM records
224         * if yes, the AminoAcids in structure.getSeqRes will have the coordinates set.
225         * @param alignSeqRes
226         */
227        public void setAlignSeqRes(boolean alignSeqRes) {
228                this.alignSeqRes = alignSeqRes;
229        }
230
231        /** A flag if local files should be replaced with the latest version of remediated PDB files. Default: false
232         *
233         * @returns updateRemediatedFiles flag
234         * @deprecated Properties which impact downloading and caching behavior
235         *  have been moved to the {@link StructureIOFile} implementations.
236         *  See {@link LocalPDBDirectory#getFetchBehavior(LocalPDBDirectory.FetchBehavior)}
237         */
238        @Deprecated
239        public boolean isUpdateRemediatedFiles() {
240                return updateRemediatedFiles;
241        }
242
243        /** A flag if local files should be replaced with the latest version of remediated PDB files. Default: false
244         *
245         * @param updateRemediatedFiles
246         * @deprecated Properties which impact downloading and caching behavior
247         *  have been moved to the {@link StructureIOFile} implementations.
248         *  See {@link LocalPDBDirectory#setFetchBehavior(LocalPDBDirectory.FetchBehavior)}
249         */
250        @Deprecated
251        public void setUpdateRemediatedFiles(boolean updateRemediatedFiles) {
252                logger.warn("FileParsingParameters.setUpdateRemediatedFiles() is deprecated, please use LocalPDBDirectory.setFetchBehavior() instead. The option will be removed in upcoming releases");
253                this.updateRemediatedFiles = updateRemediatedFiles;
254        }
255
256        /**
257         * By default the parser will read in all atoms (unless using the CAonly switch). This allows to specify a set of atoms to be read. e.g.
258         * {"CA", "CB" }. Returns null if all atoms are accepted.
259         * @return accepted atom names, or null if all atoms are accepted. default null
260         */
261        public String[] getAcceptedAtomNames() {
262                return fullAtomNames;
263        }
264
265
266        /**
267         * By default the parser will read in all atoms (unless using the CAonly switch). This allows to specify a set of atoms to be read. e.g.
268         * {"CA", "CB" }. Returns null if all atoms are accepted.
269         * @param accepted atom names, or null if all atoms are accepted. default null
270         */
271
272        public void setAcceptedAtomNames(String[] fullAtomNames) {
273                this.fullAtomNames = fullAtomNames;
274        }
275
276
277        /**
278         * The maximum numbers of atoms to load in a protein structure (prevents memory overflows)
279         *
280         * @return maximum nr of atoms to load, default Integer.MAX_VALUE;
281         */
282        public int getMaxAtoms() {
283                return maxAtoms;
284        }
285
286        /**
287         * The maximum numbers of atoms to load in a protein structure (prevents memory overflows)
288         *
289         * @param maxAtoms maximun nr of atoms to load
290         */
291        public void setMaxAtoms(int maxAtoms) {
292                this.maxAtoms = maxAtoms;
293        }
294
295
296        /**
297         * The maximum number of atoms that will be parsed before the parser switches to a CA-only
298         * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
299         * ignored.
300         *
301         *
302         * @return atomCaThreshold.
303         */
304        public int getAtomCaThreshold() {
305                return atomCaThreshold;
306        }
307
308
309        /**
310         * The maximum number of atoms that will be parsed before the parser switches to a CA-only
311         * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
312         * ignored.
313         * @param atomCaThreshold maximum number of atoms for all atom representation.
314         */
315        public void setAtomCaThreshold(int atomCaThreshold) {
316                this.atomCaThreshold = atomCaThreshold;
317        }
318
319
320        /** Should the biological assembly info (REMARK 350) be parsed from the PDB file?
321         *
322         * @return boolean flag yes/no
323         */
324        public boolean isParseBioAssembly() {
325                return parseBioAssembly;
326        }
327
328        /** Should the biological assembly info (REMARK 350) be parsed from the PDB file?
329         *
330         * @param parseBioAssembly  boolean flag yes/no
331         */
332
333        public void setParseBioAssembly(boolean parseBioAssembly) {
334                this.parseBioAssembly = parseBioAssembly;
335        }
336
337        /**
338         * Should we create bonds between atoms when parsing a file?
339         *
340         * @return true if we should create the bonds, false if not
341         */
342        public boolean shouldCreateAtomBonds() {
343                return createAtomBonds;
344        }
345
346        /**
347         * Should we create bonds between atoms when parsing a file.
348         * Will create intra-group bonds from information available in chemical component files and
349         * some other bonds from struc_conn category in mmCIF file.
350         *
351         * @param createAtomBonds
352         *            true if we should create the bonds, false if not
353         * @see BondMaker
354         */
355        public void setCreateAtomBonds(boolean createAtomBonds) {
356                this.createAtomBonds = createAtomBonds;
357        }
358
359        /**
360         * Should we create charges on atoms when parsing a file?
361         *
362         * @return true if we should create the charges, false if not
363         */
364        public boolean shouldCreateAtomCharges() {
365                return createAtomCharges;
366        }
367
368        /**
369         * Should we create charges on atoms when parsing a file?
370         *
371         * @param createAtomCharges
372         *            true if we should create the charges, false if not
373         */
374        public void setCreateAtomCharges(boolean createAtomCharges) {
375                this.createAtomCharges = createAtomCharges;
376        }
377
378        /**
379         * Should we use internal (asym_id) or public facing (author) chain ids
380         * @return
381         * @since 4.2
382         */
383        public boolean isUseInternalChainId() {
384                return useInternalChainId;
385        }
386
387        /**
388         * Set the useInternalChainId parsing mode. This is an experimental
389         * parsing mode that applies only to the mmCIF parser. It will create chains
390         * following the model specified in the mmCIF dictionary where both polymer and
391         * non-polymer entities are assigned separate chains. The chain identifiers
392         * used are the asym_ids specified in mmCIF file. Some BioJava features might not
393         * work properly in this parsing mode.
394         * @param useInternalChainId
395         * @since 4.2
396         */
397        public void setUseInternalChainId(boolean useInternalChainId) {
398                this.useInternalChainId = useInternalChainId;
399        }
400}