001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jun 16, 2010
021 * Author: ap3
022 *
023 */
024
025package org.biojava.nbio.structure.io;
026
027import java.io.Serializable;
028
029import org.biojava.nbio.structure.AminoAcid;
030
/**
 * Holder for the options that steer the PDB/mmCIF file parsers.
 *
 * <p>The most commonly used switches are:
 * <ul>
 * <li>{@link #setParseCAOnly(boolean)} - read only the Atom records of C-alpha atoms</li>
 * <li>{@link #setParseSecStruc(boolean)} - read the secondary structure information
 *     (author's assignment) from the PDB file. If true the assignment can be accessed
 *     through {@link AminoAcid}.getSecStruc();</li>
 * <li>{@link #setAlignSeqRes(boolean)} - align the AminoAcid sequences from the SEQRES
 *     and ATOM records of a PDB file (default: yes)</li>
 * <li>{@link #setHeaderOnly(boolean)} - parse only the PDB/mmCIF file header,
 *     ignoring coordinates</li>
 * <li>{@link #setCreateAtomBonds(boolean)} - create atom bonds from parsed bonds in
 *     PDB/mmCIF files and chemical component files</li>
 * </ul>
 *
 * <p>A new instance starts with the values assigned by {@link #setDefault()}.
 *
 * @author Andreas Prlic
 */
public class FileParsingParameters implements Serializable {

    private static final long serialVersionUID = 5878292315163939027L;

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     */
    public static final int ATOM_CA_THRESHOLD = 500000;

    /**
     * The maximum number of atoms we will add to a structure,
     * this protects from memory overflows in the few really big protein structures.
     *
     * <p>Set to {@link Integer#MAX_VALUE}, i.e. no limit: molecules should not be truncated,
     * but the user should make sure there is more memory available. (A limit of 700000 was
     * once tested with {@code java -Xmx300M}.)
     */
    public static final int MAX_ATOMS = Integer.MAX_VALUE;

    /** Whether the secondary structure info should be read. */
    private boolean parseSecStruc;

    /** Whether SEQRES and ATOM records should be aligned. */
    private boolean alignSeqRes;

    /**
     * Whether only Calpha atoms are read - this is useful for parsing large
     * structures like 1htq.
     */
    private boolean parseCAOnly;

    /** Whether only the header is parsed. */
    private boolean headerOnly;

    /** Whether the biological assembly information is parsed from a file. */
    private boolean parseBioAssembly;

    /** Whether bonds between atoms are created when parsing a file. */
    private boolean createAtomBonds;

    /** Whether charges on atoms are created when parsing a file. */
    private boolean createAtomCharges;

    /** Per-instance value of the CA-only switch-over limit, see {@link #ATOM_CA_THRESHOLD}. */
    private int atomCaThreshold;

    /** Per-instance value of the atom limit, see {@link #MAX_ATOMS}. */
    int maxAtoms;

    /** Accepted atom names, or null when every atom is accepted. */
    String[] fullAtomNames;

    /**
     * Creates a parameter set initialised through {@link #setDefault()}.
     */
    public FileParsingParameters() {
        setDefault();
    }

    /**
     * Resets every option to its default value:
     * secondary structure parsing off, SEQRES/ATOM alignment on, CA-only off,
     * header-only off, no accepted-atom-name filter, atom limit {@link #MAX_ATOMS},
     * CA threshold {@link #ATOM_CA_THRESHOLD}, biological assembly parsing off,
     * bond creation off and charge creation on.
     */
    public void setDefault() {
        // boolean switches
        this.parseSecStruc = false;
        this.parseCAOnly = false;
        this.headerOnly = false;
        this.parseBioAssembly = false;
        this.createAtomBonds = false;
        this.createAtomCharges = true;
        // Aligning is the default; when false the unaligned SEQRES is stored.
        this.alignSeqRes = true;

        // limits
        this.atomCaThreshold = ATOM_CA_THRESHOLD;
        this.maxAtoms = MAX_ATOMS;

        // null means: accept all atoms
        this.fullAtomNames = null;
    }

    // ------------------------------------------------------------------
    // secondary structure
    // ------------------------------------------------------------------

    /**
     * Is secondary structure assignment being parsed from the file?
     * Default is false.
     *
     * @return true if HELIX, STRAND and TURN fields are being parsed
     */
    public boolean isParseSecStruc() {
        return this.parseSecStruc;
    }

    /**
     * Tells the parser whether to parse the author's secondary structure assignment
     * from the file. Default is false, i.e. do NOT parse.
     *
     * @param parseSecStruc true if HELIX, STRAND and TURN fields should be parsed
     */
    public void setParseSecStruc(boolean parseSecStruc) {
        this.parseSecStruc = parseSecStruc;
    }

    // ------------------------------------------------------------------
    // header only
    // ------------------------------------------------------------------

    /**
     * Is only the PDB file header parsed out of the files?
     *
     * @return the header-only flag
     */
    public boolean isHeaderOnly() {
        return this.headerOnly;
    }

    /**
     * Sets whether only the PDB file header is parsed out of the files.
     *
     * @param headerOnly the header-only flag
     */
    public void setHeaderOnly(boolean headerOnly) {
        this.headerOnly = headerOnly;
    }

    // ------------------------------------------------------------------
    // C-alpha only
    // ------------------------------------------------------------------

    /**
     * Should only the C-alpha atoms of the structure be parsed?
     *
     * @return the C-alpha only flag
     */
    public boolean isParseCAOnly() {
        return this.parseCAOnly;
    }

    /**
     * Sets whether only the C-alpha atoms of the structure should be parsed.
     *
     * @param parseCAOnly boolean flag to enable or disable C-alpha only parsing
     */
    public void setParseCAOnly(boolean parseCAOnly) {
        this.parseCAOnly = parseCAOnly;
    }

    // ------------------------------------------------------------------
    // SEQRES / ATOM alignment
    // ------------------------------------------------------------------

    /**
     * Should the SEQRES amino acids be aligned with the ATOM amino acids?
     *
     * @return true if SEQRES - ATOM amino acids alignment is enabled
     */
    public boolean isAlignSeqRes() {
        return this.alignSeqRes;
    }

    /**
     * Defines if the SEQRES in the structure should be aligned with the ATOM records.
     * If yes, the AminoAcids in structure.getSeqRes will have the coordinates set.
     *
     * @param alignSeqRes true to enable the SEQRES - ATOM alignment
     */
    public void setAlignSeqRes(boolean alignSeqRes) {
        this.alignSeqRes = alignSeqRes;
    }

    // ------------------------------------------------------------------
    // accepted atom names
    // ------------------------------------------------------------------

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch).
     * A set of atoms to be read can be specified instead, e.g. {"CA", "CB" }.
     *
     * <p>The array held by this object is returned as is, no copy is made.
     *
     * @return accepted atom names, or null if all atoms are accepted. default null
     */
    public String[] getAcceptedAtomNames() {
        return this.fullAtomNames;
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch).
     * This allows to specify a set of atoms to be read, e.g. {"CA", "CB" }.
     * Pass null to accept all atoms.
     *
     * <p>The given array is stored as is, no copy is made.
     *
     * @param fullAtomNames accepted atom names, or null if all atoms are accepted. default null
     */
    public void setAcceptedAtomNames(String[] fullAtomNames) {
        this.fullAtomNames = fullAtomNames;
    }

    // ------------------------------------------------------------------
    // atom limits
    // ------------------------------------------------------------------

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows).
     *
     * @return maximum nr of atoms to load, default Integer.MAX_VALUE
     */
    public int getMaxAtoms() {
        return this.maxAtoms;
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows).
     *
     * @param maxAtoms maximum nr of atoms to load
     */
    public void setMaxAtoms(int maxAtoms) {
        this.maxAtoms = maxAtoms;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @return the threshold, default {@link #ATOM_CA_THRESHOLD}
     */
    public int getAtomCaThreshold() {
        return this.atomCaThreshold;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @param atomCaThreshold maximum number of atoms for all atom representation.
     */
    public void setAtomCaThreshold(int atomCaThreshold) {
        this.atomCaThreshold = atomCaThreshold;
    }

    // ------------------------------------------------------------------
    // biological assembly
    // ------------------------------------------------------------------

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @return boolean flag yes/no
     */
    public boolean isParseBioAssembly() {
        return this.parseBioAssembly;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @param parseBioAssembly boolean flag yes/no
     */
    public void setParseBioAssembly(boolean parseBioAssembly) {
        this.parseBioAssembly = parseBioAssembly;
    }

    // ------------------------------------------------------------------
    // bonds and charges
    // ------------------------------------------------------------------

    /**
     * Should we create bonds between atoms when parsing a file?
     *
     * @return true if we should create the bonds, false if not
     */
    public boolean shouldCreateAtomBonds() {
        return this.createAtomBonds;
    }

    /**
     * Should we create bonds between atoms when parsing a file.
     * Will create intra-group bonds from information available in chemical component files and
     * some other bonds from struc_conn category in mmCIF file.
     *
     * @param createAtomBonds
     *            true if we should create the bonds, false if not
     * @see BondMaker
     */
    public void setCreateAtomBonds(boolean createAtomBonds) {
        this.createAtomBonds = createAtomBonds;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @return true if we should create the charges, false if not
     */
    public boolean shouldCreateAtomCharges() {
        return this.createAtomCharges;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @param createAtomCharges
     *            true if we should create the charges, false if not
     */
    public void setCreateAtomCharges(boolean createAtomCharges) {
        this.createAtomCharges = createAtomCharges;
    }
}