001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jun 16, 2010
021 * Author: ap3
022 *
023 */
024
025package org.biojava.nbio.structure.io;
026
027import java.io.Serializable;
028
029import org.biojava.nbio.structure.AminoAcid;
030
031/**
032 * A class that configures parameters that can be sent to the PDB file parsers
033 *
034 * <ul>
035 * <li> {@link #setParseCAOnly(boolean)} - parse only the Atom records for C-alpha atoms</li>
036 * <li> {@link #setParseSecStruc(boolean)} - a flag if the secondary structure information from the PDB file (author's assignment) should be parsed.
037 *      If true the assignment can be accessed through {@link AminoAcid}.getSecStruc(); </li>
038 * <li> {@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES
039 *      and ATOM records of a PDB file be aligned? (default:yes)</li>
040 * <li> {@link #setHeaderOnly(boolean)} - parse only the PDB/mmCIF file header, ignoring coordinates
041 * </li>
042 * <li> {@link #setCreateAtomBonds(boolean)} - create atom bonds from parsed bonds in PDB/mmCIF files and chemical component files
043 * </li>
044 * </ul>
045 *
046 * @author Andreas Prlic
047 *
048 */
public class FileParsingParameters implements Serializable
{

        private static final long serialVersionUID = 5878292315163939027L;

        /**
         * Default value of {@link #getAtomCaThreshold()}: the maximum number of atoms that will be
         * parsed before the parser switches to a CA-only representation of the PDB file. If this
         * limit is exceeded also the SEQRES groups will be ignored.
         */
        public static final int ATOM_CA_THRESHOLD = 500000;

        /**
         * Default value of {@link #getMaxAtoms()}: the maximum number of atoms we will add to a
         * structure. The limit protects from memory overflows in the few really big protein
         * structures. The default imposes no limit because molecules should not be truncated; the
         * user should make sure enough memory is available. (A limit of 700000 was previously
         * tested with java -Xmx300M.)
         */
        public static final int MAX_ATOMS = Integer.MAX_VALUE;

        /**
         * Flag to detect if the secondary structure info should be read
         */
        private boolean parseSecStruc;

        /**
         * Flag to control if SEQRES and ATOM records should be aligned
         */
        private boolean alignSeqRes;

        /**
         * Flag to control reading in only Calpha atoms - this is useful for parsing large
         * structures like 1htq.
         */
        private boolean parseCAOnly;

        /**
         * Flag to parse header only
         */
        private boolean headerOnly;

        /**
         * Current CA-only switch-over threshold, see {@link #ATOM_CA_THRESHOLD}.
         */
        private int atomCaThreshold;

        /**
         * Should we parse the biological assembly information from a file?
         */
        private boolean parseBioAssembly;

        /**
         * Should we create bonds between atoms when parsing a file?
         */
        private boolean createAtomBonds;

        /**
         * Should we create charges on atoms when parsing a file?
         */
        private boolean createAtomCharges;

        /**
         * Current atom limit, see {@link #MAX_ATOMS}.
         */
        int maxAtoms;

        /**
         * Names of the atoms that are accepted, or null if all atoms are accepted.
         * The array is private to this object: it is copied on the way in and on the way out.
         */
        String[] fullAtomNames;

        /**
         * Creates a parameter set initialised with the default values, see {@link #setDefault()}.
         */
        public FileParsingParameters(){
                setDefault();
        }

        /**
         * Resets every parameter to its default value:
         * <ul>
         * <li>parseSecStruc: false</li>
         * <li>alignSeqRes: true</li>
         * <li>parseCAOnly: false</li>
         * <li>headerOnly: false</li>
         * <li>accepted atom names: null (all atoms are accepted)</li>
         * <li>maxAtoms: {@link #MAX_ATOMS}</li>
         * <li>atomCaThreshold: {@link #ATOM_CA_THRESHOLD}</li>
         * <li>parseBioAssembly: false</li>
         * <li>createAtomBonds: false</li>
         * <li>createAtomCharges: true</li>
         * </ul>
         */
        public void setDefault(){

                parseSecStruc = false;
                // Default is to align / when false the unaligned SEQRES is stored.
                alignSeqRes = true;
                parseCAOnly = false;
                headerOnly = false;

                fullAtomNames = null;

                maxAtoms = MAX_ATOMS;
                atomCaThreshold = ATOM_CA_THRESHOLD;

                parseBioAssembly = false;
                createAtomBonds = false;
                createAtomCharges = true;
        }

        /**
         * Is secondary structure assignment being parsed from the file?
         * default is false
         * @return boolean if HELIX STRAND and TURN fields are being parsed
         */
        public boolean isParseSecStruc() {
                return parseSecStruc;
        }

        /**
         * A flag to tell the parser to parse the Author's secondary structure assignment from the file
         * default is set to false, i.e. do NOT parse.
         * @param parseSecStruc if HELIX STRAND and TURN fields are being parsed
         */
        public void setParseSecStruc(boolean parseSecStruc) {
                this.parseSecStruc = parseSecStruc;
        }

        /**
         * Parse only the PDB file header out of the files
         *
         * @return flag
         */
        public boolean isHeaderOnly() {
                return headerOnly;
        }

        /**
         * Parse only the PDB file header out of the files
         *
         * @param headerOnly flag
         */
        public void setHeaderOnly(boolean headerOnly) {
                this.headerOnly = headerOnly;
        }

        /**
         * The flag if only the C-alpha atoms of the structure should be parsed.
         *
         * @return the flag
         */
        public boolean isParseCAOnly() {
                return parseCAOnly;
        }

        /**
         * Flag if only the C-alpha atoms of the structure should be parsed.
         *
         * @param parseCAOnly boolean flag to enable or disable C-alpha only parsing
         */
        public void setParseCAOnly(boolean parseCAOnly) {
                this.parseCAOnly = parseCAOnly;
        }

        /**
         * Flag if the SEQRES amino acids should be aligned with the ATOM amino acids.
         *
         * @return flag if SEQRES - ATOM amino acids alignment is enabled
         */
        public boolean isAlignSeqRes() {
                return alignSeqRes;
        }

        /**
         * Define if the SEQRES in the structure should be aligned with the ATOM records
         * if yes, the AminoAcids in structure.getSeqRes will have the coordinates set.
         *
         * @param alignSeqRes true to align SEQRES with the ATOM records, false to keep the
         *            unaligned SEQRES
         */
        public void setAlignSeqRes(boolean alignSeqRes) {
                this.alignSeqRes = alignSeqRes;
        }

        /**
         * By default the parser will read in all atoms (unless using the CAonly switch). This
         * allows to specify a set of atoms to be read. e.g. {"CA", "CB" }.
         * <p>
         * The returned array is a copy; modifying it does not change these parameters.
         *
         * @return accepted atom names, or null if all atoms are accepted. default null
         */
        public String[] getAcceptedAtomNames() {
                // Hand out a copy so callers cannot alter the filter behind our back.
                return fullAtomNames == null ? null : fullAtomNames.clone();
        }

        /**
         * By default the parser will read in all atoms (unless using the CAonly switch). This
         * allows to specify a set of atoms to be read. e.g. {"CA", "CB" }.
         * <p>
         * The given array is copied; later changes to it do not affect these parameters.
         *
         * @param fullAtomNames accepted atom names, or null if all atoms are accepted. default null
         */
        public void setAcceptedAtomNames(String[] fullAtomNames) {
                // Keep a private copy so later writes to the caller's array are not picked up.
                this.fullAtomNames = fullAtomNames == null ? null : fullAtomNames.clone();
        }

        /**
         * The maximum numbers of atoms to load in a protein structure (prevents memory overflows)
         *
         * @return maximum nr of atoms to load, default Integer.MAX_VALUE;
         */
        public int getMaxAtoms() {
                return maxAtoms;
        }

        /**
         * The maximum numbers of atoms to load in a protein structure (prevents memory overflows)
         *
         * @param maxAtoms maximum nr of atoms to load
         */
        public void setMaxAtoms(int maxAtoms) {
                this.maxAtoms = maxAtoms;
        }

        /**
         * The maximum number of atoms that will be parsed before the parser switches to a CA-only
         * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
         * ignored.
         *
         * @return atomCaThreshold, default {@link #ATOM_CA_THRESHOLD}
         */
        public int getAtomCaThreshold() {
                return atomCaThreshold;
        }

        /**
         * The maximum number of atoms that will be parsed before the parser switches to a CA-only
         * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
         * ignored.
         *
         * @param atomCaThreshold maximum number of atoms for all atom representation.
         */
        public void setAtomCaThreshold(int atomCaThreshold) {
                this.atomCaThreshold = atomCaThreshold;
        }

        /**
         * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
         *
         * @return boolean flag yes/no
         */
        public boolean isParseBioAssembly() {
                return parseBioAssembly;
        }

        /**
         * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
         *
         * @param parseBioAssembly boolean flag yes/no
         */
        public void setParseBioAssembly(boolean parseBioAssembly) {
                this.parseBioAssembly = parseBioAssembly;
        }

        /**
         * Should we create bonds between atoms when parsing a file?
         *
         * @return true if we should create the bonds, false if not
         */
        public boolean shouldCreateAtomBonds() {
                return createAtomBonds;
        }

        /**
         * Should we create bonds between atoms when parsing a file.
         * Will create intra-group bonds from information available in chemical component files and
         * some other bonds from struc_conn category in mmCIF file.
         *
         * @param createAtomBonds
         *            true if we should create the bonds, false if not
         * @see BondMaker
         */
        public void setCreateAtomBonds(boolean createAtomBonds) {
                this.createAtomBonds = createAtomBonds;
        }

        /**
         * Should we create charges on atoms when parsing a file?
         *
         * @return true if we should create the charges, false if not
         */
        public boolean shouldCreateAtomCharges() {
                return createAtomCharges;
        }

        /**
         * Should we create charges on atoms when parsing a file?
         *
         * @param createAtomCharges
         *            true if we should create the charges, false if not
         */
        public void setCreateAtomCharges(boolean createAtomCharges) {
                this.createAtomCharges = createAtomCharges;
        }

}