/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Jun 16, 2010
 * Author: ap3
 *
 */

package org.biojava.nbio.structure.io;

import java.io.Serializable;

import org.biojava.nbio.structure.AminoAcid;

/**
 * A class that configures parameters that can be sent to the PDB file parsers
 *
 * <ul>
 * <li> {@link #setParseCAOnly(boolean)} - parse only the Atom records for C-alpha atoms</li>
 * <li> {@link #setParseSecStruc(boolean)} - a flag if the secondary structure information from the PDB file
 *      (author's assignment) should be parsed.
 *      If true the assignment can be accessed through {@link AminoAcid}.getSecStruc(); </li>
 * <li> {@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES
 *      and ATOM records of a PDB file be aligned? (default:yes)</li>
 * <li> {@link #setHeaderOnly(boolean)} - parse only the PDB/mmCIF file header, ignoring coordinates
 * </li>
 * <li> {@link #setCreateAtomBonds(boolean)} - create atom bonds from parsed bonds in PDB/mmCIF files
 *      and chemical component files
 * </li>
 * </ul>
 *
 * <p>Every instance starts with the values assigned by {@link #setDefault()}, which the
 * constructor invokes.
 *
 * @author Andreas Prlic
 *
 */
public class FileParsingParameters implements Serializable {

    private static final long serialVersionUID = 5878292315163939027L;

    /**
     * Flag to detect if the secondary structure info should be read
     */
    private boolean parseSecStruc;

    /**
     * Flag to control if SEQRES and ATOM records should be aligned
     */
    private boolean alignSeqRes;

    /**
     * Flag to control reading in only Calpha atoms - this is useful for parsing large structures like 1htq.
     */
    private boolean parseCAOnly;

    /**
     * Flag to parse header only
     */
    private boolean headerOnly;

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored. This constant is the default value of {@link #getAtomCaThreshold()}.
     */
    public static final int ATOM_CA_THRESHOLD = 500000;

    private int atomCaThreshold;

    /**
     * Should we parse the biological assembly information from a file?
     */
    private boolean parseBioAssembly;

    /**
     * Should we create bonds between atoms when parsing a file?
     */
    private boolean createAtomBonds;

    /**
     * Should we create charges on atoms when parsing a file?
     */
    private boolean createAtomCharges;

    /**
     * The maximum number of atoms we will add to a structure,
     * this protects from memory overflows in the few really big protein structures.
     *
     * <p>The default is {@code Integer.MAX_VALUE}, i.e. no limit: we don't want to truncate
     * molecules, but the user should make sure there is more memory available.
     * A previous limit of 700000 was tested with {@code java -Xmx300M}.
     */
    public static final int MAX_ATOMS = Integer.MAX_VALUE;

    // NOTE(review): the two fields below are package-private rather than private.
    // Left unchanged here; confirm no same-package code reads them directly before narrowing.
    int maxAtoms;

    String[] fullAtomNames;

    /**
     * Creates a parameter set initialised with the default values, see {@link #setDefault()}.
     */
    public FileParsingParameters() {
        setDefault();
    }

    /**
     * Resets every parameter to its default value:
     * no secondary structure parsing, SEQRES/ATOM alignment enabled, all atoms (not CA only),
     * full file (not header only), no restriction on accepted atom names,
     * {@link #MAX_ATOMS} as atom limit, {@link #ATOM_CA_THRESHOLD} as CA threshold,
     * no biological assembly parsing, no atom bonds, atom charges enabled.
     */
    public void setDefault() {
        parseSecStruc = false;

        // Default is to align / when false the unaligned SEQRES is stored.
        alignSeqRes = true;
        parseCAOnly = false;

        headerOnly = false;

        fullAtomNames = null;

        maxAtoms = MAX_ATOMS;

        atomCaThreshold = ATOM_CA_THRESHOLD;

        parseBioAssembly = false;

        createAtomBonds = false;

        createAtomCharges = true;
    }

    /**
     * Is secondary structure assignment being parsed from the file?
     * Default is false.
     *
     * @return boolean if HELIX STRAND and TURN fields are being parsed
     */
    public boolean isParseSecStruc() {
        return parseSecStruc;
    }

    /**
     * A flag to tell the parser to parse the Author's secondary structure assignment from the file.
     * Default is set to false, i.e. do NOT parse.
     *
     * @param parseSecStruc if HELIX STRAND and TURN fields are being parsed
     */
    public void setParseSecStruc(boolean parseSecStruc) {
        this.parseSecStruc = parseSecStruc;
    }

    /**
     * Parse only the PDB file header out of the files
     *
     * @return flag
     */
    public boolean isHeaderOnly() {
        return headerOnly;
    }

    /**
     * Parse only the PDB file header out of the files
     *
     * @param headerOnly flag
     */
    public void setHeaderOnly(boolean headerOnly) {
        this.headerOnly = headerOnly;
    }

    /**
     * The flag if only the C-alpha atoms of the structure should be parsed.
     *
     * @return the flag
     */
    public boolean isParseCAOnly() {
        return parseCAOnly;
    }

    /**
     * Flag if only the C-alpha atoms of the structure should be parsed.
     *
     * @param parseCAOnly boolean flag to enable or disable C-alpha only parsing
     */
    public void setParseCAOnly(boolean parseCAOnly) {
        this.parseCAOnly = parseCAOnly;
    }

    /**
     * Flag if the SEQRES amino acids should be aligned with the ATOM amino acids.
     *
     * @return flag if SEQRES - ATOM amino acids alignment is enabled
     */
    public boolean isAlignSeqRes() {
        return alignSeqRes;
    }

    /**
     * Define if the SEQRES in the structure should be aligned with the ATOM records
     * if yes, the AminoAcids in structure.getSeqRes will have the coordinates set.
     *
     * @param alignSeqRes true to align SEQRES with the ATOM records, false to leave them unaligned
     */
    public void setAlignSeqRes(boolean alignSeqRes) {
        this.alignSeqRes = alignSeqRes;
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch).
     * This allows to specify a set of atoms to be read. e.g. {"CA", "CB" }.
     * Returns null if all atoms are accepted.
     *
     * <p>The stored array itself is returned, not a copy.
     *
     * @return accepted atom names, or null if all atoms are accepted. default null
     */
    public String[] getAcceptedAtomNames() {
        return fullAtomNames;
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch).
     * This allows to specify a set of atoms to be read. e.g. {"CA", "CB" }.
     * Pass null to accept all atoms.
     *
     * <p>The given array is stored as is, not copied.
     *
     * @param fullAtomNames accepted atom names, or null if all atoms are accepted. default null
     */
    public void setAcceptedAtomNames(String[] fullAtomNames) {
        this.fullAtomNames = fullAtomNames;
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows)
     *
     * @return maximum nr of atoms to load, default Integer.MAX_VALUE;
     */
    public int getMaxAtoms() {
        return maxAtoms;
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows)
     *
     * @param maxAtoms maximum nr of atoms to load
     */
    public void setMaxAtoms(int maxAtoms) {
        this.maxAtoms = maxAtoms;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @return atomCaThreshold, default {@link #ATOM_CA_THRESHOLD}
     */
    public int getAtomCaThreshold() {
        return atomCaThreshold;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @param atomCaThreshold maximum number of atoms for all atom representation.
     */
    public void setAtomCaThreshold(int atomCaThreshold) {
        this.atomCaThreshold = atomCaThreshold;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @return boolean flag yes/no
     */
    public boolean isParseBioAssembly() {
        return parseBioAssembly;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @param parseBioAssembly boolean flag yes/no
     */
    public void setParseBioAssembly(boolean parseBioAssembly) {
        this.parseBioAssembly = parseBioAssembly;
    }

    /**
     * Should we create bonds between atoms when parsing a file?
     *
     * @return true if we should create the bonds, false if not
     */
    public boolean shouldCreateAtomBonds() {
        return createAtomBonds;
    }

    /**
     * Should we create bonds between atoms when parsing a file.
     * Will create intra-group bonds from information available in chemical component files and
     * some other bonds from struc_conn category in mmCIF file.
     *
     * @param createAtomBonds
     *            true if we should create the bonds, false if not
     * @see BondMaker
     */
    public void setCreateAtomBonds(boolean createAtomBonds) {
        this.createAtomBonds = createAtomBonds;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @return true if we should create the charges, false if not
     */
    public boolean shouldCreateAtomCharges() {
        return createAtomCharges;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @param createAtomCharges
     *            true if we should create the charges, false if not
     */
    public void setCreateAtomCharges(boolean createAtomCharges) {
        this.createAtomCharges = createAtomCharges;
    }

}