/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Jun 16, 2010
 * Author: ap3
 *
 */

package org.biojava.nbio.structure.io;

import java.io.Serializable;

import org.biojava.nbio.structure.AminoAcid;

/**
 * A class that configures parameters that can be sent to the PDB file parsers
 *
 * <ul>
 * <li> {@link #setParseCAOnly(boolean)} - parse only the Atom records for C-alpha atoms</li>
 * <li> {@link #setParseSecStruc(boolean)} - a flag if the secondary structure information from the PDB file (author's assignment) should be parsed.
 *      If true the assignment can be accessed through {@link AminoAcid}.getSecStruc(); </li>
 * <li> {@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES
 *      and ATOM records of a PDB file be aligned?
/**
 * A mutable set of options that can be handed to the PDB/mmCIF file parsers.
 *
 * <ul>
 * <li>{@link #setParseCAOnly(boolean)} - parse only the ATOM records for C-alpha atoms</li>
 * <li>{@link #setParseSecStruc(boolean)} - a flag if the secondary structure information
 *     from the PDB file (author's assignment) should be parsed. If true the assignment
 *     can be accessed through {@link AminoAcid}.getSecStruc()</li>
 * <li>{@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES
 *     and ATOM records of a PDB file be aligned? (default: yes)</li>
 * <li>{@link #setHeaderOnly(boolean)} - parse only the PDB/mmCIF file header, ignoring
 *     coordinates</li>
 * <li>{@link #setCreateAtomBonds(boolean)} - create atom bonds from parsed bonds in
 *     PDB/mmCIF files and chemical component files</li>
 * </ul>
 *
 * <p>A new instance carries the values assigned by {@link #setDefault()}.
 *
 * @author Andreas Prlic
 */
public class FileParsingParameters implements Serializable {

    private static final long serialVersionUID = 5878292315163939027L;

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     */
    public static final int ATOM_CA_THRESHOLD = 500000;

    /**
     * The maximum number of atoms we will add to a structure; this protects from memory
     * overflows in the few really big protein structures.
     *
     * <p>The value means "no limit": molecules should not be truncated, but the user should
     * make sure there is enough memory available. (An earlier limit of 700000 had been tested
     * with {@code java -Xmx300M}.)
     */
    public static final int MAX_ATOMS = Integer.MAX_VALUE;

    /** Flag to detect if the secondary structure info should be read. */
    private boolean parseSecStruc;

    /** Flag to control if SEQRES and ATOM records should be aligned. */
    private boolean alignSeqRes;

    /**
     * Flag to control reading in only C-alpha atoms - this is useful for parsing large
     * structures like 1htq.
     */
    private boolean parseCAOnly;

    /** Flag to parse header only. */
    private boolean headerOnly;

    /** Current CA-only switch-over threshold; initialised from {@link #ATOM_CA_THRESHOLD}. */
    private int atomCaThreshold;

    /** Should we parse the biological assembly information from a file? */
    private boolean parseBioAssembly;

    /** Should we create bonds between atoms when parsing a file? */
    private boolean createAtomBonds;

    /** Should we create charges on atoms when parsing a file? */
    private boolean createAtomCharges;

    // The two fields below are package-private in the original class; their visibility is
    // deliberately left untouched so that any same-package code reading them keeps compiling.

    /** Current atom limit; initialised from {@link #MAX_ATOMS}. */
    int maxAtoms;

    /** Accepted atom names, or {@code null} when every atom is accepted. */
    String[] fullAtomNames;

    /**
     * Creates a parameter set initialised with the values of {@link #setDefault()}.
     */
    public FileParsingParameters() {
        setDefault();
    }

    /**
     * Resets every parameter to its default value:
     * <ul>
     * <li>parseSecStruc = false</li>
     * <li>alignSeqRes = true (when false the unaligned SEQRES is stored)</li>
     * <li>parseCAOnly = false</li>
     * <li>headerOnly = false</li>
     * <li>accepted atom names = null (all atoms are accepted)</li>
     * <li>maxAtoms = {@link #MAX_ATOMS}</li>
     * <li>atomCaThreshold = {@link #ATOM_CA_THRESHOLD}</li>
     * <li>parseBioAssembly = false</li>
     * <li>createAtomBonds = false</li>
     * <li>createAtomCharges = true</li>
     * </ul>
     */
    public void setDefault() {
        parseSecStruc = false;
        // Default is to align / when false the unaligned SEQRES is stored.
        alignSeqRes = true;
        parseCAOnly = false;
        headerOnly = false;
        fullAtomNames = null;
        maxAtoms = MAX_ATOMS;
        atomCaThreshold = ATOM_CA_THRESHOLD;
        parseBioAssembly = false;
        createAtomBonds = false;
        createAtomCharges = true;
    }

    /**
     * Is secondary structure assignment being parsed from the file?
     * The default is {@code false}.
     *
     * @return true if HELIX, STRAND and TURN fields are being parsed
     */
    public boolean isParseSecStruc() {
        return parseSecStruc;
    }

    /**
     * A flag to tell the parser to parse the author's secondary structure assignment from
     * the file. The default is {@code false}, i.e. do NOT parse.
     *
     * @param parseSecStruc true if HELIX, STRAND and TURN fields should be parsed
     */
    public void setParseSecStruc(boolean parseSecStruc) {
        this.parseSecStruc = parseSecStruc;
    }

    /**
     * Parse only the PDB file header out of the files.
     *
     * @return the header-only flag
     */
    public boolean isHeaderOnly() {
        return headerOnly;
    }

    /**
     * Parse only the PDB file header out of the files.
     *
     * @param headerOnly the header-only flag
     */
    public void setHeaderOnly(boolean headerOnly) {
        this.headerOnly = headerOnly;
    }

    /**
     * The flag if only the C-alpha atoms of the structure should be parsed.
     *
     * @return the flag
     */
    public boolean isParseCAOnly() {
        return parseCAOnly;
    }

    /**
     * Flag if only the C-alpha atoms of the structure should be parsed.
     *
     * @param parseCAOnly boolean flag to enable or disable C-alpha only parsing
     */
    public void setParseCAOnly(boolean parseCAOnly) {
        this.parseCAOnly = parseCAOnly;
    }

    /**
     * Flag if the SEQRES amino acids should be aligned with the ATOM amino acids.
     *
     * @return flag if SEQRES - ATOM amino acids alignment is enabled
     */
    public boolean isAlignSeqRes() {
        return alignSeqRes;
    }

    /**
     * Define if the SEQRES in the structure should be aligned with the ATOM records;
     * if yes, the AminoAcids in structure.getSeqRes will have the coordinates set.
     *
     * @param alignSeqRes true to enable the SEQRES - ATOM alignment, false to disable it
     */
    public void setAlignSeqRes(boolean alignSeqRes) {
        this.alignSeqRes = alignSeqRes;
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch). This
     * allows to query the set of atoms to be read, e.g. {"CA", "CB"}.
     *
     * <p>The returned array is a copy; modifying it does not change this parameter set.
     *
     * @return accepted atom names, or null if all atoms are accepted. Default null
     */
    public String[] getAcceptedAtomNames() {
        return copyOrNull(fullAtomNames);
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch). This
     * allows to specify a set of atoms to be read, e.g. {"CA", "CB"}.
     *
     * <p>The array is copied, so later changes to the caller's array do not affect this
     * parameter set.
     *
     * @param fullAtomNames accepted atom names, or null if all atoms are accepted.
     *        Default null
     */
    public void setAcceptedAtomNames(String[] fullAtomNames) {
        this.fullAtomNames = copyOrNull(fullAtomNames);
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows).
     *
     * @return maximum number of atoms to load, default Integer.MAX_VALUE
     */
    public int getMaxAtoms() {
        return maxAtoms;
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows).
     *
     * @param maxAtoms maximum number of atoms to load
     */
    public void setMaxAtoms(int maxAtoms) {
        this.maxAtoms = maxAtoms;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @return atomCaThreshold
     */
    public int getAtomCaThreshold() {
        return atomCaThreshold;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @param atomCaThreshold maximum number of atoms for all atom representation
     */
    public void setAtomCaThreshold(int atomCaThreshold) {
        this.atomCaThreshold = atomCaThreshold;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @return boolean flag yes/no
     */
    public boolean isParseBioAssembly() {
        return parseBioAssembly;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @param parseBioAssembly boolean flag yes/no
     */
    public void setParseBioAssembly(boolean parseBioAssembly) {
        this.parseBioAssembly = parseBioAssembly;
    }

    /**
     * Should we create bonds between atoms when parsing a file?
     *
     * @return true if we should create the bonds, false if not
     */
    public boolean shouldCreateAtomBonds() {
        return createAtomBonds;
    }

    /**
     * Should we create bonds between atoms when parsing a file.
     * Will create intra-group bonds from information available in chemical component files
     * and some other bonds from struc_conn category in mmCIF file.
     *
     * @param createAtomBonds
     *            true if we should create the bonds, false if not
     * @see BondMaker
     */
    public void setCreateAtomBonds(boolean createAtomBonds) {
        this.createAtomBonds = createAtomBonds;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @return true if we should create the charges, false if not
     */
    public boolean shouldCreateAtomCharges() {
        return createAtomCharges;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @param createAtomCharges
     *            true if we should create the charges, false if not
     */
    public void setCreateAtomCharges(boolean createAtomCharges) {
        this.createAtomCharges = createAtomCharges;
    }

    /** Returns an independent copy of {@code names}, or null when {@code names} is null. */
    private static String[] copyOrNull(String[] names) {
        return names == null ? null : names.clone();
    }
}