/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Jun 16, 2010
 * Author: ap3
 *
 */

package org.biojava.nbio.structure.io;

import java.io.Serializable;

import org.biojava.nbio.structure.AminoAcid;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bundle of options that can be handed to the PDB file parsers.
 *
 * <p>The most commonly used switches are:
 * <ul>
 * <li>{@link #setParseCAOnly(boolean)} - parse only the Atom records for C-alpha atoms.</li>
 * <li>{@link #setParseSecStruc(boolean)} - whether the secondary structure information from
 *     the PDB file (author's assignment) should be parsed. If true the assignment can be
 *     accessed through {@link AminoAcid}.getSecStruc().</li>
 * <li>{@link #setAlignSeqRes(boolean)} - whether the AminoAcid sequences from the SEQRES and
 *     ATOM records of a PDB file should be aligned (default: yes).</li>
 * <li>{@link #setHeaderOnly(boolean)} - parse only the PDB/mmCIF file header, ignoring
 *     coordinates.</li>
 * <li>{@link #setCreateAtomBonds(boolean)} - create atom bonds from parsed bonds in PDB/mmCIF
 *     files and chemical component files.</li>
 * </ul>
 *
 * <p>A new instance starts with the values assigned by {@link #setDefault()}.
 *
 * @author Andreas Prlic
 */
public class FileParsingParameters implements Serializable {

    private static final long serialVersionUID = 5878292315163939027L;

    private static final Logger logger = LoggerFactory.getLogger(FileParsingParameters.class);

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     */
    public static final int ATOM_CA_THRESHOLD = 500000;

    /**
     * The maximum number of atoms we will add to a structure; this protects from memory
     * overflows in the few really big protein structures.
     *
     * <p>Set to {@link Integer#MAX_VALUE}, i.e. no limit: we don't want to truncate molecules,
     * but the user should make sure there is more memory available. (A limit of 700000 was
     * previously tested with {@code java -Xmx300M}.)
     */
    public static final int MAX_ATOMS = Integer.MAX_VALUE;

    /** Whether the secondary structure info should be read. */
    private boolean parseSecStruc;

    /** Whether SEQRES and ATOM records should be aligned. */
    private boolean alignSeqRes;

    /**
     * Whether only C-alpha atoms are read - this is useful for parsing large structures
     * like 1htq.
     */
    private boolean parseCAOnly;

    /** Whether only the header is parsed. */
    private boolean headerOnly;

    /** Update locally cached files to the latest version of remediated files. */
    private boolean updateRemediatedFiles;

    /** Per-instance value of the CA-only switch-over limit; see {@link #ATOM_CA_THRESHOLD}. */
    private int atomCaThreshold;

    /** Should we parse the biological assembly information from a file? */
    private boolean parseBioAssembly;

    /** Should we create bonds between atoms when parsing a file? */
    private boolean createAtomBonds;

    /** Should we create charges on atoms when parsing a file? */
    private boolean createAtomCharges;

    /** Should we use internal (asym_id) or public facing (author) chain ids? */
    private boolean useInternalChainId;

    /*
     * The two fields below carry no access modifier (package-private) in the original
     * declaration; that visibility is deliberately left untouched.
     */

    /** Per-instance atom limit; see {@link #MAX_ATOMS}. */
    int maxAtoms;

    /** Atom names accepted by the parser, or {@code null} when every atom is accepted. */
    String[] fullAtomNames;

    /**
     * Creates a parameter set holding the default values, as assigned by
     * {@link #setDefault()}.
     */
    public FileParsingParameters() {
        setDefault();
    }

    /**
     * Resets every option of this instance to its default value:
     * secondary structure parsing off, SEQRES/ATOM alignment on, C-alpha-only parsing off,
     * header-only parsing off, remediated-file updating off, no accepted-atom-name filter,
     * {@link #MAX_ATOMS} as atom limit, {@link #ATOM_CA_THRESHOLD} as CA-only threshold,
     * biological assembly parsing off, atom bond creation off, atom charge creation on and
     * internal chain ids off.
     */
    public void setDefault() {
        // Which parts of a file are read.
        this.headerOnly = false;
        this.parseCAOnly = false;
        this.parseSecStruc = false;
        this.parseBioAssembly = false;
        this.fullAtomNames = null;

        // Size limits.
        this.atomCaThreshold = ATOM_CA_THRESHOLD;
        this.maxAtoms = MAX_ATOMS;

        // Default is to align / when false the unaligned SEQRES is stored.
        this.alignSeqRes = true;

        // Derived data and chain naming.
        this.createAtomBonds = false;
        this.createAtomCharges = true;
        this.useInternalChainId = false;

        // Deprecated caching option.
        this.updateRemediatedFiles = false;
    }

    /**
     * Is secondary structure assignment being parsed from the file?
     * Default is false.
     *
     * @return true if HELIX, STRAND and TURN fields are being parsed
     */
    public boolean isParseSecStruc() {
        return this.parseSecStruc;
    }

    /**
     * Tells the parser whether to parse the author's secondary structure assignment from the
     * file. Default is false, i.e. do NOT parse.
     *
     * @param parseSecStruc true if HELIX, STRAND and TURN fields should be parsed
     */
    public void setParseSecStruc(boolean parseSecStruc) {
        this.parseSecStruc = parseSecStruc;
    }

    /**
     * Parse only the PDB file header out of the files.
     *
     * @return the header-only flag
     */
    public boolean isHeaderOnly() {
        return this.headerOnly;
    }

    /**
     * Parse only the PDB file header out of the files.
     *
     * @param headerOnly the header-only flag
     */
    public void setHeaderOnly(boolean headerOnly) {
        this.headerOnly = headerOnly;
    }

    /**
     * The flag if only the C-alpha atoms of the structure should be parsed.
     *
     * @return true if C-alpha only parsing is enabled
     */
    public boolean isParseCAOnly() {
        return this.parseCAOnly;
    }

    /**
     * Flag if only the C-alpha atoms of the structure should be parsed.
     *
     * @param parseCAOnly boolean flag to enable or disable C-alpha only parsing
     */
    public void setParseCAOnly(boolean parseCAOnly) {
        this.parseCAOnly = parseCAOnly;
    }

    /**
     * Flag if the SEQRES amino acids should be aligned with the ATOM amino acids.
     *
     * @return true if SEQRES - ATOM amino acids alignment is enabled
     */
    public boolean isAlignSeqRes() {
        return this.alignSeqRes;
    }

    /**
     * Define if the SEQRES in the structure should be aligned with the ATOM records;
     * if yes, the AminoAcids in structure.getSeqRes will have the coordinates set.
     *
     * @param alignSeqRes true to enable the SEQRES - ATOM alignment, false to disable it
     */
    public void setAlignSeqRes(boolean alignSeqRes) {
        this.alignSeqRes = alignSeqRes;
    }

    /**
     * A flag if local files should be replaced with the latest version of remediated PDB
     * files. Default: false.
     *
     * @return the updateRemediatedFiles flag
     * @deprecated Properties which impact downloading and caching behavior
     *     have been moved to the {@link StructureIOFile} implementations.
     *     See {@link LocalPDBDirectory#setFetchBehavior(LocalPDBDirectory.FetchBehavior)}
     */
    @Deprecated
    public boolean isUpdateRemediatedFiles() {
        return this.updateRemediatedFiles;
    }

    /**
     * A flag if local files should be replaced with the latest version of remediated PDB
     * files. Default: false.
     *
     * <p>Every call logs a deprecation warning before the flag is stored.
     *
     * @param updateRemediatedFiles the new value of the flag
     * @deprecated Properties which impact downloading and caching behavior
     *     have been moved to the {@link StructureIOFile} implementations.
     *     See {@link LocalPDBDirectory#setFetchBehavior(LocalPDBDirectory.FetchBehavior)}
     */
    @Deprecated
    public void setUpdateRemediatedFiles(boolean updateRemediatedFiles) {
        logger.warn("FileParsingParameters.setUpdateRemediatedFiles() is deprecated, please use LocalPDBDirectory.setFetchBehavior() instead. The option will be removed in upcoming releases");
        this.updateRemediatedFiles = updateRemediatedFiles;
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch). This
     * returns the set of atom names that has been specified to be read, e.g.
     * {@code "CA"} and {@code "CB"}.
     *
     * <p>The stored array itself is returned, not a copy.
     *
     * @return accepted atom names, or null if all atoms are accepted. Default null
     */
    public String[] getAcceptedAtomNames() {
        return this.fullAtomNames;
    }

    /**
     * By default the parser will read in all atoms (unless using the CAonly switch). This
     * allows to specify a set of atoms to be read, e.g. {@code "CA"} and {@code "CB"}.
     *
     * <p>The given array is stored as is, not copied.
     *
     * @param fullAtomNames accepted atom names, or null if all atoms are accepted.
     *     Default null
     */
    public void setAcceptedAtomNames(String[] fullAtomNames) {
        this.fullAtomNames = fullAtomNames;
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows).
     *
     * @return maximum nr of atoms to load, default Integer.MAX_VALUE
     */
    public int getMaxAtoms() {
        return this.maxAtoms;
    }

    /**
     * The maximum number of atoms to load in a protein structure (prevents memory overflows).
     *
     * @param maxAtoms maximum nr of atoms to load
     */
    public void setMaxAtoms(int maxAtoms) {
        this.maxAtoms = maxAtoms;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @return the threshold, default {@link #ATOM_CA_THRESHOLD}
     */
    public int getAtomCaThreshold() {
        return this.atomCaThreshold;
    }

    /**
     * The maximum number of atoms that will be parsed before the parser switches to a CA-only
     * representation of the PDB file. If this limit is exceeded also the SEQRES groups will be
     * ignored.
     *
     * @param atomCaThreshold maximum number of atoms for all atom representation
     */
    public void setAtomCaThreshold(int atomCaThreshold) {
        this.atomCaThreshold = atomCaThreshold;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @return boolean flag yes/no
     */
    public boolean isParseBioAssembly() {
        return this.parseBioAssembly;
    }

    /**
     * Should the biological assembly info (REMARK 350) be parsed from the PDB file?
     *
     * @param parseBioAssembly boolean flag yes/no
     */
    public void setParseBioAssembly(boolean parseBioAssembly) {
        this.parseBioAssembly = parseBioAssembly;
    }

    /**
     * Should we create bonds between atoms when parsing a file?
     *
     * @return true if we should create the bonds, false if not
     */
    public boolean shouldCreateAtomBonds() {
        return this.createAtomBonds;
    }

    /**
     * Should we create bonds between atoms when parsing a file?
     * Will create intra-group bonds from information available in chemical component files and
     * some other bonds from struc_conn category in mmCIF file.
     *
     * @param createAtomBonds true if we should create the bonds, false if not
     * @see BondMaker
     */
    public void setCreateAtomBonds(boolean createAtomBonds) {
        this.createAtomBonds = createAtomBonds;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @return true if we should create the charges, false if not
     */
    public boolean shouldCreateAtomCharges() {
        return this.createAtomCharges;
    }

    /**
     * Should we create charges on atoms when parsing a file?
     *
     * @param createAtomCharges true if we should create the charges, false if not
     */
    public void setCreateAtomCharges(boolean createAtomCharges) {
        this.createAtomCharges = createAtomCharges;
    }

    /**
     * Should we use internal (asym_id) or public facing (author) chain ids?
     *
     * @return true if internal (asym_id) chain ids are used, false if public facing (author)
     *     chain ids are used
     * @since 4.2
     */
    public boolean isUseInternalChainId() {
        return this.useInternalChainId;
    }

    /**
     * Set the useInternalChainId parsing mode. This is an experimental
     * parsing mode that applies only to the mmCIF parser. It will create chains
     * following the model specified in the mmCIF dictionary where both polymer and
     * non-polymer entities are assigned separate chains. The chain identifiers
     * used are the asym_ids specified in mmCIF file. Some BioJava features might not
     * work properly in this parsing mode.
     *
     * @param useInternalChainId true to enable the internal chain id parsing mode
     * @since 4.2
     */
    public void setUseInternalChainId(boolean useInternalChainId) {
        this.useInternalChainId = useInternalChainId;
    }
}