001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * created at Apr 26, 2008 021 */ 022package org.biojava.nbio.structure.io.mmcif; 023 024import java.text.ParseException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Date; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Locale; 031import java.util.Map; 032 033import javax.vecmath.Matrix4d; 034 035import org.biojava.nbio.structure.AminoAcid; 036import org.biojava.nbio.structure.AminoAcidImpl; 037import org.biojava.nbio.structure.Atom; 038import org.biojava.nbio.structure.AtomImpl; 039import org.biojava.nbio.structure.Chain; 040import org.biojava.nbio.structure.ChainImpl; 041import org.biojava.nbio.structure.Compound; 042import org.biojava.nbio.structure.DBRef; 043import org.biojava.nbio.structure.Element; 044import org.biojava.nbio.structure.Group; 045import org.biojava.nbio.structure.GroupType; 046import org.biojava.nbio.structure.HetatomImpl; 047import org.biojava.nbio.structure.NucleotideImpl; 048import org.biojava.nbio.structure.PDBCrystallographicInfo; 049import org.biojava.nbio.structure.PDBHeader; 050import org.biojava.nbio.structure.ResidueNumber; 051import org.biojava.nbio.structure.SeqMisMatch; 052import org.biojava.nbio.structure.SeqMisMatchImpl; 053import org.biojava.nbio.structure.Site; 054import org.biojava.nbio.structure.Structure; 055import 
org.biojava.nbio.structure.StructureException; 056import org.biojava.nbio.structure.StructureImpl; 057import org.biojava.nbio.structure.StructureTools; 058import org.biojava.nbio.structure.io.BondMaker; 059import org.biojava.nbio.structure.io.ChargeAdder; 060import org.biojava.nbio.structure.io.FileParsingParameters; 061import org.biojava.nbio.structure.io.SeqRes2AtomAligner; 062import org.biojava.nbio.structure.io.mmcif.model.AtomSite; 063import org.biojava.nbio.structure.io.mmcif.model.AtomSites; 064import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor; 065import org.biojava.nbio.structure.io.mmcif.model.Cell; 066import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 067import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom; 068import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond; 069import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor; 070import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark; 071import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev; 072import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord; 073import org.biojava.nbio.structure.io.mmcif.model.Entity; 074import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq; 075import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen; 076import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat; 077import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn; 078import org.biojava.nbio.structure.io.mmcif.model.Exptl; 079import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor; 080import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier; 081import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly; 082import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme; 083import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme; 084import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly; 085import 
org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen; 086import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList; 087import org.biojava.nbio.structure.io.mmcif.model.Refine; 088import org.biojava.nbio.structure.io.mmcif.model.Struct; 089import org.biojava.nbio.structure.io.mmcif.model.StructAsym; 090import org.biojava.nbio.structure.io.mmcif.model.StructConn; 091import org.biojava.nbio.structure.io.mmcif.model.StructKeywords; 092import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper; 093import org.biojava.nbio.structure.io.mmcif.model.StructRef; 094import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq; 095import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif; 096import org.biojava.nbio.structure.io.mmcif.model.StructSite; 097import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen; 098import org.biojava.nbio.structure.io.mmcif.model.Symmetry; 099import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 100import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; 101import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 102import org.biojava.nbio.structure.xtal.CrystalCell; 103import org.biojava.nbio.structure.xtal.SpaceGroup; 104import org.biojava.nbio.structure.xtal.SymoplibParser; 105import org.slf4j.Logger; 106import org.slf4j.LoggerFactory; 107 108/** 109 * A MMcifConsumer implementation that builds an in-memory representation of the 110 * content of a mmcif file as a BioJava Structure object. 
111 * 112 * @author Andreas Prlic 113 * @since 1.7 114 */ 115 116public class SimpleMMcifConsumer implements MMcifConsumer { 117 118 private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifConsumer.class); 119 120 private Structure structure; 121 private Chain current_chain; 122 private Group current_group; 123 124 125 private List<Chain> current_model; 126 private List<Entity> entities; 127 private List<StructRef> strucRefs; 128 private List<Chain> seqResChains; 129 private List<Chain> entityChains; // needed to link entities, chains and compounds... 130 private List<StructAsym> structAsyms; // needed to link entities, chains and compounds... 131 private List<PdbxStructOperList> structOpers ; // 132 private List<PdbxStructAssembly> strucAssemblies; 133 private List<PdbxStructAssemblyGen> strucAssemblyGens; 134 private List<EntitySrcGen> entitySrcGens; 135 private List<EntitySrcNat> entitySrcNats; 136 private List<EntitySrcSyn> entitySrcSyns; 137 private List<StructConn> structConn; 138 private List<StructNcsOper> structNcsOper; 139 private List<StructRefSeqDif> sequenceDifs; 140 private List<StructSiteGen> structSiteGens; 141 142 private Matrix4d parsedScaleMatrix; 143 144 /** 145 * A map of asym ids (internal chain ids) to strand ids (author chain ids) 146 * extracted from pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme categories 147 */ 148 private Map<String,String> asymStrandId; 149 150 /** 151 * A map of asym ids (internal chain ids) to strand ids (author chain ids) 152 * extracted from the information in _atom_sites category. 
Will be used 153 * if no mapping is found in pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme 154 */ 155 private Map<String,String> asymId2StrandIdFromAtomSites; 156 157 /** 158 * A map of asym ids (internal chain ids) to entity ids extracted from 159 * the _struct_asym category 160 */ 161 private Map<String,String> asymId2entityId; 162 163 private String current_nmr_model ; 164 165 private FileParsingParameters params; 166 167 public SimpleMMcifConsumer(){ 168 params = new FileParsingParameters(); 169 documentStart(); 170 171 } 172 173 @Override 174 public void newEntity(Entity entity) { 175 logger.debug("New entity: {}",entity.toString()); 176 entities.add(entity); 177 } 178 179 @Override 180 public void newPdbxStructOperList(PdbxStructOperList structOper){ 181 182 structOpers.add(structOper); 183 } 184 185 @Override 186 public void newStructAsym(StructAsym sasym){ 187 188 structAsyms.add(sasym); 189 } 190 191 private Entity getEntity(int entity_id){ 192 try { 193 for (Entity e: entities){ 194 int eId = Integer.parseInt(e.getId()); 195 if (eId== entity_id){ 196 return e; 197 } 198 } 199 } catch (NumberFormatException e) { 200 logger.warn("Entity id does not look like a number:", e.getMessage()); 201 } 202 return null; 203 } 204 205 @Override 206 public void newStructKeywords(StructKeywords kw){ 207 PDBHeader header = structure.getPDBHeader(); 208 if ( header == null) 209 header = new PDBHeader(); 210 header.setDescription(kw.getPdbx_keywords()); 211 header.setClassification(kw.getPdbx_keywords()); 212 } 213 214 @Override 215 public void setStruct(Struct struct) { 216 217 PDBHeader header = structure.getPDBHeader(); 218 if ( header == null) 219 header = new PDBHeader(); 220 221 header.setTitle(struct.getTitle()); 222 header.setIdCode(struct.getEntry_id()); 223 //header.setDescription(struct.getPdbx_descriptor()); 224 //header.setClassification(struct.getPdbx_descriptor()); 225 //header.setDescription(struct.getPdbx_descriptor()); 226 227 228 229 
structure.setPDBHeader(header); 230 structure.setPDBCode(struct.getEntry_id()); 231 } 232 233 /** initiate new group, either Hetatom, Nucleotide, or AminoAcid */ 234 private Group getNewGroup(String recordName,Character aminoCode1, long seq_id,String groupCode3) { 235 236 Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(groupCode3); 237 if ( g != null && !g.getChemComp().isEmpty()) { 238 if ( g instanceof AminoAcidImpl) { 239 AminoAcidImpl aa = (AminoAcidImpl) g; 240 aa.setId(seq_id); 241 } else if ( g instanceof NucleotideImpl) { 242 NucleotideImpl nuc = (NucleotideImpl) g; 243 nuc.setId(seq_id); 244 } else if ( g instanceof HetatomImpl) { 245 HetatomImpl het = (HetatomImpl)g; 246 het.setId(seq_id); 247 } 248 return g; 249 } 250 251 252 253 Group group; 254 if ( recordName.equals("ATOM") ) { 255 if (StructureTools.isNucleotide(groupCode3)) { 256 // it is a nucleotide 257 NucleotideImpl nu = new NucleotideImpl(); 258 group = nu; 259 nu.setId(seq_id); 260 261 } else if (aminoCode1==null || aminoCode1 == StructureTools.UNKNOWN_GROUP_LABEL){ 262 HetatomImpl h = new HetatomImpl(); 263 h.setId(seq_id); 264 group = h; 265 266 } else { 267 AminoAcidImpl aa = new AminoAcidImpl() ; 268 aa.setAminoType(aminoCode1); 269 aa.setId(seq_id); 270 group = aa ; 271 } 272 } 273 else { 274 if (StructureTools.isNucleotide(groupCode3)) { 275 // it is a nucleotide 276 NucleotideImpl nu = new NucleotideImpl(); 277 group = nu; 278 nu.setId(seq_id); 279 } 280 else if (aminoCode1 != null ) { 281 AminoAcidImpl aa = new AminoAcidImpl() ; 282 aa.setAminoType(aminoCode1); 283 aa.setId(seq_id); 284 group = aa ; 285 } else { 286 HetatomImpl h = new HetatomImpl(); 287 h.setId(seq_id); 288 group = h; 289 } 290 } 291 return group ; 292 } 293 294 /** 295 * Test if the given chainID is already present in the list of chains given. If yes, returns the chain 296 * otherwise returns null. 
297 */ 298 private static Chain isKnownChain(String chainID, List<Chain> chains){ 299 300 for (int i = 0; i< chains.size();i++){ 301 Chain testchain = chains.get(i); 302 //System.out.println("comparing chainID >"+chainID+"< against testchain " + i+" >" +testchain.getName()+"<"); 303 if (chainID.equals(testchain.getChainID())) { 304 //System.out.println("chain "+ chainID+" already known ..."); 305 return testchain; 306 } 307 } 308 309 return null; 310 } 311 312 @Override 313 public void newAtomSite(AtomSite atom) { 314 315 if (params.isHeaderOnly()) return; 316 317 // Warning: getLabel_asym_id is not the "chain id" in the PDB file 318 // it is the internally used chain id. 319 // later on we will fix this... 320 321 // later one needs to map the asym id to the pdb_strand_id 322 323 //TODO: add support for FileParsingParams.getMaxAtoms() 324 325 boolean startOfNewChain = false; 326 327 String chain_id = atom.getLabel_asym_id(); 328 329 String recordName = atom.getGroup_PDB(); 330 String residueNumberS = atom.getAuth_seq_id(); 331 Integer residueNrInt = Integer.parseInt(residueNumberS); 332 333 // the 3-letter name of the group: 334 String groupCode3 = atom.getLabel_comp_id(); 335 336 Character aminoCode1 = null; 337 if ( recordName.equals("ATOM") ) 338 aminoCode1 = StructureTools.get1LetterCodeAmino(groupCode3); 339 else { 340 aminoCode1 = StructureTools.get1LetterCodeAmino(groupCode3); 341 342 // for nucleotides this will be null.. 343 if (aminoCode1 != null && aminoCode1.equals(StructureTools.UNKNOWN_GROUP_LABEL)) 344 aminoCode1 = null; 345 } 346 String insCodeS = atom.getPdbx_PDB_ins_code(); 347 Character insCode = null; 348 if (! insCodeS.equals("?")) { 349 insCode = insCodeS.charAt(0); 350 } 351 // we store the internal seq id in the Atom._id field 352 // this is not a PDB file field but we need this to internally assign the insertion codes later 353 // from the pdbx_poly_seq entries.. 
354 355 long seq_id = -1; 356 try { 357 seq_id = Long.parseLong(atom.getLabel_seq_id()); 358 } catch (NumberFormatException e){ 359 // non polymer chains (ligands and small molecules) will have a label_seq_id set to '.', thus it is ok to 360 // silently ignore this 361 //logger.debug("Could not parse number for _atom_site.label_seq_id: "+e.getMessage()); 362 } 363 364 String nmrModel = atom.getPdbx_PDB_model_num(); 365 366 if ( current_nmr_model == null) { 367 current_nmr_model = nmrModel; 368 } 369 370 if (! current_nmr_model.equals(nmrModel)){ 371 current_nmr_model = nmrModel; 372 373 // add previous data 374 if ( current_chain != null ) { 375 current_chain.addGroup(current_group); 376 current_group.trimToSize(); 377 } 378 379 // we came to the beginning of a new NMR model 380 structure.addModel(current_model); 381 current_model = new ArrayList<Chain>(); 382 current_chain = null; 383 current_group = null; 384 } 385 386 387 if (current_chain == null) { 388 current_chain = new ChainImpl(); 389 current_chain.setChainID(chain_id); 390 current_model.add(current_chain); 391 startOfNewChain = true; 392 } 393 394 //System.out.println("BEFORE: " + chain_id + " " + current_chain.getName()); 395 if ( ! chain_id.equals(current_chain.getChainID()) ) { 396 397 startOfNewChain = true; 398 399 // end up old chain... 400 current_chain.addGroup(current_group); 401 402 // see if old chain is known ... 403 Chain testchain ; 404 testchain = isKnownChain(current_chain.getChainID(),current_model); 405 406 //System.out.println("trying to re-using known chain " + current_chain.getName() + " " + chain_id); 407 if ( testchain != null && testchain.getChainID().equals(chain_id)){ 408 //System.out.println("re-using known chain " + current_chain.getName() + " " + chain_id); 409 410 } else { 411 412 testchain = isKnownChain(chain_id,current_model); 413 } 414 415 if ( testchain == null) { 416 //System.out.println("unknown chain. 
creating new chain."); 417 418 current_chain = new ChainImpl(); 419 current_chain.setChainID(chain_id); 420 421 } else { 422 current_chain = testchain; 423 } 424 425 if ( ! current_model.contains(current_chain)) 426 current_model.add(current_chain); 427 428 } 429 430 431 ResidueNumber residueNumber = new ResidueNumber(chain_id,residueNrInt, insCode); 432 433 if (current_group == null) { 434 435 current_group = getNewGroup(recordName,aminoCode1,seq_id, groupCode3); 436 437 current_group.setResidueNumber(residueNumber); 438 current_group.setPDBName(groupCode3); 439 } 440 441 if ( startOfNewChain){ 442 current_group = getNewGroup(recordName,aminoCode1,seq_id, groupCode3); 443 444 current_group.setResidueNumber(residueNumber); 445 current_group.setPDBName(groupCode3); 446 } 447 448 Group altGroup = null; 449 String altLocS = atom.getLabel_alt_id(); 450 Character altLoc = ' '; 451 if ( altLocS.length()>0) { 452 altLoc = altLocS.charAt(0); 453 if ( altLoc.equals('.') ) 454 altLoc = ' '; 455 456 } 457 458 // check if residue number is the same ... 459 // insertion code is part of residue number 460 if ( ! residueNumber.equals(current_group.getResidueNumber())) { 461 //System.out.println("end of residue: "+current_group.getPDBCode()+" "+residueNrInt); 462 current_chain.addGroup(current_group); 463 current_group.trimToSize(); 464 current_group = getNewGroup(recordName,aminoCode1,seq_id,groupCode3); 465 current_group.setPDBName(groupCode3); 466 current_group.setResidueNumber(residueNumber); 467 468 469 // System.out.println("Made new group: " + groupCode3 + " " + resNum + " " + iCode); 470 471 } else { 472 // same residueNumber, but altLocs... 473 474 // test altLoc 475 if ( ! altLoc.equals(' ') && ( ! altLoc.equals('.'))) { 476 logger.debug("found altLoc! 
" + altLoc + " " + current_group + " " + altGroup); 477 altGroup = getCorrectAltLocGroup( altLoc,recordName,aminoCode1,groupCode3, seq_id); 478 if (altGroup.getChain()==null) { 479 altGroup.setChain(current_chain); 480 } 481 } 482 } 483 484 //atomCount++; 485 //System.out.println("fixing atom name for >" + atom.getLabel_atom_id() + "< >" + fullname + "<"); 486 487 488 if ( params.isParseCAOnly() ){ 489 // yes , user wants to get CA only 490 // only parse CA atoms... 491 if (! (atom.getLabel_atom_id().equals(StructureTools.CA_ATOM_NAME) && atom.getType_symbol().equals("C"))) { 492 //System.out.println("ignoring " + line); 493 //atomCount--; 494 return; 495 } 496 } 497 498 // filling the map in case there's no pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme in the file 499 asymId2StrandIdFromAtomSites.put(atom.getLabel_asym_id(), atom.getAuth_asym_id()); 500 501 //see if chain_id is one of the previous chains ... 502 503 Atom a = convertAtom(atom); 504 505 //see if chain_id is one of the previous chains ... 506 if ( altGroup != null) { 507 altGroup.addAtom(a); 508 altGroup = null; 509 } 510 else { 511 current_group.addAtom(a); 512 } 513 514 515 // make sure that main group has all atoms 516 // GitHub issue: #76 517 if ( ! 
current_group.hasAtom(a.getName())) { 518 current_group.addAtom(a); 519 } 520 521 522 //System.out.println(">" + atom.getLabel_atom_id()+"< " + a.getGroup().getPDBName() + " " + a.getGroup().getChemComp() ); 523 524 //System.out.println(current_group); 525 526 } 527 528 /** convert a MMCif AtomSite object to a BioJava Atom object 529 * 530 * @param atom the mmmcif AtomSite record 531 * @return an Atom 532 */ 533 private Atom convertAtom(AtomSite atom){ 534 535 536 Atom a = new AtomImpl(); 537 538 a.setPDBserial(Integer.parseInt(atom.getId())); 539 a.setName(atom.getLabel_atom_id()); 540 541 double x = Double.parseDouble (atom.getCartn_x()); 542 double y = Double.parseDouble (atom.getCartn_y()); 543 double z = Double.parseDouble (atom.getCartn_z()); 544 a.setX(x); 545 a.setY(y); 546 a.setZ(z); 547 548 float occupancy = Float.parseFloat (atom.getOccupancy()); 549 a.setOccupancy(occupancy); 550 551 float temp = Float.parseFloat (atom.getB_iso_or_equiv()); 552 a.setTempFactor(temp); 553 554 String alt = atom.getLabel_alt_id(); 555 if (( alt != null ) && ( alt.length() > 0) && (! 
alt.equals("."))){ 556 a.setAltLoc(new Character(alt.charAt(0))); 557 } else { 558 a.setAltLoc(new Character(' ')); 559 } 560 561 Element element = Element.R; 562 try { 563 element = Element.valueOfIgnoreCase(atom.getType_symbol()); 564 } catch (IllegalArgumentException e) { 565 logger.info("Element {} was not recognised as a BioJava-known element, the element will be represented as the generic element {}", atom.getType_symbol(), Element.R.name()); 566 } 567 a.setElement(element); 568 569 return a; 570 571 } 572 573 574 private Group getCorrectAltLocGroup( Character altLoc, 575 String recordName, Character aminoCode1, String groupCode3, long seq_id) { 576 577 // see if we know this altLoc already; 578 List<Atom> atoms = current_group.getAtoms(); 579 if ( atoms.size() > 0) { 580 Atom a1 = atoms.get(0); 581 // we are just adding atoms to the current group 582 // probably there is a second group following later... 583 if (a1.getAltLoc().equals(altLoc)) { 584 585 return current_group; 586 } 587 } 588 589 List<Group> altLocs = current_group.getAltLocs(); 590 for ( Group altLocG : altLocs ){ 591 atoms = altLocG.getAtoms(); 592 if ( atoms.size() > 0) { 593 for ( Atom a1 : atoms) { 594 if (a1.getAltLoc().equals( altLoc)) { 595 596 return altLocG; 597 } 598 } 599 } 600 } 601 602 // no matching altLoc group found. 603 // build it up. 604 605 if ( groupCode3.equals(current_group.getPDBName())) { 606 if ( current_group.getAtoms().size() == 0) { 607 //System.out.println("current group is empty " + current_group + " " + altLoc); 608 return current_group; 609 } 610 //System.out.println("cloning current group " + current_group + " " + current_group.getAtoms().get(0).getAltLoc() + " altLoc " + altLoc); 611 Group altLocG = (Group) current_group.clone(); 612 // drop atoms from cloned group... 
613 // https://redmine.open-bio.org/issues/3307 614 altLocG.setAtoms(new ArrayList<Atom>()); 615 altLocG.getAltLocs().clear(); 616 current_group.addAltLoc(altLocG); 617 return altLocG; 618 } 619 620 // System.out.println("new group " + recordName + " " + aminoCode1 + " " +groupCode3); 621 //String recordName,Character aminoCode1, long seq_id,String groupCode3) { 622 Group altLocG = getNewGroup(recordName,aminoCode1,seq_id,groupCode3); 623 624 altLocG.setPDBName(groupCode3); 625 altLocG.setResidueNumber(current_group.getResidueNumber()); 626 current_group.addAltLoc(altLocG); 627 return altLocG; 628 } 629 630 /** Start the parsing 631 * 632 */ 633 @Override 634 public void documentStart() { 635 structure = new StructureImpl(); 636 637 current_chain = null; 638 current_group = null; 639 current_nmr_model = null; 640 //atomCount = 0; 641 642 current_model = new ArrayList<Chain>(); 643 entities = new ArrayList<Entity>(); 644 strucRefs = new ArrayList<StructRef>(); 645 seqResChains = new ArrayList<Chain>(); 646 entityChains = new ArrayList<Chain>(); 647 structAsyms = new ArrayList<StructAsym>(); 648 asymStrandId = new HashMap<String, String>(); 649 asymId2StrandIdFromAtomSites = new HashMap<String, String>(); 650 asymId2entityId = new HashMap<String,String>(); 651 structOpers = new ArrayList<PdbxStructOperList>(); 652 strucAssemblies = new ArrayList<PdbxStructAssembly>(); 653 strucAssemblyGens = new ArrayList<PdbxStructAssemblyGen>(); 654 entitySrcGens = new ArrayList<EntitySrcGen>(); 655 entitySrcNats = new ArrayList<EntitySrcNat>(); 656 entitySrcSyns = new ArrayList<EntitySrcSyn>(); 657 structConn = new ArrayList<StructConn>(); 658 structNcsOper = new ArrayList<StructNcsOper>(); 659 sequenceDifs = new ArrayList<StructRefSeqDif>(); 660 structSiteGens = new ArrayList<StructSiteGen>(); 661 } 662 663 664 @Override 665 public void documentEnd() { 666 667 // Expected that there is one current_chain that needs to be added to the model 668 // When in headerOnly mode, no Atoms 
are read, and there will not be an active 669 // current_chain. 670 if ( current_chain != null ) { 671 672 current_chain.addGroup(current_group); 673 if (isKnownChain(current_chain.getChainID(),current_model) == null) { 674 current_model.add(current_chain); 675 } 676 } else if (!params.isHeaderOnly()){ 677 logger.warn("current chain is null at end of document."); 678 } 679 680 structure.addModel(current_model); 681 682 // Goal is to reproduce the PDB files exactly: 683 // What has to be done is to use the auth_mon_id for the assignment. For this 684 685 // map entities to Chains and Compound objects... 686 687 688 for (StructAsym asym : structAsyms) { 689 logger.debug("Entity {} matches asym_id: {}", asym.getEntity_id(), asym.getId() ); 690 691 asymId2entityId.put(asym.getId(), asym.getEntity_id()); 692 693 Chain s = getEntityChain(asym.getEntity_id()); 694 Chain seqres = (Chain)s.clone(); 695 // to solve issue #160 (e.g. 3u7t) 696 seqres = removeSeqResHeterogeneity(seqres); 697 seqres.setChainID(asym.getId()); 698 699 seqResChains.add(seqres); 700 logger.debug(" seqres: " + asym.getId() + " " + seqres + "<") ; 701 702 // adding the compounds (entities) 703 addCompounds(asym); 704 705 } 706 707 if (structAsyms.isEmpty()) { 708 logger.warn("No _struct_asym category in file, no SEQRES groups will be added."); 709 } 710 711 // Only align if requested (default) and not when headerOnly mode with no Atoms. 712 // Otherwise, we store the empty SeqRes Groups unchanged in the right chains. 
713 if ( params.isAlignSeqRes() && !params.isHeaderOnly() ){ 714 logger.debug("Parsing mode align_seqres, will parse SEQRES and align to ATOM sequence"); 715 alignSeqRes(); 716 } else { 717 logger.debug("Parsing mode unalign_seqres, will parse SEQRES but not align it to ATOM sequence"); 718 SeqRes2AtomAligner.storeUnAlignedSeqRes(structure, seqResChains, params.isHeaderOnly()); 719 } 720 721 if (asymStrandId.isEmpty()) { 722 logger.warn("No pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme categories present. Will use chain id mapping from _atom_sites category"); 723 724 asymStrandId = asymId2StrandIdFromAtomSites; 725 } 726 // If we only parse the header - we have no option but to use the other mapping (which can be broken) 727 if (asymId2StrandIdFromAtomSites.isEmpty()){ 728 729 logger.warn("No _atom_sites category auth to asymid mappings. Will use chain id mapping from pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme categories"); 730 asymId2StrandIdFromAtomSites = asymStrandId; 731 } 732 733 // mismatching Author assigned chain IDS and PDB internal chain ids: 734 // fix the chain IDS in the current model: 735 736 if(params.isUseInternalChainId()==false){ 737 for (int i =0; i< structure.nrModels() ; i++){ 738 List<Chain> model = structure.getModel(i); 739 740 List<Chain> pdbChains = new ArrayList<Chain>(); 741 for (Chain chain : model) { 742 for (String asym : asymId2StrandIdFromAtomSites.keySet()) { 743 if ( chain.getChainID().equals(asym)){ 744 String newChainId = asymId2StrandIdFromAtomSites.get(asym); 745 746 logger.debug("Renaming chain with asym_id {} ({} atom groups) to author_asym_id/strand_id {}", 747 asym, chain.getAtomGroups().size(), newChainId); 748 749 chain.setChainID(newChainId); 750 chain.setInternalChainID(asym); 751 // set chain of all groups 752 for(Group g : chain.getAtomGroups()) { 753 ResidueNumber resNum = g.getResidueNumber(); 754 if(resNum != null) 755 resNum.setChainId(newChainId); 756 } 757 for(Group g : chain.getSeqResGroups()) { 758 
ResidueNumber resNum = g.getResidueNumber(); 759 if(resNum != null) 760 resNum.setChainId(newChainId); 761 } 762 Chain known = isKnownChain(chain.getChainID(), pdbChains); 763 if ( known == null ){ 764 pdbChains.add(chain); 765 } else { 766 // and now we join the 2 chains together again, because in cif files the data can be split up... 767 for ( Group g : chain.getAtomGroups()){ 768 known.addGroup(g); 769 } 770 } 771 772 break; 773 } 774 } 775 } 776 777 structure.setModel(i,pdbChains); 778 } 779 } 780 else{ 781 // Just set the internal id as the auth id -> if we're using the asymid 782 for (int i =0; i< structure.nrModels() ; i++){ 783 List<Chain> model = structure.getModel(i); 784 for (Chain chain : model) { 785 for (String asym : asymId2StrandIdFromAtomSites.keySet()) { 786 if (chain.getChainID().equals(asym)){ 787 String authChainId = asymId2StrandIdFromAtomSites.get(asym); 788 chain.setInternalChainID(authChainId); 789 break; 790 } 791 } 792 } 793 } 794 } 795 796 // NOTE bonds and charges can only be done at this point that the chain id mapping is properly sorted out 797 if (!params.isHeaderOnly()) { 798 if ( params.shouldCreateAtomBonds()) { 799 addBonds(); 800 } 801 802 if ( params.shouldCreateAtomCharges()) { 803 addCharges(); 804 } 805 } 806 807 // compounds (entities) 808 // In addCompounds above we created the compounds if they were present in the file 809 // Now we need to make sure that they are linked to chains and also that if they are not present in the file we need to add them now 810 linkCompounds(); 811 812 if (!params.isHeaderOnly()) { 813 814 // Do structure.setSites(sites) after any chain renaming to be like PDB. 815 addSites(); 816 } 817 818 819 820 // set the oligomeric state info in the header... 821 if (params.isParseBioAssembly()) { 822 823 // the more detailed mapping of chains to rotation operations happens in StructureIO... 
824 825 Map<Integer,BioAssemblyInfo> bioAssemblies = new HashMap<Integer, BioAssemblyInfo>(); 826 827 for ( PdbxStructAssembly psa : strucAssemblies){ 828 829 List<PdbxStructAssemblyGen> psags = new ArrayList<PdbxStructAssemblyGen>(1); 830 831 for ( PdbxStructAssemblyGen psag: strucAssemblyGens ) { 832 if ( psag.getAssembly_id().equals(psa.getId())) { 833 psags.add(psag); 834 } 835 } 836 837 BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); 838 839 // these are the transformations that need to be applied to our model 840 List<BiologicalAssemblyTransformation> transformations = builder.getBioUnitTransformationList(psa, psags, structOpers); 841 842 int mmSize = 0; 843 int bioAssemblyId = -1; 844 try { 845 bioAssemblyId = Integer.parseInt(psa.getId()); 846 } catch (NumberFormatException e) { 847 logger.info("Could not parse a numerical bio assembly id from '{}'",psa.getId()); 848 } 849 try { 850 mmSize = Integer.parseInt(psa.getOligomeric_count()); 851 } catch (NumberFormatException e) { 852 if (bioAssemblyId!=-1) 853 // if we have a numerical id, then it's unusual to have no oligomeric size: we warn about it 854 logger.warn("Could not parse oligomeric count from '{}' for biological assembly id {}", 855 psa.getOligomeric_count(),psa.getId()); 856 else 857 // no numerical id (PAU,XAU in virus entries), it's normal to have no oligomeric size 858 logger.info("Could not parse oligomeric count from '{}' for biological assembly id {}", 859 psa.getOligomeric_count(),psa.getId()); 860 } 861 862 // if bioassembly id is not numerical we throw it away 863 // this happens usually for viral capsid entries, like 1ei7 864 // see issue #230 in github 865 if (bioAssemblyId!=-1) { 866 BioAssemblyInfo bioAssembly = new BioAssemblyInfo(); 867 bioAssembly.setId(bioAssemblyId); 868 bioAssembly.setMacromolecularSize(mmSize); 869 bioAssembly.setTransforms(transformations); 870 bioAssemblies.put(bioAssemblyId,bioAssembly); 871 } 872 873 } 874 
structure.getPDBHeader().setBioAssemblies(bioAssemblies); 875 } 876 877 setStructNcsOps(); 878 879 setCrystallographicInfoMetadata(); 880 881 882 Map<String,List<SeqMisMatch>> misMatchMap = new HashMap<String, List<SeqMisMatch>>(); 883 for (StructRefSeqDif sdif : sequenceDifs) { 884 SeqMisMatch misMatch = new SeqMisMatchImpl(); 885 misMatch.setDetails(sdif.getDetails()); 886 887 String insCode = sdif.getPdbx_pdb_ins_code(); 888 if ( insCode != null && insCode.equals("?")) 889 insCode = null; 890 misMatch.setInsCode(insCode); 891 misMatch.setOrigGroup(sdif.getDb_mon_id()); 892 misMatch.setPdbGroup(sdif.getMon_id()); 893 misMatch.setPdbResNum(sdif.getPdbx_auth_seq_num()); 894 misMatch.setUniProtId(sdif.getPdbx_seq_db_accession_code()); 895 misMatch.setSeqNum(sdif.getSeq_num()); 896 897 898 List<SeqMisMatch> mms = misMatchMap.get(sdif.getPdbx_pdb_strand_id()); 899 if ( mms == null) { 900 mms = new ArrayList<SeqMisMatch>(); 901 misMatchMap.put(sdif.getPdbx_pdb_strand_id(),mms); 902 } 903 mms.add(misMatch); 904 905 } 906 907 for (String chainId : misMatchMap.keySet()){ 908 try { 909 Chain c = structure.getChainByPDB(chainId); 910 c.setSeqMisMatches(misMatchMap.get(chainId)); 911 } catch (Exception e){ 912 logger.warn("could not set mismatches for chain " + chainId); 913 914 } 915 } 916 917 } 918 919 /** 920 * Here we link compounds (entities) to chains. 
921 * Also if compounds are not present in file, this initialises the compounds with some heuristics, see {@link CompoundFinder} 922 */ 923 private void linkCompounds() { 924 925 926 for (int i =0; i< structure.nrModels() ; i++){ 927 for (Chain chain : structure.getModel(i)) { 928 String entityId; 929 if( params.isUseInternalChainId()){ 930 entityId = asymId2entityId.get(chain.getChainID()); 931 } 932 else{ 933 entityId = asymId2entityId.get(chain.getInternalChainID()); 934 } 935 if (entityId==null) { 936 // this can happen for instance if the cif file didn't have _struct_asym category at all 937 // and thus we have no asymId2entityId mapping at all 938 logger.warn("No entity id could be found for chain {}", chain.getInternalChainID()); 939 continue; 940 } 941 int eId = Integer.parseInt(entityId); 942 943 // Compounds are not added for non-polymeric entities, if a chain is non-polymeric its compound won't be found. 944 // TODO: add all entities and unique compounds and add methods to directly get polymer or non-polymer 945 // asyms (chains). Either create a unique StructureImpl or modify existing for a better representation of the 946 // mmCIF internal data structures but is compatible with Structure interface. 947 // Some examples of PDB entries with this kind of problem: 948 // - 2uub: asym_id X, chainId Z, entity_id 24: fully non-polymeric but still with its own chainId 949 // - 3o6j: asym_id K, chainId Z, entity_id 6 : a single water molecule 950 // - 1dz9: asym_id K, chainId K, entity_id 6 : a potassium ion alone 951 952 Compound compound = structure.getCompoundById(eId); 953 if (compound==null) { 954 // Supports the case where the only chain members were from non-polymeric entity that is missing. 955 // Solved by creating a new Compound(entity) to which this chain will belong. 
956 logger.warn("Could not find a compound for entity_id {}, for chain id {}, creating a new compound.", 957 eId, chain.getChainID()); 958 compound = new Compound(); 959 compound.setMolId(eId); 960 compound.addChain(chain); 961 chain.setCompound(compound); 962 structure.addCompound(compound); 963 } else { 964 logger.debug("Adding chain with chain id {} (asym id {}) to compound with entity_id {}", 965 chain.getChainID(), chain.getInternalChainID(), eId); 966 compound.addChain(chain); 967 chain.setCompound(compound); 968 } 969 970 } 971 972 } 973 974 // to make sure we have Compounds linked to chains, we call getCompounds() which will lazily initialise the 975 // compounds using heuristics (see CompoundFinder) in the case that they were not explicitly present in the file 976 List<Compound> compounds = structure.getCompounds(); 977 978 // final sanity check: it can happen that from the annotated compounds some are not linked to any chains 979 // e.g. 3s26: a sugar entity does not have any chains associated to it (it seems to be happening with many sugar compounds) 980 // we simply log it, this can sign some other problems if the compounds are used down the line 981 for (Compound compound:compounds) { 982 if (compound.getChains().isEmpty()) { 983 logger.info("Compound {} '{}' has no chains associated to it", 984 compound.getId()==null?"with no entity id":compound.getId(), compound.getMolName()); 985 } 986 } 987 988 } 989 990 private void addCharges() { 991 ChargeAdder.addCharges(structure); 992 } 993 994 /** 995 * The method will return a new reference to a Chain with any consecutive groups 996 * having same residue numbers removed. 
997 * This is necessary to solve the microheterogeneity issue in entries like 3u7t (see github issue #160) 998 * @param c 999 * @return 1000 */ 1001 private Chain removeSeqResHeterogeneity(Chain c) { 1002 1003 Chain trimmedChain = new ChainImpl(); 1004 1005 ResidueNumber lastResNum = null; 1006 1007 for (Group g:c.getAtomGroups()) { 1008 1009 // note we have to deep copy this, otherwise they stay linked and would get altered in addGroup(g) 1010 ResidueNumber currentResNum = new ResidueNumber( 1011 g.getResidueNumber().getChainId(), 1012 g.getResidueNumber().getSeqNum(), 1013 g.getResidueNumber().getInsCode()); 1014 1015 if (lastResNum == null || !lastResNum.equals(currentResNum) ) { 1016 trimmedChain.addGroup(g); 1017 } else { 1018 logger.debug("Removing seqres group because it seems to be repeated in entity_poly_seq, most likely has hetero='y': "+g); 1019 } 1020 1021 lastResNum = currentResNum; 1022 1023 } 1024 return trimmedChain; 1025 } 1026 1027 private void addBonds() { 1028 BondMaker maker = new BondMaker(structure, params); 1029 maker.makeBonds(); 1030 maker.formBondsFromStructConn(structConn); 1031 } 1032 1033 private void alignSeqRes() { 1034 1035 logger.debug("Parsing mode align_seqres, will align to ATOM to SEQRES sequence"); 1036 1037 // fix SEQRES residue numbering for all models 1038 1039 for (int model=0;model<structure.nrModels();model++) { 1040 1041 List<Chain> atomList = structure.getModel(model); 1042 1043 for (Chain seqResChain: seqResChains){ 1044 1045 // this extracts the matching atom chain from atomList 1046 Chain atomChain = SeqRes2AtomAligner.getMatchingAtomRes(seqResChain, atomList); 1047 1048 if (atomChain == null) { 1049 // most likely there's no observed residues at all for the seqres chain: can't map 1050 // e.g. 3zyb: chains with asym_id L,M,N,O,P have no observed residues 1051 logger.warn("Could not map SEQRES chain with asym_id={} to any ATOM chain. 
Most likely there's no observed residues in the chain.", 1052 seqResChain.getChainID()); 1053 continue; 1054 } 1055 1056 //map the atoms to the seqres... 1057 1058 // we need to first clone the seqres so that they stay independent for different models 1059 List<Group> seqResGroups = new ArrayList<Group>(); 1060 for (int i=0;i<seqResChain.getAtomGroups().size();i++) { 1061 seqResGroups.add((Group)seqResChain.getAtomGroups().get(i).clone()); 1062 } 1063 1064 for ( int seqResPos = 0 ; seqResPos < seqResGroups.size(); seqResPos++) { 1065 Group seqresG = seqResGroups.get(seqResPos); 1066 boolean found = false; 1067 for ( Group atomG: atomChain.getAtomGroups()) { 1068 1069 int internalNr = getInternalNr (atomG); 1070 1071 if (seqresG.getResidueNumber().getSeqNum() == internalNr ) { 1072 seqResGroups.set(seqResPos, atomG); 1073 found = true; 1074 break; 1075 } 1076 1077 1078 } 1079 if ( ! found) 1080 // so far the residue number has tracked internal numbering. 1081 // however there are no atom records, as such this can't be a PDB residue number... 1082 seqresG.setResidueNumber(null); 1083 } 1084 atomChain.setSeqResGroups(seqResGroups); 1085 1086 } 1087 } 1088 } 1089 1090 private int getInternalNr(Group atomG) { 1091 if ( atomG.getType().equals(GroupType.AMINOACID)) { 1092 AminoAcidImpl aa = (AminoAcidImpl) atomG; 1093 return new Long(aa.getId()).intValue(); 1094 } else if ( atomG.getType().equals(GroupType.NUCLEOTIDE)) { 1095 NucleotideImpl nu = (NucleotideImpl) atomG; 1096 return new Long(nu.getId()).intValue(); 1097 } else { 1098 HetatomImpl he = (HetatomImpl) atomG; 1099 return new Long(he.getId()).intValue(); 1100 } 1101 } 1102 1103 private void addCompounds(StructAsym asym) { 1104 int eId = 0; 1105 try { 1106 eId = Integer.parseInt(asym.getEntity_id()); 1107 } catch (NumberFormatException e) { 1108 logger.warn("Could not parse mol_id from string {}. 
Will use 0 for creating Compound",asym.getEntity_id()); 1109 } 1110 Entity e = getEntity(eId); 1111 1112 for (EntitySrcGen esg : entitySrcGens) { 1113 1114 if (! esg.getEntity_id().equals(asym.getEntity_id())) 1115 continue; 1116 1117 // found the matching EntitySrcGen 1118 // get the corresponding Entity 1119 Compound c = structure.getCompoundById(eId); 1120 if ( c == null){ 1121 if (e!=null) { 1122 if (e.getType().equals("polymer")) { 1123 c = createNewCompoundFromESG(esg, eId); 1124 c.setMolName(e.getPdbx_description()); 1125 structure.addCompound(c); 1126 logger.debug("Adding Compound with entity id {} from _entity_src_syn, with name: {}",eId,c.getMolName()); 1127 } else if (e.getType().equals("non-solvent")) { 1128 // TODO handle non-polymer compounds. 1129 } else if (e.getType().equals("water")) { 1130 // TODO handle solvent entity. 1131 } else { 1132 logger.warn("Could not add entity id " + esg.getEntity_id() + " that has unknown _entity.type"); 1133 } 1134 } 1135 } 1136 1137 } 1138 1139 for (EntitySrcNat esn : entitySrcNats) { 1140 if (! esn.getEntity_id().equals(asym.getEntity_id())) 1141 continue; 1142 1143 // found the matching EntitySrcGen 1144 // get the corresponding Entity 1145 Compound c = structure.getCompoundById(eId); 1146 if ( c == null){ 1147 if (e!=null && e.getType().equals("polymer")) { 1148 c = createNewCompoundFromESN(esn, eId); 1149 c.setMolName(e.getPdbx_description()); 1150 structure.addCompound(c); 1151 logger.debug("Adding Compound with entity id {} from _entity_src_syn, with name: {}",eId,c.getMolName()); 1152 } 1153 } 1154 1155 } 1156 1157 for (EntitySrcSyn ess : entitySrcSyns) { 1158 if (! 
ess.getEntity_id().equals(asym.getEntity_id())) 1159 continue; 1160 1161 // found the matching EntitySrcGen 1162 // get the corresponding Entity 1163 Compound c = structure.getCompoundById(eId); 1164 if ( c == null){ 1165 if (e!=null && e.getType().equals("polymer")) { 1166 c = createNewCompoundFromESS(ess, eId); 1167 c.setMolName(e.getPdbx_description()); 1168 structure.addCompound(c); 1169 logger.debug("Adding Compound with entity id {} from _entity_src_syn, with name: {}",eId,c.getMolName()); 1170 } 1171 } 1172 } 1173 1174 // for some mmCIF files like 1yrm all 3 of _entity_src_gen, _entity_src_nat and _pdbx_entity_src_syn are missing 1175 // we need to fill the Compounds in some other way: 1176 1177 Compound c = structure.getCompoundById(eId); 1178 1179 if (c==null) { 1180 c = new Compound(); 1181 c.setMolId(eId); 1182 1183 // we only add the compound if a polymeric one (to match what the PDB parser does) 1184 if (e!=null && e.getType().equals("polymer")) { 1185 c.setMolName(e.getPdbx_description()); 1186 structure.addCompound(c); 1187 logger.debug("Adding Compound with entity id {} from _entity, with name: {}",eId, c.getMolName()); 1188 } 1189 } 1190 } 1191 1192 private Compound createNewCompoundFromESG(EntitySrcGen esg, int eId) { 1193 1194 Compound c = new Compound(); 1195 c.setMolId(eId); 1196 c.setAtcc(esg.getPdbx_gene_src_atcc()); 1197 c.setCell(esg.getPdbx_gene_src_cell()); 1198 c.setOrganismCommon(esg.getGene_src_common_name()); 1199 c.setOrganismScientific(esg.getPdbx_gene_src_scientific_name()); 1200 c.setOrganismTaxId(esg.getPdbx_gene_src_ncbi_taxonomy_id()); 1201 c.setExpressionSystemTaxId(esg.getPdbx_host_org_ncbi_taxonomy_id()); 1202 c.setExpressionSystem(esg.getPdbx_host_org_scientific_name()); 1203 return c; 1204 1205 } 1206 1207 private Compound createNewCompoundFromESN(EntitySrcNat esn, int eId) { 1208 1209 Compound c = new Compound(); 1210 1211 c.setMolId(eId); 1212 c.setAtcc(esn.getPdbx_atcc()); 1213 c.setCell(esn.getPdbx_cell()); 1214 
c.setOrganismCommon(esn.getCommon_name()); 1215 c.setOrganismScientific(esn.getPdbx_organism_scientific()); 1216 c.setOrganismTaxId(esn.getPdbx_ncbi_taxonomy_id()); 1217 1218 return c; 1219 1220 } 1221 1222 private Compound createNewCompoundFromESS(EntitySrcSyn ess, int eId) { 1223 1224 Compound c = new Compound(); 1225 1226 c.setMolId(eId); 1227 c.setOrganismCommon(ess.getOrganism_common_name()); 1228 c.setOrganismScientific(ess.getOrganism_scientific()); 1229 c.setOrganismTaxId(ess.getNcbi_taxonomy_id()); 1230 1231 1232 return c; 1233 1234 } 1235 1236 private void setStructNcsOps() { 1237 1238 ArrayList<Matrix4d> ncsOperators = new ArrayList<Matrix4d>(); 1239 1240 for (StructNcsOper sNcsOper:structNcsOper) { 1241 1242 if (!sNcsOper.getCode().equals("generate")) continue; 1243 1244 try { 1245 Matrix4d op = new Matrix4d(); 1246 op.setElement(3, 0, 0.0); 1247 op.setElement(3, 1, 0.0); 1248 op.setElement(3, 2, 0.0); 1249 op.setElement(3, 3, 1.0); 1250 1251 1252 op.setElement(0, 0, Double.parseDouble(sNcsOper.getMatrix11())); 1253 op.setElement(0, 1, Double.parseDouble(sNcsOper.getMatrix12())); 1254 op.setElement(0, 2, Double.parseDouble(sNcsOper.getMatrix13())); 1255 1256 op.setElement(1, 0, Double.parseDouble(sNcsOper.getMatrix21())); 1257 op.setElement(1, 1, Double.parseDouble(sNcsOper.getMatrix22())); 1258 op.setElement(1, 2, Double.parseDouble(sNcsOper.getMatrix23())); 1259 1260 op.setElement(2, 0, Double.parseDouble(sNcsOper.getMatrix31())); 1261 op.setElement(2, 1, Double.parseDouble(sNcsOper.getMatrix32())); 1262 op.setElement(2, 2, Double.parseDouble(sNcsOper.getMatrix33())); 1263 1264 op.setElement(0, 3, Double.parseDouble(sNcsOper.getVector1())); 1265 op.setElement(1, 3, Double.parseDouble(sNcsOper.getVector2())); 1266 op.setElement(2, 3, Double.parseDouble(sNcsOper.getVector3())); 1267 1268 ncsOperators.add(op); 1269 1270 } catch (NumberFormatException e) { 1271 logger.warn("Error parsing doubles in NCS operator list, skipping operator {}", 
structNcsOper.indexOf(sNcsOper)+1); 1272 } 1273 1274 } 1275 1276 // we only set it if not empty, otherwise remains null 1277 if (ncsOperators.size()>0) { 1278 structure.getCrystallographicInfo().setNcsOperators( 1279 ncsOperators.toArray(new Matrix4d[ncsOperators.size()])); 1280 } 1281 } 1282 1283 private void setCrystallographicInfoMetadata() { 1284 if (parsedScaleMatrix!=null) { 1285 1286 PDBCrystallographicInfo crystalInfo = structure.getCrystallographicInfo(); 1287 1288 boolean nonStd = false; 1289 if (crystalInfo.getCrystalCell()!=null && !crystalInfo.getCrystalCell().checkScaleMatrix(parsedScaleMatrix)) { 1290 nonStd = true; 1291 } 1292 1293 crystalInfo.setNonStandardCoordFrameConvention(nonStd); 1294 } 1295 } 1296 1297 /** This method will return the parsed protein structure, once the parsing has been finished 1298 * 1299 * @return a BioJava protein structure object 1300 */ 1301 public Structure getStructure() { 1302 1303 return structure; 1304 } 1305 1306 @Override 1307 public void newDatabasePDBrevRecord(DatabasePdbrevRecord record) { 1308 1309 PDBHeader header = structure.getPDBHeader(); 1310 1311 if ( header == null) { 1312 header = new PDBHeader(); 1313 structure.setPDBHeader(header); 1314 } 1315 1316 List<DatabasePdbrevRecord> revRecords = header.getRevisionRecords(); 1317 if ( revRecords == null) { 1318 revRecords = new ArrayList<DatabasePdbrevRecord>(); 1319 header.setRevisionRecords(revRecords); 1320 } 1321 revRecords.add(record); 1322 1323 1324 } 1325 1326 1327 @Override 1328 public void newDatabasePDBrev(DatabasePDBrev dbrev) { 1329 //System.out.println("got a database revision:" + dbrev); 1330 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd",Locale.US); 1331 PDBHeader header = structure.getPDBHeader(); 1332 1333 if ( header == null) { 1334 header = new PDBHeader(); 1335 } 1336 1337 1338 if (dbrev.getNum().equals("1")){ 1339 1340 try { 1341 Date dep = dateFormat.parse(dbrev.getDate_original()); 1342 header.setDepDate(dep); 1343 1344 
} catch (ParseException e){ 1345 logger.warn("Could not parse date string '{}', deposition date will be unavailable", dbrev.getDate_original()); 1346 } 1347 1348 try { 1349 Date mod = dateFormat.parse(dbrev.getDate()); 1350 header.setModDate(mod); 1351 1352 } catch (ParseException e){ 1353 logger.warn("Could not parse date string '{}', modification date will be unavailable", dbrev.getDate()); 1354 } 1355 1356 1357 } else { 1358 try { 1359 1360 Date mod = dateFormat.parse(dbrev.getDate()); 1361 header.setModDate(mod); 1362 1363 } catch (ParseException e){ 1364 logger.warn("Could not parse date string '{}', modification date will be unavailable", dbrev.getDate()); 1365 } 1366 } 1367 1368 structure.setPDBHeader(header); 1369 } 1370 1371 @Override 1372 public void newDatabasePDBremark(DatabasePDBremark remark) { 1373 //System.out.println(remark); 1374 String id = remark.getId(); 1375 if (id.equals("2")){ 1376 1377 //this remark field contains the resolution information: 1378 String line = remark.getText(); 1379 1380 int i = line.indexOf("ANGSTROM"); 1381 if ( i > 5) { 1382 // line contains ANGSTROM info... 1383 String resolution = line.substring(i-5,i).trim(); 1384 // convert string to float 1385 float res = 99 ; 1386 try { 1387 res = Float.parseFloat(resolution); 1388 1389 } catch (NumberFormatException e) { 1390 logger.info("could not parse resolution from line and ignoring it " + line); 1391 return ; 1392 1393 1394 } 1395 // support for old style header 1396 1397 PDBHeader pdbHeader = structure.getPDBHeader(); 1398 pdbHeader.setResolution(res); 1399 1400 } 1401 1402 } 1403 } 1404 1405 @Override 1406 public void newRefine(Refine r){ 1407 1408 PDBHeader pdbHeader = structure.getPDBHeader(); 1409 // RESOLUTION 1410 // in very rare cases (for instance hybrid methods x-ray + neutron diffraction, e.g. 
3ins, 4n9m) 1411 // there are 2 resolution values, one for each method 1412 // we take the last one found so that behaviour is like in PDB file parsing 1413 if (pdbHeader.getResolution()!=PDBHeader.DEFAULT_RESOLUTION) { 1414 logger.warn("More than 1 resolution value present, will use last one {} and discard previous {} " 1415 ,r.getLs_d_res_high(), String.format("%4.2f",pdbHeader.getResolution())); 1416 } 1417 try { 1418 pdbHeader.setResolution(Float.parseFloat(r.getLs_d_res_high())); 1419 } catch (NumberFormatException e){ 1420 logger.info("Could not parse resolution from " + r.getLs_d_res_high() + " " + e.getMessage()); 1421 } 1422 1423 1424 // RFREE 1425 if (pdbHeader.getRfree()!=PDBHeader.DEFAULT_RFREE) { 1426 logger.warn("More than 1 Rfree value present, will use last one {} and discard previous {} ", 1427 r.getLs_R_factor_R_free(), String.format("%4.2f",pdbHeader.getRfree())); 1428 } 1429 if (r.getLs_R_factor_R_free()==null) { 1430 // some entries like 2ifo haven't got this field at all 1431 logger.info("_refine.ls_R_factor_R_free not present, not parsing Rfree value"); 1432 } else { 1433 try { 1434 pdbHeader.setRfree(Float.parseFloat(r.getLs_R_factor_R_free())); 1435 } catch (NumberFormatException e){ 1436 // no rfree present ('?') is very usual, that's why we set it to debug 1437 logger.debug("Could not parse Rfree from string '{}'", r.getLs_R_factor_R_free()); 1438 } 1439 } 1440 1441 } 1442 1443 1444 @Override 1445 public void newAuditAuthor(AuditAuthor aa){ 1446 1447 String name = aa.getName(); 1448 1449 StringBuffer famName = new StringBuffer(); 1450 StringBuffer initials = new StringBuffer(); 1451 boolean afterComma = false; 1452 for ( char c: name.toCharArray()) { 1453 if ( c == ' ') 1454 continue; 1455 if ( c == ','){ 1456 afterComma = true; 1457 continue; 1458 } 1459 1460 if ( afterComma) 1461 initials.append(c); 1462 else 1463 famName.append(c); 1464 } 1465 1466 StringBuffer newaa = new StringBuffer(); 1467 newaa.append(initials); 1468 
newaa.append(famName); 1469 1470 PDBHeader header = structure.getPDBHeader(); 1471 String auth = header.getAuthors(); 1472 if (auth == null) { 1473 header.setAuthors(newaa.toString()); 1474 }else { 1475 auth += "," + newaa.toString(); 1476 header.setAuthors(auth); 1477 1478 } 1479 } 1480 1481 @Override 1482 public void newExptl(Exptl exptl) { 1483 1484 PDBHeader pdbHeader = structure.getPDBHeader(); 1485 String method = exptl.getMethod(); 1486 pdbHeader.setExperimentalTechnique(method); 1487 1488 } 1489 1490 @Override 1491 public void newCell(Cell cell) { 1492 1493 try { 1494 float a = Float.parseFloat(cell.getLength_a()); 1495 float b = Float.parseFloat(cell.getLength_b()); 1496 float c = Float.parseFloat(cell.getLength_c()); 1497 float alpha = Float.parseFloat(cell.getAngle_alpha()); 1498 float beta = Float.parseFloat(cell.getAngle_beta()); 1499 float gamma = Float.parseFloat(cell.getAngle_gamma()); 1500 1501 CrystalCell xtalCell = new CrystalCell(); 1502 xtalCell.setA(a); 1503 xtalCell.setB(b); 1504 xtalCell.setC(c); 1505 xtalCell.setAlpha(alpha); 1506 xtalCell.setBeta(beta); 1507 xtalCell.setGamma(gamma); 1508 1509 if (!xtalCell.isCellReasonable()) { 1510 // If the entry describes a structure determined by a technique other than X-ray crystallography, 1511 // cell is (sometimes!) 
a = b = c = 1.0, alpha = beta = gamma = 90 degrees 1512 // if so we don't add and CrystalCell will be null 1513 logger.debug("The crystal cell read from file does not have reasonable dimensions (at least one dimension is below {}), discarding it.", 1514 CrystalCell.MIN_VALID_CELL_SIZE); 1515 return; 1516 } 1517 1518 structure.getPDBHeader().getCrystallographicInfo().setCrystalCell(xtalCell); 1519 1520 } catch (NumberFormatException e){ 1521 structure.getPDBHeader().getCrystallographicInfo().setCrystalCell(null); 1522 logger.info("could not parse some cell parameters ("+e.getMessage()+"), ignoring _cell "); 1523 } 1524 } 1525 1526 @Override 1527 public void newSymmetry(Symmetry symmetry) { 1528 String spaceGroup = symmetry.getSpace_group_name_H_M(); 1529 SpaceGroup sg = SymoplibParser.getSpaceGroup(spaceGroup); 1530 if (sg==null) { 1531 logger.warn("Space group '"+spaceGroup+"' not recognised as a standard space group"); 1532 structure.getPDBHeader().getCrystallographicInfo().setNonStandardSg(true); 1533 } else { 1534 structure.getPDBHeader().getCrystallographicInfo().setSpaceGroup(sg); 1535 structure.getPDBHeader().getCrystallographicInfo().setNonStandardSg(false); 1536 } 1537 } 1538 1539 @Override 1540 public void newStructNcsOper(StructNcsOper sNcsOper) { 1541 structNcsOper.add(sNcsOper); 1542 } 1543 1544 public void newAtomSites(AtomSites atomSites) { 1545 1546 try { 1547 Matrix4d m = new Matrix4d( 1548 Double.parseDouble(atomSites.getFract_transf_matrix11()), Double.parseDouble(atomSites.getFract_transf_matrix12()), Double.parseDouble(atomSites.getFract_transf_matrix13()), Double.parseDouble(atomSites.getFract_transf_vector1()), 1549 Double.parseDouble(atomSites.getFract_transf_matrix21()), Double.parseDouble(atomSites.getFract_transf_matrix22()), Double.parseDouble(atomSites.getFract_transf_matrix23()), Double.parseDouble(atomSites.getFract_transf_vector2()), 1550 Double.parseDouble(atomSites.getFract_transf_matrix31()), 
Double.parseDouble(atomSites.getFract_transf_matrix32()), Double.parseDouble(atomSites.getFract_transf_matrix33()), Double.parseDouble(atomSites.getFract_transf_vector3()), 1551 0,0,0,1); 1552 1553 parsedScaleMatrix = m; 1554 1555 } catch (NumberFormatException e) { 1556 logger.warn("Some values in _atom_sites.fract_transf_matrix or _atom_sites.fract_transf_vector could not be parsed as numbers. Can't check whether coordinate frame convention is correct! Error: {}", e.getMessage()); 1557 structure.getPDBHeader().getCrystallographicInfo().setNonStandardCoordFrameConvention(false); 1558 1559 // in this case parsedScaleMatrix stays null and can't be used in documentEnd() 1560 } 1561 } 1562 1563 @Override 1564 public void newStructRef(StructRef sref) { 1565 logger.debug(sref.toString()); 1566 strucRefs.add(sref); 1567 } 1568 1569 private StructRef getStructRef(String ref_id){ 1570 for (StructRef structRef : strucRefs) { 1571 1572 if (structRef.getId().equals(ref_id)){ 1573 return structRef; 1574 } 1575 1576 } 1577 return null; 1578 1579 } 1580 1581 /** 1582 * create a DBRef record from the StrucRefSeq record: 1583 * <pre> 1584 * PDB record DBREF 1585 * Field Name mmCIF Data Item 1586 * Section n.a. 
1587 * PDB_ID_Code _struct_ref_seq.pdbx_PDB_id_code 1588 * Strand_ID _struct_ref_seq.pdbx_strand_id 1589 * Begin_Residue_Number _struct_ref_seq.pdbx_auth_seq_align_beg 1590 * Begin_Ins_Code _struct_ref_seq.pdbx_seq_align_beg_ins_code 1591 * End_Residue_Number _struct_ref_seq.pdbx_auth_seq_align_end 1592 * End_Ins_Code _struct_ref_seq.pdbx_seq_align_end_ins_code 1593 * Database _struct_ref.db_name 1594 * Database_Accession_No _struct_ref_seq.pdbx_db_accession 1595 * Database_ID_Code _struct_ref.db_code 1596 * Database_Begin_Residue_Number _struct_ref_seq.db_align_beg 1597 * Databaes_Begin_Ins_Code _struct_ref_seq.pdbx_db_align_beg_ins_code 1598 * Database_End_Residue_Number _struct_ref_seq.db_align_end 1599 * Databaes_End_Ins_Code _struct_ref_seq.pdbx_db_align_end_ins_code 1600 * </pre> 1601 * 1602 * 1603 */ 1604 @Override 1605 public void newStructRefSeq(StructRefSeq sref) { 1606 //if (DEBUG) 1607 // System.out.println(sref); 1608 DBRef r = new DBRef(); 1609 1610 1611 //if (DEBUG) 1612 // System.out.println( " " + sref.getPdbx_PDB_id_code() + " " + sref.getPdbx_db_accession()); 1613 r.setIdCode(sref.getPdbx_PDB_id_code()); 1614 r.setDbAccession(sref.getPdbx_db_accession()); 1615 r.setDbIdCode(sref.getPdbx_db_accession()); 1616 1617 r.setChainId(sref.getPdbx_strand_id()); 1618 StructRef structRef = getStructRef(sref.getRef_id()); 1619 if (structRef == null){ 1620 logger.warn("could not find StructRef " + sref.getRef_id() + " for StructRefSeq " + sref); 1621 } else { 1622 r.setDatabase(structRef.getDb_name()); 1623 r.setDbIdCode(structRef.getDb_code()); 1624 } 1625 1626 1627 int seqbegin = Integer.parseInt(sref.getPdbx_auth_seq_align_beg()); 1628 int seqend = Integer.parseInt(sref.getPdbx_auth_seq_align_end()); 1629 Character begin_ins_code = new Character(sref.getPdbx_seq_align_beg_ins_code().charAt(0)); 1630 Character end_ins_code = new Character(sref.getPdbx_seq_align_end_ins_code().charAt(0)); 1631 1632 if (begin_ins_code == '?') 1633 begin_ins_code = ' '; 1634 
1635 if (end_ins_code == '?') 1636 end_ins_code = ' '; 1637 1638 r.setSeqBegin(seqbegin); 1639 r.setInsertBegin(begin_ins_code); 1640 1641 r.setSeqEnd(seqend); 1642 r.setInsertEnd(end_ins_code); 1643 1644 int dbseqbegin = Integer.parseInt(sref.getDb_align_beg()); 1645 int dbseqend = Integer.parseInt(sref.getDb_align_end()); 1646 Character db_begin_in_code = new Character(sref.getPdbx_db_align_beg_ins_code().charAt(0)); 1647 Character db_end_in_code = new Character(sref.getPdbx_db_align_end_ins_code().charAt(0)); 1648 1649 if (db_begin_in_code == '?') 1650 db_begin_in_code = ' '; 1651 1652 if (db_end_in_code == '?') 1653 db_end_in_code = ' '; 1654 1655 1656 r.setDbSeqBegin(dbseqbegin); 1657 r.setIdbnsBegin(db_begin_in_code); 1658 1659 r.setDbSeqEnd(dbseqend); 1660 r.setIdbnsEnd(db_end_in_code); 1661 1662 List<DBRef> dbrefs = structure.getDBRefs(); 1663 if ( dbrefs == null) 1664 dbrefs = new ArrayList<DBRef>(); 1665 dbrefs.add(r); 1666 1667 logger.debug(r.toPDB()); 1668 1669 structure.setDBRefs(dbrefs); 1670 1671 } 1672 1673 @Override 1674 public void newStructRefSeqDif(StructRefSeqDif sref) { 1675 sequenceDifs.add(sref); 1676 } 1677 1678 private Chain getEntityChain(String entity_id){ 1679 1680 for (Chain chain : entityChains) { 1681 if ( chain.getChainID().equals(entity_id)){ 1682 1683 return chain; 1684 } 1685 } 1686 // does not exist yet, so create... 1687 1688 Chain chain = new ChainImpl(); 1689 chain.setChainID(entity_id); 1690 entityChains.add(chain); 1691 1692 return chain; 1693 1694 } 1695 1696 //private Chain getSeqResChain(String chainID){ 1697 // return getChainFromList(seqResChains, chainID); 1698 //} 1699 1700 1701 /** 1702 * Data items in the ENTITY_SRC_GEN category record details of 1703 * the source from which the entity was obtained in cases 1704 * where the source was genetically manipulated. 
The 1705 * following are treated separately: items pertaining to the tissue 1706 * from which the gene was obtained, items pertaining to the host 1707 * organism for gene expression and items pertaining to the actual 1708 * producing organism (plasmid). 1709 */ 1710 @Override 1711 public void newEntitySrcGen(EntitySrcGen entitySrcGen){ 1712 1713 // add to internal list. Map to Compound object later on... 1714 entitySrcGens.add(entitySrcGen); 1715 } 1716 1717 @Override 1718 public void newEntitySrcNat(EntitySrcNat entitySrcNat){ 1719 1720 // add to internal list. Map to Compound object later on... 1721 entitySrcNats.add(entitySrcNat); 1722 } 1723 1724 @Override 1725 public void newEntitySrcSyn(EntitySrcSyn entitySrcSyn){ 1726 1727 // add to internal list. Map to Compound object later on... 1728 entitySrcSyns.add(entitySrcSyn); 1729 } 1730 1731 /** 1732 * The EntityPolySeq object provide the amino acid sequence objects for the Entities. 1733 * Later on the entities are mapped to the BioJava Chain and Compound objects. 1734 * @param epolseq the EntityPolySeq record for one amino acid 1735 */ 1736 @Override 1737 public void newEntityPolySeq(EntityPolySeq epolseq) { 1738 1739 logger.debug("NEW entity poly seq " + epolseq); 1740 1741 int eId = -1; 1742 try { 1743 eId = Integer.parseInt(epolseq.getEntity_id()); 1744 } catch (NumberFormatException e) { 1745 logger.warn("Could not parse entity id from EntityPolySeq: "+e.getMessage()); 1746 } 1747 Entity e = getEntity(eId); 1748 1749 if (e == null){ 1750 logger.info("Could not find entity "+ epolseq.getEntity_id()+". 
Can not match sequence to it."); 1751 return; 1752 } 1753 1754 Chain entityChain = getEntityChain(epolseq.getEntity_id()); 1755 1756 1757 // first we check through the chemcomp provider, if it fails we do some heuristics to guess the type of group 1758 // TODO some of this code is analogous to getNewGroup() and we should try to unify them - JD 2016-03-08 1759 1760 Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(epolseq.getMon_id()); 1761 //int seqId = Integer.parseInt(epolseq.getNum()); 1762 if ( g != null && !g.getChemComp().isEmpty()) { 1763 if ( g instanceof AminoAcidImpl) { 1764 AminoAcidImpl aa = (AminoAcidImpl) g; 1765 aa.setRecordType(AminoAcid.SEQRESRECORD); 1766 //aa.setId(seqId); 1767 } 1768 } else { 1769 1770 if (epolseq.getMon_id().length()==3 && StructureTools.get1LetterCodeAmino(epolseq.getMon_id())!=null){ 1771 AminoAcidImpl a = new AminoAcidImpl(); 1772 a.setRecordType(AminoAcid.SEQRESRECORD); 1773 Character code1 = StructureTools.get1LetterCodeAmino(epolseq.getMon_id()); 1774 a.setAminoType(code1); 1775 g = a; 1776 1777 } else if ( StructureTools.isNucleotide(epolseq.getMon_id())) { 1778 // the group is actually a nucleotide group... 
1779 NucleotideImpl n = new NucleotideImpl(); 1780 g = n; 1781 1782 } else { 1783 logger.debug("Residue {} {} is not a standard aminoacid or nucleotide, will create a het group for it", epolseq.getNum(),epolseq.getMon_id()); 1784 HetatomImpl h = new HetatomImpl(); 1785 g = h; 1786 1787 } 1788 1789 1790 } 1791 // at this stage we don't know about author residue numbers (insertion codes) 1792 // we abuse now the ResidueNumber field setting the internal residue numbers (label_seq_id, strictly sequential and follow the seqres sequence 1 to n) 1793 // later the actual ResidueNumbers (author residue numbers) have to be corrected in alignSeqRes() 1794 g.setResidueNumber(ResidueNumber.fromString(epolseq.getNum())); 1795 1796 g.setPDBName(epolseq.getMon_id()); 1797 1798 entityChain.addGroup(g); 1799 1800 } 1801 1802 @Override 1803 public void newPdbxPolySeqScheme(PdbxPolySeqScheme ppss) { 1804 1805 //if ( headerOnly) 1806 // return; 1807 1808 // replace the group asym ids with the real PDB ids! 1809 // replaceGroupSeqPos(ppss); // This might be incorrect in some pdb, to use auth_seq_id of the pdbx_poly_seq_scheme. 1810 1811 // merge the EntityPolySeq info and the AtomSite chains into one... 1812 //already known ignore: 1813 if (asymStrandId.containsKey(ppss.getAsym_id())) 1814 return; 1815 1816 // this is one of the internal mmcif rules it seems... 1817 if ( ppss.getPdb_strand_id() == null) { 1818 asymStrandId.put(ppss.getAsym_id(), ppss.getAuth_mon_id()); 1819 return; 1820 } 1821 1822 //System.out.println(ppss.getAsym_id() + " = " + ppss.getPdb_strand_id()); 1823 1824 asymStrandId.put(ppss.getAsym_id(), ppss.getPdb_strand_id()); 1825 1826 } 1827 1828 1829 @Override 1830 public void newPdbxNonPolyScheme(PdbxNonPolyScheme ppss) { 1831 1832 //if (headerOnly) 1833 // return; 1834 1835 // merge the EntityPolySeq info and the AtomSite chains into one... 
//already known ignore:
		if (asymStrandId.containsKey(ppss.getAsym_id()))
			return;

		// this is one of the interal mmcif rules it seems...
		if ( ppss.getPdb_strand_id() == null) {
			asymStrandId.put(ppss.getAsym_id(), ppss.getAsym_id());
			return;
		}

		asymStrandId.put(ppss.getAsym_id(), ppss.getPdb_strand_id());

	}

	/**
	 * Callback for a parsed PdbxEntityNonPoly record. Currently a no-op.
	 *
	 * @param pen the parsed record (ignored)
	 */
	@Override
	public void newPdbxEntityNonPoly(PdbxEntityNonPoly pen){
		// TODO: do something with them...
		// not implemented yet...
		//System.out.println(pen.getEntity_id() + " " + pen.getName() + " " + pen.getComp_id());
	}

	/**
	 * Callback for a parsed ChemComp record. Currently a no-op.
	 *
	 * @param c the parsed record (ignored)
	 */
	@Override
	public void newChemComp(ChemComp c) {
		// TODO: do something with them...

	}

	/**
	 * Callback for categories that have no dedicated handler. Currently a no-op.
	 *
	 * @param category   the category name (ignored)
	 * @param loopFields the field names of the category (ignored)
	 * @param lineData   the values of one row (ignored)
	 */
	@Override
	public void newGenericData(String category, List<String> loopFields,
			List<String> lineData) {

		//logger.debug("unhandled category so far: " + category);
	}

	/**
	 * @return the parsing parameters currently held by this consumer
	 */
	@Override
	public FileParsingParameters getFileParsingParameters()
	{
		return params;
	}

	/**
	 * Replaces the parsing parameters held by this consumer.
	 *
	 * @param params the new parsing parameters
	 */
	@Override
	public void setFileParsingParameters(FileParsingParameters params)
	{
		this.params = params;

	}

	/**
	 * Callback for a parsed ChemCompDescriptor record. Currently a no-op.
	 *
	 * @param ccd the parsed record (ignored)
	 */
	@Override
	public void newChemCompDescriptor(ChemCompDescriptor ccd) {

		// TODO nothing happening here yet.

	}

	/**
	 * @return the internal list of struct operator records (not a copy)
	 */
	public List<PdbxStructOperList> getStructOpers() {
		return structOpers;
	}

	/**
	 * Stores a parsed PdbxStructAssembly record.
	 *
	 * @param strucAssembly the record to append to the internal list
	 */
	@Override
	public void newPdbxStrucAssembly(PdbxStructAssembly strucAssembly) {
		strucAssemblies.add(strucAssembly);

	}

	/**
	 * @return the internal list of stored PdbxStructAssembly records (not a copy)
	 */
	public List<PdbxStructAssembly> getStructAssemblies(){
		return strucAssemblies;
	}

	/**
	 * Stores a parsed PdbxStructAssemblyGen record.
	 *
	 * @param strucAssembly the record to append to the internal list
	 */
	@Override
	public void newPdbxStrucAssemblyGen(PdbxStructAssemblyGen strucAssembly) {
		strucAssemblyGens.add(strucAssembly);

	}

	/**
	 * @return the internal list of stored PdbxStructAssemblyGen records (not a copy)
	 */
	public List<PdbxStructAssemblyGen> getStructAssemblyGens(){
		return strucAssemblyGens;
	}

	/**
	 * Callback for a parsed ChemCompAtom record. Currently a no-op.
	 *
	 * @param atom the parsed record (ignored)
	 */
	@Override
	public void newChemCompAtom(ChemCompAtom atom) {

	}

	/**
	 * Callback for a parsed PdbxChemCompIdentifier record. Currently a no-op.
	 *
	 * @param id the parsed record (ignored)
	 */
	@Override
	public void newPdbxChemCompIndentifier(PdbxChemCompIdentifier id) {

	}

	/**
	 * Callback for a parsed ChemCompBond record. Currently a no-op.
	 *
	 * @param bond the parsed record (ignored)
	 */
	@Override
	public void newChemCompBond(ChemCompBond bond) {

	}

	/**
	 * Callback for a parsed PdbxChemCompDescriptor record. Currently a no-op.
	 *
	 * @param desc the parsed record (ignored)
	 */
	@Override
	public void newPdbxChemCompDescriptor(PdbxChemCompDescriptor desc) {

	}

	/**
	 * Stores a parsed StructConn record.
	 *
	 * @param structConn the record to append to the internal list
	 */
	@Override
	public void newStructConn(StructConn structConn) {
		this.structConn.add(structConn);
	}

	/**
	 * Stores a parsed StructSiteGen record; the stored records are read by
	 * {@code addSites()}.
	 *
	 * @param siteGen the record to append to the internal list
	 */
	@Override
	public void newStructSiteGen(StructSiteGen siteGen) {
		this.structSiteGens.add(siteGen);
	}

	/**
	 * Records a parsed StructSite as a {@link Site} on the structure.
	 * Does nothing when the parsing parameters request header-only parsing.
	 * If a site with the same ID is already present, its description is
	 * overwritten instead of adding a duplicate.
	 *
	 * @param structSite the parsed record supplying the site ID and details
	 */
	@Override
	public void newStructSite(StructSite structSite) {

		if (params.isHeaderOnly()) {
			return;
		}

		List<Site> sites = structure.getSites();
		if (sites == null) sites = new ArrayList<Site>();

		String siteId = structSite.getId();

		Site site = null;
		for (Site asite : sites) {
			// Null-safe comparison: a previously stored site may carry a null ID
			// (this method stores whatever structSite.getId() returned).
			String existingId = asite.getSiteID();
			boolean sameId = (existingId == null) ? (siteId == null) : existingId.equals(siteId);
			if (sameId) {
				site = asite; // Prevent duplicate siteIds
			}
		}
		boolean addSite = false;
		if (site == null) { site = new Site(); addSite = true; }
		site.setSiteID(siteId);
		site.setDescription(structSite.getDetails());
		// site.setPdbxEvidenceCode(structSite.getPdbxEvidenceCode()); // TODO - add addition fields in Sites
		if (addSite) sites.add(site);

		structure.setSites(sites);
	}

	/**
	 * Build sites in a BioJava Structure using the original author chain id & residue numbers.
	 * Sites are built from struct_site_gen records that have been parsed.
	 * <p>
	 * For every stored StructSiteGen the referenced residue is looked up by chain
	 * ID (label_asym_id when {@code params.isUseInternalChainId()} is true,
	 * auth_asym_id otherwise), auth_seq_id and insertion code. If the residue is
	 * found, the site with the matching site ID is located or created, and the
	 * residue is added to it when its PDB name equals the record's label_comp_id.
	 * Lookup failures and name mismatches are logged as warnings and skipped.
	 */
	private void addSites() {
		List<Site> sites = structure.getSites();
		if (sites == null) sites = new ArrayList<Site>();

		for (StructSiteGen siteGen : structSiteGens) {
			// For each StructSiteGen, find the residues involved, if they exist then
			String site_id = siteGen.getSite_id(); // multiple could be in same site.
			if (site_id == null) site_id = "";
			String comp_id = siteGen.getLabel_comp_id();  // PDBName
			// Assumption: the author chain ID and residue number for the site is consistent with the original
			// author chain id and residue numbers.
			String chain_id;
			if (params.isUseInternalChainId()){
				chain_id = siteGen.getLabel_asym_id();
			}
			else {
				chain_id = siteGen.getAuth_asym_id();    // ChainID
			}
			String auth_seq_id = siteGen.getAuth_seq_id();    // Res num

			String insCode = siteGen.getPdbx_auth_ins_code();
			if ( insCode != null && insCode.equals("?"))
				insCode = null;

			// Look for asymID = chainID and seqID = seq_ID.  Check that comp_id matches the resname.
			Group g = null;
			try {
				Chain chain = structure.getChainByPDB(chain_id);
				if (null != chain) {
					try {
						Character insChar = null;
						if (null != insCode && insCode.length() > 0) insChar = insCode.charAt(0);
						g = chain.getGroupByPDB(new ResidueNumber(chain_id, Integer.parseInt(auth_seq_id), insChar));
					} catch (NumberFormatException e) {
						logger.warn("Could not lookup residue : " + chain_id + auth_seq_id);
					}
				}
			} catch (StructureException e) {
				// The exception message is already part of the text; the former extra
				// argument had no placeholder to bind to and was dropped.
				logger.warn("Problem finding residue in site entry " + siteGen.getSite_id() + " - " + e.getMessage());
			}

			if (g != null) {
				// 2. find the site_id, if not existing, create anew.
				Site site = null;
				for (Site asite: sites) {
					if (site_id.equals(asite.getSiteID())) site = asite;
				}

				boolean addSite = false;

				// 3. add this residue to the site.
				if (site == null) {
					addSite = true;
					site = new Site();
					site.setSiteID(site_id);
				}

				List<Group> groups = site.getGroups();
				if (groups == null) groups = new ArrayList<Group>();

				// Check the self-consistency of the residue reference from auth_seq_id and chain_id.
				// A missing comp_id cannot be verified, so it is treated as a mismatch
				// rather than dereferenced.
				if (comp_id == null || !comp_id.equals(g.getPDBName())) {
					logger.warn("comp_id doesn't match the residue at " + chain_id + auth_seq_id + " - skipping");
				} else {
					groups.add(g);
					site.setGroups(groups);
				}
				if (addSite) sites.add(site);
			}
		}
		structure.setSites(sites);
	}
}