/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * created at Mar 4, 2008
 */
package org.biojava.nbio.structure.io.mmcif;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.MMCIFFileReader;
import org.biojava.nbio.structure.io.StructureIOFile;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.AtomSites;
import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor;
import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord;
import org.biojava.nbio.structure.io.mmcif.model.Entity;
import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn;
import org.biojava.nbio.structure.io.mmcif.model.Exptl;
import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor;
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier;
import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly;
import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme;
import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
import org.biojava.nbio.structure.io.mmcif.model.Refine;
import org.biojava.nbio.structure.io.mmcif.model.Struct;
import org.biojava.nbio.structure.io.mmcif.model.StructAsym;
import org.biojava.nbio.structure.io.mmcif.model.StructConn;
import org.biojava.nbio.structure.io.mmcif.model.StructKeywords;
import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper;
import org.biojava.nbio.structure.io.mmcif.model.StructRef;
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq;
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif;
import org.biojava.nbio.structure.io.mmcif.model.StructSite;
import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A simple mmCif file parser.
 * <p>
 * The parser reads the file line by line, groups the values it finds by CIF
 * category and hands each completed record to the registered
 * {@link MMcifConsumer}s, either as a typed bean from the
 * {@link org.biojava.nbio.structure.io.mmcif.model} package or, for categories
 * without a dedicated bean, as generic field/value lists.
 * <p>
 * Usage:
 * <pre>
String file = "path/to/mmcif/file";
StructureIOFile pdbreader = new MMCIFFileReader();

Structure s = pdbreader.getStructure(file);
System.out.println(s);

// you can convert it to a PDB file...
System.out.println(s.toPDB());

 * </pre>
 * For more documentation see <a href="http://biojava.org/wiki/BioJava:CookBook#Protein_Structure">http://biojava.org/wiki/BioJava:CookBook#Protein_Structure</a>.
 *
 * @author Andreas Prlic
 * @author Jose Duarte
 * @since 1.7
 */
public class SimpleMMcifParser implements MMcifParser {

    /**
     * The header appearing at the beginning of a mmCIF file.
     * A "block code" can be added to it of no more than 32 chars.
     * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
     */
    public static final String MMCIF_TOP_HEADER = "data_";

    public static final String COMMENT_CHAR = "#";
    public static final String LOOP_START = "loop_";
    public static final String FIELD_LINE = "_";

    // the following are the 3 valid quoting characters in CIF
    /**
     * Quoting character '
     */
    private static final char S1 = '\'';

    /**
     * Quoting character "
     */
    private static final char S2 = '\"';

    /**
     * Quoting character ; (multi-line quoting)
     */
    public static final String STRING_LIMIT = ";";

    /**
     * Matches a line that declares a field, i.e. optional leading whitespace
     * followed by {@link #FIELD_LINE} and at least one word character.
     * Compiled once because it is tested against every line inside a loop.
     */
    private static final Pattern LOOP_FIELD_PATTERN = Pattern.compile("\\s*" + FIELD_LINE + "\\w+.*");

    /** Marker used by the tokenizer for "no quoted string is currently open". */
    private static final char NO_QUOTE = '\0';

    private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifParser.class);

    /** The consumers that are notified of every parsed record. */
    private final List<MMcifConsumer> consumers;

    /** The {@code _struct} record of the file being parsed, handed to the consumers at the end of the parse. */
    private Struct struct;

    public SimpleMMcifParser() {
        consumers = new ArrayList<MMcifConsumer>();
        struct = null;
    }

    @Override
    public void addMMcifConsumer(MMcifConsumer consumer) {
        consumers.add(consumer);
    }

    @Override
    public void clearConsumers() {
        consumers.clear();
    }

    @Override
    public void removeMMcifConsumer(MMcifConsumer consumer) {
        consumers.remove(consumer);
    }

    /**
     * Small demo: parses a mmCIF file and prints the structure, also in PDB format.
     *
     * @param args optionally the path of the file to parse as first element;
     *             when absent a built-in default path is used
     */
    public static void main(String[] args) {
        String file = args.length > 0 ? args[0] : "/Users/andreas/WORK/PDB/mmCif/a9/1a9n.cif.gz";
        System.out.println("parsing " + file);

        StructureIOFile pdbreader = new MMCIFFileReader();
        try {
            Structure s = pdbreader.getStructure(file);
            System.out.println(s);
            // convert it to a PDB file...
            System.out.println(s.toPDB());
        } catch (IOException e) {
            logger.error("Could not parse file {}", file, e);
        }
    }

    /**
     * Parses mmCIF data from a stream by wrapping it in a {@link BufferedReader}
     * (using the platform default charset) and delegating to {@link #parse(BufferedReader)}.
     *
     * @param inStream the stream to read from
     * @throws IOException if reading fails or the data is inconsistent
     */
    @Override
    public void parse(InputStream inStream) throws IOException {
        parse(new BufferedReader(new InputStreamReader(inStream)));
    }

    /**
     * Parses mmCIF data and notifies the registered consumers of every record found.
     * <p>
     * The first line must start with {@link #MMCIF_TOP_HEADER}; otherwise an error
     * is logged and the document is ended immediately. Empty lines and lines
     * starting with {@link #COMMENT_CHAR} are skipped.
     *
     * @param buf the reader to read from
     * @throws IOException if reading fails, if a record has a different number of
     *                     values than fields, or if a key is not followed by a value
     */
    @Override
    public void parse(BufferedReader buf) throws IOException {

        triggerDocumentStart();

        // init container objects...
        struct = new Struct();
        String line = null;

        boolean inLoop = false;
        boolean inLoopData = false;

        List<String> loopFields = new ArrayList<String>();
        List<String> lineData = new ArrayList<String>();
        Set<String> loopWarnings = new HashSet<String>(); // used only to reduce logging statements

        String category = null;

        // the first line is a data_PDBCODE line, test if this looks like a mmcif file
        line = buf.readLine();
        if (line == null || !line.startsWith(MMCIF_TOP_HEADER)) {
            logger.error("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '{}'", line);
            triggerDocumentEnd();
            return;
        }

        while ((line = buf.readLine()) != null) {

            if (line.isEmpty() || line.startsWith(COMMENT_CHAR)) {
                continue;
            }

            logger.debug("{} {}", inLoop, line);

            if (line.startsWith(MMCIF_TOP_HEADER)) {
                // beginning of a new section: any loop that was open ends here
                if (inLoop) {
                    inLoop = false;
                    inLoopData = false;
                    lineData.clear();
                    loopFields.clear();
                }
            }

            if (inLoop) {

                if (line.startsWith(LOOP_START)) {
                    loopFields.clear();
                    inLoop = true;
                    inLoopData = false;
                    continue;
                }

                if (LOOP_FIELD_PATTERN.matcher(line).matches()) {

                    if (inLoopData && line.startsWith(FIELD_LINE)) {
                        logger.debug("Found a field line after reading loop data. Toggling to inLoop=false");
                        inLoop = false;
                        inLoopData = false;
                        loopFields.clear();

                        // a boring normal line
                        List<String> data = processLine(line, buf, 2);

                        if (data.size() < 1) {
                            // this can happen if empty lines at end of file
                            lineData.clear();
                            continue;
                        }
                        String key = data.get(0);
                        int pos = key.indexOf(".");
                        if (pos < 0) {
                            // looks like a chem_comp file
                            // line should start with data, otherwise something is wrong!
                            if (!line.startsWith(MMCIF_TOP_HEADER)) {
                                logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line + "'");
                                triggerDocumentEnd();
                                return;
                            }
                            // ignore the first line...
                            category = null;
                            lineData.clear();
                            continue;
                        }
                        if (data.size() < 2) {
                            // processLine only returns fewer than 2 tokens when the input ended
                            throw new IOException("Found key '" + key + "' without a value: unexpected end of file");
                        }
                        category = key.substring(0, pos);
                        String value = data.get(1);
                        loopFields.add(key.substring(pos + 1));
                        lineData.add(value);

                        logger.debug("Found data for category {}: {}", key, value);
                        continue;
                    }

                    // found another field.
                    String txt = line.trim();
                    if (txt.indexOf('.') > -1) {

                        String[] spl = txt.split("\\.");
                        category = spl[0];
                        String attribute = spl[1];
                        loopFields.add(attribute);
                        logger.debug("Found category: {}, attribute: {}", category, attribute);
                        if (spl.length > 2) {
                            logger.warn("Found nested attribute in {}, not supported yet!", txt);
                        }

                    } else {
                        category = txt;
                        logger.debug("Found category without attribute: {}", category);
                    }

                } else {

                    // in loop and we found a data line
                    lineData = processLine(line, buf, loopFields.size());
                    logger.debug("Found a loop data line with {} data fields", lineData.size());
                    logger.debug("Data fields: {}", lineData);
                    if (lineData.size() != loopFields.size()) {
                        logger.warn("Expected {} data fields, but found {} in line: {}", loopFields.size(), lineData.size(), line);
                    }

                    endLineChecks(category, loopFields, lineData, loopWarnings);

                    lineData.clear();

                    inLoopData = true;
                }

            } else {
                // not in loop

                if (line.startsWith(LOOP_START)) {
                    if (category != null) {
                        endLineChecks(category, loopFields, lineData, loopWarnings);
                    }

                    resetBuffers(loopFields, lineData, loopWarnings);
                    category = null;
                    inLoop = true;
                    inLoopData = false;
                    logger.debug("Detected LOOP_START: '{}'. Toggling to inLoop=true", LOOP_START);
                    continue;
                } else {
                    logger.debug("Normal line ");
                    inLoop = false;

                    // a boring normal line
                    List<String> data = processLine(line, buf, 2);

                    if (data.size() < 1) {
                        // this can happen if empty lines at end of file
                        lineData.clear();
                        continue;
                    }
                    String key = data.get(0);
                    int pos = key.indexOf(".");
                    if (pos < 0) {
                        // looks like a chem_comp file
                        // line should start with data, otherwise something is wrong!
                        if (!line.startsWith(MMCIF_TOP_HEADER)) {
                            logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line + "'");
                            triggerDocumentEnd();
                            return;
                        }
                        // ignore the first line...
                        category = null;
                        lineData.clear();
                        continue;
                    }
                    if (data.size() < 2) {
                        // processLine only returns fewer than 2 tokens when the input ended
                        throw new IOException("Found key '" + key + "' without a value: unexpected end of file");
                    }

                    if (category != null && !key.substring(0, pos).equals(category)) {
                        // we've changed category: need to flush the previous one
                        endLineChecks(category, loopFields, lineData, loopWarnings);
                        resetBuffers(loopFields, lineData, loopWarnings);
                    }

                    category = key.substring(0, pos);

                    String value = data.get(1);
                    loopFields.add(key.substring(pos + 1));
                    lineData.add(value);

                    logger.debug("Found data for category {}: {}", key, value);
                }
            }
        }

        if (category != null && lineData.size() > 0 && lineData.size() == loopFields.size()) {
            // the last category in the file will still be missing, we add it now
            endLineChecks(category, loopFields, lineData, loopWarnings);
            resetBuffers(loopFields, lineData, loopWarnings);
        }

        if (struct != null) {
            triggerStructData(struct);
        }

        triggerDocumentEnd();
    }

    /** Empties the per-category accumulators. */
    private void resetBuffers(List<String> loopFields, List<String> lineData, Set<String> loopWarnings) {
        loopFields.clear();
        lineData.clear();
        loopWarnings.clear();
    }

    /**
     * Splits one physical line into its tokens.
     * <p>
     * Tokens are separated by the space character. A token that starts with
     * {@code '} or {@code "} (at the start of the line or right after a space)
     * is a quoted string: it ends at the next occurrence of the same quote
     * character that is followed by whitespace or by the end of the line, and
     * may contain spaces and the other quote character. A quote character in
     * any other position is an ordinary character. Every token is trimmed; an
     * empty quoted string ({@code ''} or {@code ""}) yields an empty token.
     *
     * @param line the line to split
     * @return the tokens, empty if the line is blank or consists only of a
     *         leading {@link #STRING_LIMIT}
     */
    private List<String> processSingleLine(String line) {

        List<String> data = new ArrayList<String>();

        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
            return data;
        }
        if (trimmed.length() == 1 && line.startsWith(STRING_LIMIT)) {
            return data;
        }

        // the quote character that opened the current string, NO_QUOTE when outside a string
        char openQuote = NO_QUOTE;
        StringBuilder word = new StringBuilder();
        final int length = line.length();

        for (int i = 0; i < length; i++) {

            char c = line.charAt(i);
            boolean inString = openQuote != NO_QUOTE;

            if (c == ' ') {

                if (inString) {
                    // we are in a string, add the space
                    word.append(c);
                } else {
                    if (word.length() > 0) {
                        data.add(word.toString().trim());
                    }
                    word.setLength(0);
                }

            } else if (c == S1 || c == S2) {

                if (inString) {
                    boolean followedByBreak = i == length - 1 || Character.isWhitespace(line.charAt(i + 1));
                    if (c == openQuote && followedByBreak) {
                        // at end of string; also skip the whitespace that follows the quote
                        i++;
                        // added even when empty so that '' and "" keep their column
                        data.add(word.toString().trim());
                        word.setLength(0);
                        openQuote = NO_QUOTE;
                    } else {
                        word.append(c);
                    }
                } else if (i == 0 || line.charAt(i - 1) == ' ') {
                    // the beginning of a new string
                    openQuote = c;
                } else {
                    word.append(c);
                }

            } else {
                word.append(c);
            }
        }

        String last = word.toString().trim();
        if (!last.isEmpty()) {
            data.add(last);
        }

        return data;
    }

    /**
     * Gets the content of a cif entry, reading further lines from the reader
     * until the expected number of values has been collected or the input ends.
     * <p>
     * A line starting with {@link #STRING_LIMIT} opens a multi-line string; the
     * rest of that line and all following lines up to the next line starting
     * with {@link #STRING_LIMIT} are concatenated (without line separators) into
     * a single value. All other lines are tokenized with
     * {@link #processSingleLine(String)}.
     *
     * @param line        the first line of the entry
     * @param buf         the reader to take continuation lines from
     * @param fieldLength the number of values expected
     * @return the values found; may hold fewer than {@code fieldLength} values
     *         if the input ended, or more if a line held too many tokens
     * @throws IOException if reading from {@code buf} fails
     */
    private List<String> processLine(String line,
            BufferedReader buf,
            int fieldLength)
            throws IOException {

        List<String> lineData = new ArrayList<String>();

        boolean inString = false;

        StringBuilder bigWord = null;

        while (true) {

            if (line.startsWith(STRING_LIMIT)) {
                if (!inString) {
                    inString = true;
                    bigWord = new StringBuilder(line.length() > 1 ? line.substring(1) : "");
                } else {
                    // the end of a word
                    lineData.add(bigWord.toString());
                    bigWord = null;
                    inString = false;
                }
            } else if (inString) {
                bigWord.append(line);
            } else {
                lineData.addAll(processSingleLine(line));
            }

            if (lineData.size() > fieldLength) {
                logger.warn("wrong data length ({}) should be ({}) at line {} got lineData: {}",
                        lineData.size(), fieldLength, line, lineData);
                return lineData;
            }

            if (lineData.size() == fieldLength) {
                return lineData;
            }

            line = buf.readLine();
            if (line == null) {
                break;
            }
        }
        return lineData;
    }

    /**
     * Turns one completed record into the bean for its category and dispatches
     * it to the consumers. Categories without a dedicated bean are dispatched
     * through {@link #triggerGeneric(String, List, List)}.
     *
     * @param category     the CIF category of the record, including the leading underscore
     * @param loopFields   the field names of the record
     * @param lineData     the values of the record, in the order of {@code loopFields}
     * @param loopWarnings warnings already produced for this category, used to reduce logging
     * @throws IOException if the number of values differs from the number of fields
     */
    private void endLineChecks(String category, List<String> loopFields, List<String> lineData, Set<String> loopWarnings) throws IOException {

        logger.debug("Processing category {}, with fields: {}", category, loopFields);

        if (loopFields.size() != lineData.size()) {
            logger.warn("looks like we got a problem with nested string quote characters:");
            throw new IOException("data length (" + lineData.size() +
                    ") != fields length (" + loopFields.size() +
                    ") category: " + category + " fields: " +
                    loopFields + " DATA: " +
                    lineData);
        }

        if (category.equals("_entity")) {
            triggerNewEntity(buildBean(Entity.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct")) {
            // kept until the end of the parse, see triggerStructData
            struct = buildBean(Struct.class, loopFields, lineData, loopWarnings);

        } else if (category.equals("_atom_site")) {
            triggerNewAtomSite(buildBean(AtomSite.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_database_PDB_rev")) {
            triggerNewDatabasePDBrev(buildBean(DatabasePDBrev.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_database_PDB_rev_record")) {
            triggerNewDatabasePDBrevRecord(buildBean(DatabasePdbrevRecord.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_database_PDB_remark")) {
            triggerNewDatabasePDBremark(buildBean(DatabasePDBremark.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_exptl")) {
            triggerExptl(buildBean(Exptl.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_cell")) {
            triggerNewCell(buildBean(Cell.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_symmetry")) {
            triggerNewSymmetry(buildBean(Symmetry.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_ncs_oper")) {
            triggerNewStructNcsOper(buildBean(StructNcsOper.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_atom_sites")) {
            triggerNewAtomSites(buildBean(AtomSites.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_ref")) {
            triggerNewStrucRef(buildBean(StructRef.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_ref_seq")) {
            triggerNewStrucRefSeq(buildBean(StructRefSeq.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_ref_seq_dif")) {
            triggerNewStrucRefSeqDif(buildBean(StructRefSeqDif.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_site_gen")) {
            triggerNewStructSiteGen(buildBean(StructSiteGen.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_site")) {
            triggerNewStructSite(buildBean(StructSite.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_entity_poly_seq")) {
            triggerNewEntityPolySeq(buildBean(EntityPolySeq.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_entity_src_gen")) {
            triggerNewEntitySrcGen(buildBean(EntitySrcGen.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_entity_src_nat")) {
            triggerNewEntitySrcNat(buildBean(EntitySrcNat.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_entity_src_syn")) {
            triggerNewEntitySrcSyn(buildBean(EntitySrcSyn.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_asym")) {
            triggerNewStructAsym(buildBean(StructAsym.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_poly_seq_scheme")) {
            triggerNewPdbxPolySeqScheme(buildBean(PdbxPolySeqScheme.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_nonpoly_scheme")) {
            triggerNewPdbxNonPolyScheme(buildBean(PdbxNonPolyScheme.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_entity_nonpoly")) {
            triggerNewPdbxEntityNonPoly(buildBean(PdbxEntityNonPoly.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_keywords")) {
            triggerNewStructKeywords(buildBean(StructKeywords.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_refine")) {
            triggerNewRefine(buildBean(Refine.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_chem_comp")) {
            triggerNewChemComp(buildBean(ChemComp.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_audit_author")) {
            triggerNewAuditAuthor(buildBean(AuditAuthor.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_chem_comp_descriptor")) {
            triggerNewChemCompDescriptor(buildBean(ChemCompDescriptor.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_struct_oper_list")) {
            triggerNewPdbxStructOper(buildBean(PdbxStructOperList.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_struct_assembly")) {
            triggerNewPdbxStructAssembly(buildBean(PdbxStructAssembly.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_struct_assembly_gen")) {
            triggerNewPdbxStructAssemblyGen(buildBean(PdbxStructAssemblyGen.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_chem_comp_atom")) {
            triggerNewChemCompAtom(buildBean(ChemCompAtom.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_chem_comp_bond")) {
            triggerNewChemCompBond(buildBean(ChemCompBond.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_chem_comp_identifier")) {
            triggerNewPdbxChemCompIdentifier(buildBean(PdbxChemCompIdentifier.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_pdbx_chem_comp_descriptor")) {
            // NOTE(review): unreachable, "_pdbx_chem_comp_descriptor" is already handled above
            // (as ChemCompDescriptor), so PdbxChemCompDescriptor beans are never dispatched.
            // Left in place because it is unclear whether consumers should receive both beans.
            triggerNewPdbxChemCompDescriptor(buildBean(PdbxChemCompDescriptor.class, loopFields, lineData, loopWarnings));

        } else if (category.equals("_struct_conn")) {
            triggerNewStructConn(buildBean(StructConn.class, loopFields, lineData, loopWarnings));

        } else {

            logger.debug("Using a generic bean for category {}", category);

            // trigger a generic bean that can deal with all missing data types...
            triggerGeneric(category, loopFields, lineData);
        }
    }

    /**
     * Typed wrapper around {@link #buildObject(String, List, List, Set)}.
     *
     * @return the populated bean, or null if the bean could not be instantiated
     */
    private <T> T buildBean(Class<T> type, List<String> loopFields, List<String> lineData, Set<String> warnings) {
        return type.cast(buildObject(type.getName(), loopFields, lineData, warnings));
    }

    public void triggerNewPdbxStructOper(PdbxStructOperList structOper) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxStructOperList(structOper);
        }
    }

    public void triggerNewStructNcsOper(StructNcsOper sNcsOper) {
        for (MMcifConsumer c : consumers) {
            c.newStructNcsOper(sNcsOper);
        }
    }

    public void triggerNewAtomSites(AtomSites atomSites) {
        for (MMcifConsumer c : consumers) {
            c.newAtomSites(atomSites);
        }
    }

    /**
     * Populates a bean object from the {@link org.biojava.nbio.structure.io.mmcif.model} package,
     * from the data read from a CIF file.
     * It uses reflection to lookup the field and setter method names given the category
     * found in the CIF file.
     * <p>
     * Due to limitations in variable names in java, not all fields can have names
     * exactly as defined in the CIF categories. In those cases the {@link CIFLabel} tag
     * can be used in the field names to give the appropriate name that corresponds to the
     * CIF category, which is the name that will be then looked up here.
     * The {@link IgnoreField} tag can also be used to exclude fields from being looked up.
     * <p>
     * Values for setters taking an {@link Integer} are parsed as integers, unless they
     * are {@code ?} or {@code .}, in which case the setter is not called. All other
     * setters are called with the value as a String. Fields of the file that have no
     * matching bean field or no usable single-argument setter are reported through
     * {@link #produceWarning(String, String, Class, Set)} and skipped.
     *
     * @param className  the fully qualified name of the bean class to instantiate
     * @param loopFields the field names found in the file
     * @param lineData   the values found in the file, in the order of {@code loopFields}
     * @param warnings   warnings already produced, used to reduce logging; may be null
     * @return the populated bean, or null if the class could not be instantiated
     * @throws NumberFormatException if a value for an Integer setter is not a valid integer
     */
    private Object buildObject(String className, List<String> loopFields, List<String> lineData, Set<String> warnings) {

        Object o = null;
        Class<?> c = null;

        try {
            // build up the Entity object from the line data...
            c = Class.forName(className);

            o = c.newInstance();

        } catch (InstantiationException|ClassNotFoundException|IllegalAccessException e) {
            logger.error("Error while constructing {}: {}", className, e.getMessage(), e);
            return null;
        }

        // these methods get the fields but also looking at the IgnoreField and CIFLabel annotations
        Field[] fields = MMCIFFileTools.getFields(c);
        String[] names = MMCIFFileTools.getFieldNames(fields);

        // let's build a map of all methods so that we can look up the setter methods later
        Map<String, Method> methodMap = new HashMap<String, Method>();
        for (Method m : c.getMethods()) {
            methodMap.put(m.getName(), m);
        }

        // and a map of all the fields so that we can lookup them up later
        Map<String, Field> names2fields = new HashMap<>();
        for (int i = 0; i < fields.length; i++) {
            names2fields.put(names[i], fields[i]);
        }

        int pos = -1;
        for (String key : loopFields) {
            pos++;

            String val = lineData.get(pos);

            // we first start looking up the field which can be annotated with a CIFLabel if they
            // need alternative names (e.g. for field _symmetry.space_group_name_H-M, since hyphen is not allowed in var names in java)
            Field field = names2fields.get(key);

            if (field == null) {
                produceWarning(key, val, c, warnings);
                continue;
            }
            // now we need to find the corresponding setter
            // note that we can't use the field directly and then call Field.set() because many setters
            // have more functionality than just setting the value (e.g. some setters in ChemComp)

            // building up the setter method name: need to upper case the first letter, leave the rest untouched
            String fieldName = field.getName();
            String setterMethodName = "set" + fieldName.substring(0, 1).toUpperCase() + fieldName.substring(1);

            Method setter = methodMap.get(setterMethodName);

            if (setter == null) {
                produceWarning(key, val, c, warnings);
                continue;
            }

            // now we populate the object with the values by invoking the corresponding setter method,
            // note that all of the mmCif container classes have only one argument (they are beans)
            Class<?>[] pType = setter.getParameterTypes();

            if (pType.length != 1) {
                // not a bean-style setter, we can't know how to call it
                produceWarning(key, val, c, warnings);
                continue;
            }

            try {
                if (pType[0].getName().equals(Integer.class.getName())) {
                    if (val != null && !val.equals("?") && !val.equals(".")) {

                        Integer intVal = Integer.parseInt(val);
                        setter.invoke(o, intVal);

                    }
                } else {
                    // default val is a String
                    setter.invoke(o, val);
                }
            } catch (IllegalAccessException|InvocationTargetException e) {
                logger.error("Could not invoke setter {} with value {} for class {}", setterMethodName, val, className, e);
            }

        }

        return o;
    }

    /**
     * Logs that a field found in the file can't be stored in the given model class.
     * The message is logged at warn level the first time it occurs for a
     * field/class pair and the value is not {@code ?} or {@code .}; otherwise at
     * debug level.
     *
     * @param key      the field name found in the file
     * @param val      the value found in the file
     * @param c        the model class
     * @param warnings the field/class pairs already reported, updated by this call; may be null
     */
    private void produceWarning(String key, String val, Class<?> c, Set<String> warnings) {

        String warning = "Trying to set field " + key + " in " + c.getName() + " found in file, but no corresponding field could be found in model class (value:" + val + ")";
        String warnkey = key + "-" + c.getName();
        // Suppress duplicate warnings or attempts to store empty data
        if (val.equals("?") || val.equals(".") || (warnings != null && warnings.contains(warnkey))) {
            logger.debug(warning);
        } else {
            logger.warn(warning);
        }

        if (warnings != null) {
            warnings.add(warnkey);
        }
    }

    public void triggerGeneric(String category, List<String> loopFields, List<String> lineData) {
        for (MMcifConsumer c : consumers) {
            c.newGenericData(category, loopFields, lineData);
        }
    }

    public void triggerNewEntity(Entity entity) {
        for (MMcifConsumer c : consumers) {
            c.newEntity(entity);
        }
    }

    public void triggerNewEntityPolySeq(EntityPolySeq epolseq) {
        for (MMcifConsumer c : consumers) {
            c.newEntityPolySeq(epolseq);
        }
    }

    public void triggerNewEntitySrcGen(EntitySrcGen entitySrcGen) {
        for (MMcifConsumer c : consumers) {
            c.newEntitySrcGen(entitySrcGen);
        }
    }

    public void triggerNewEntitySrcNat(EntitySrcNat entitySrcNat) {
        for (MMcifConsumer c : consumers) {
            c.newEntitySrcNat(entitySrcNat);
        }
    }

    public void triggerNewEntitySrcSyn(EntitySrcSyn entitySrcSyn) {
        for (MMcifConsumer c : consumers) {
            c.newEntitySrcSyn(entitySrcSyn);
        }
    }

    public void triggerNewChemComp(ChemComp cc) {
        for (MMcifConsumer c : consumers) {
            c.newChemComp(cc);
        }
    }

    public void triggerNewStructAsym(StructAsym sasym) {
        for (MMcifConsumer c : consumers) {
            c.newStructAsym(sasym);
        }
    }

    private void triggerStructData(Struct struct) {
        for (MMcifConsumer c : consumers) {
            c.setStruct(struct);
        }
    }

    private void triggerNewAtomSite(AtomSite atom) {
        for (MMcifConsumer c : consumers) {
            c.newAtomSite(atom);
        }
    }

    private void triggerNewAuditAuthor(AuditAuthor aa) {
        for (MMcifConsumer c : consumers) {
            c.newAuditAuthor(aa);
        }
    }

    private void triggerNewDatabasePDBrev(DatabasePDBrev dbrev) {
        for (MMcifConsumer c : consumers) {
            c.newDatabasePDBrev(dbrev);
        }
    }

    private void triggerNewDatabasePDBrevRecord(DatabasePdbrevRecord dbrev) {
        for (MMcifConsumer c : consumers) {
            c.newDatabasePDBrevRecord(dbrev);
        }
    }

    private void triggerNewDatabasePDBremark(DatabasePDBremark remark) {
        for (MMcifConsumer c : consumers) {
            c.newDatabasePDBremark(remark);
        }
    }

    private void triggerExptl(Exptl exptl) {
        for (MMcifConsumer c : consumers) {
            c.newExptl(exptl);
        }
    }

    private void triggerNewCell(Cell cell) {
        for (MMcifConsumer c : consumers) {
            c.newCell(cell);
        }
    }

    private void triggerNewSymmetry(Symmetry symmetry) {
        for (MMcifConsumer c : consumers) {
            c.newSymmetry(symmetry);
        }
    }

    private void triggerNewStrucRef(StructRef sref) {
        for (MMcifConsumer c : consumers) {
            c.newStructRef(sref);
        }
    }

    private void triggerNewStrucRefSeq(StructRefSeq sref) {
        for (MMcifConsumer c : consumers) {
            c.newStructRefSeq(sref);
        }
    }

    private void triggerNewStrucRefSeqDif(StructRefSeqDif sref) {
        for (MMcifConsumer c : consumers) {
            c.newStructRefSeqDif(sref);
        }
    }

    private void triggerNewPdbxPolySeqScheme(PdbxPolySeqScheme ppss) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxPolySeqScheme(ppss);
        }
    }

    private void triggerNewPdbxNonPolyScheme(PdbxNonPolyScheme ppss) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxNonPolyScheme(ppss);
        }
    }

    public void triggerNewPdbxEntityNonPoly(PdbxEntityNonPoly pen) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxEntityNonPoly(pen);
        }
    }

    public void triggerNewStructKeywords(StructKeywords kw) {
        for (MMcifConsumer c : consumers) {
            c.newStructKeywords(kw);
        }
    }

    public void triggerNewRefine(Refine r) {
        for (MMcifConsumer c : consumers) {
            c.newRefine(r);
        }
    }

    public void triggerDocumentStart() {
        for (MMcifConsumer c : consumers) {
            c.documentStart();
        }
    }

    public void triggerDocumentEnd() {
        for (MMcifConsumer c : consumers) {
            c.documentEnd();
        }
    }

    public void triggerNewChemCompDescriptor(ChemCompDescriptor ccd) {
        for (MMcifConsumer c : consumers) {
            c.newChemCompDescriptor(ccd);
        }
    }

    private void triggerNewPdbxStructAssembly(PdbxStructAssembly sa) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxStrucAssembly(sa);
        }
    }

    private void triggerNewPdbxStructAssemblyGen(PdbxStructAssemblyGen sa) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxStrucAssemblyGen(sa);
        }
    }

    private void triggerNewChemCompAtom(ChemCompAtom atom) {
        for (MMcifConsumer c : consumers) {
            c.newChemCompAtom(atom);
        }
    }

    private void triggerNewChemCompBond(ChemCompBond bond) {
        for (MMcifConsumer c : consumers) {
            c.newChemCompBond(bond);
        }
    }

    private void triggerNewPdbxChemCompIdentifier(PdbxChemCompIdentifier id) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxChemCompIndentifier(id);
        }
    }

    private void triggerNewPdbxChemCompDescriptor(PdbxChemCompDescriptor id) {
        for (MMcifConsumer c : consumers) {
            c.newPdbxChemCompDescriptor(id);
        }
    }

    private void triggerNewStructConn(StructConn id) {
        for (MMcifConsumer c : consumers) {
            c.newStructConn(id);
        }
    }

    private void triggerNewStructSiteGen(StructSiteGen id) {
        for (MMcifConsumer c : consumers) {
            c.newStructSiteGen(id);
        }
    }

    private void triggerNewStructSite(StructSite id) {
        for (MMcifConsumer c : consumers) {
            c.newStructSite(id);
        }
    }
}