001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * created at Mar 4, 2008 021 */ 022package org.biojava.nbio.structure.io.mmcif; 023 024import java.io.BufferedReader; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.InputStreamReader; 028import java.lang.reflect.Field; 029import java.lang.reflect.InvocationTargetException; 030import java.lang.reflect.Method; 031import java.util.ArrayList; 032import java.util.HashMap; 033import java.util.HashSet; 034import java.util.List; 035import java.util.Map; 036import java.util.Set; 037 038 039import org.biojava.nbio.structure.Structure; 040import org.biojava.nbio.structure.io.MMCIFFileReader; 041import org.biojava.nbio.structure.io.StructureIOFile; 042import org.biojava.nbio.structure.io.mmcif.model.AtomSite; 043import org.biojava.nbio.structure.io.mmcif.model.AtomSites; 044import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor; 045import org.biojava.nbio.structure.io.mmcif.model.CIFLabel; 046import org.biojava.nbio.structure.io.mmcif.model.Cell; 047import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 048import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom; 049import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond; 050import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor; 051import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark; 052import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev; 053import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord; 054import org.biojava.nbio.structure.io.mmcif.model.Entity; 055import org.biojava.nbio.structure.io.mmcif.model.EntityPoly; 056import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq; 057import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen; 058import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat; 059import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn; 060import org.biojava.nbio.structure.io.mmcif.model.Exptl; 061import org.biojava.nbio.structure.io.mmcif.model.IgnoreField; 062import org.biojava.nbio.structure.io.mmcif.model.PdbxAuditRevisionHistory; 063import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor; 064import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier; 065import org.biojava.nbio.structure.io.mmcif.model.PdbxDatabaseStatus; 066import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly; 067import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme; 068import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme; 069import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly; 070import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen; 071import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList; 072import org.biojava.nbio.structure.io.mmcif.model.Refine; 073import org.biojava.nbio.structure.io.mmcif.model.Struct; 074import org.biojava.nbio.structure.io.mmcif.model.StructAsym; 075import org.biojava.nbio.structure.io.mmcif.model.StructConn; 076import org.biojava.nbio.structure.io.mmcif.model.StructKeywords; 077import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper; 078import org.biojava.nbio.structure.io.mmcif.model.StructRef; 079import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq; 080import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif; 081import org.biojava.nbio.structure.io.mmcif.model.StructSite; 082import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen; 083import org.biojava.nbio.structure.io.mmcif.model.Symmetry; 084import org.slf4j.Logger; 085import org.slf4j.LoggerFactory; 086 087/** 088 * A simple mmCif file parser 089 * 090 * 091 * Usage: 092 * <pre> 093String file = "path/to/mmcif/file"; 094StructureIOFile pdbreader = new MMCIFFileReader(); 095 096Structure s = pdbreader.getStructure(file); 097System.out.println(s); 098 099// you can convert it to a PDB file... 100System.out.println(s.toPDB()); 101 102 * </pre> 103 * For more documentation see <a href="http://biojava.org/wiki/BioJava:CookBook#Protein_Structure">http://biojava.org/wiki/BioJava:CookBook#Protein_Structure</a>. 104 * 105 * @author Andreas Prlic 106 * @author Jose Duarte 107 * @since 1.7 108 */ 109public class SimpleMMcifParser implements MMcifParser { 110 111 112 113 /** 114 * The header appearing at the beginning of a mmCIF file. 115 * A "block code" can be added to it of no more than 32 chars. 116 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf 117 */ 118 public static final String MMCIF_TOP_HEADER = "data_"; 119 120 public static final String COMMENT_CHAR = "#"; 121 public static final String LOOP_START = "loop_"; 122 public static final String FIELD_LINE = "_"; 123 124 // the following are the 3 valid quoting characters in CIF 125 /** 126 * Quoting character ' 127 */ 128 private static final char S1 = '\''; 129 130 /** 131 * Quoting character " 132 */ 133 private static final char S2 = '\"'; 134 135 /** 136 * Quoting character ; (multi-line quoting) 137 */ 138 public static final String STRING_LIMIT = ";"; 139 140 141 private List<MMcifConsumer> consumers ; 142 143 private Struct struct ; 144 145 private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifParser.class); 146 147 public SimpleMMcifParser(){ 148 consumers = new ArrayList<MMcifConsumer>(); 149 struct = null; 150 } 151 152 @Override 153 public void addMMcifConsumer(MMcifConsumer consumer) { 154 consumers.add(consumer); 155 156 } 157 158 @Override 159 public void clearConsumers() { 160 consumers.clear(); 161 162 } 163 164 @Override 165 public void removeMMcifConsumer(MMcifConsumer consumer) { 166 consumers.remove(consumer); 167 } 168 169 public static void main(String[] args){ 170 String file = "/Users/andreas/WORK/PDB/mmCif/a9/1a9n.cif.gz"; 171 //String file = "/Users/andreas/WORK/PDB/MMCIF/1gav.mmcif"; 172 //String file = "/Users/andreas/WORK/PDB/MMCIF/100d.cif"; 173 //String file = "/Users/andreas/WORK/PDB/MMCIF/1a4a.mmcif"; 174 System.out.println("parsing " + file); 175 176 StructureIOFile pdbreader = new MMCIFFileReader(); 177 try { 178 Structure s = pdbreader.getStructure(file); 179 System.out.println(s); 180 // convert it to a PDB file... 181 System.out.println(s.toPDB()); 182 } catch (IOException e) { 183 e.printStackTrace(); 184 } 185 186 } 187 188 @Override 189 public void parse(InputStream inStream) throws IOException { 190 parse(new BufferedReader(new InputStreamReader(inStream))); 191 192 } 193 194 @Override 195 public void parse(BufferedReader buf) 196 throws IOException { 197 198 triggerDocumentStart(); 199 200 201 // init container objects... 202 struct = new Struct(); 203 String line = null; 204 205 boolean inLoop = false; 206 boolean inLoopData = false; 207 208 209 List<String> loopFields = new ArrayList<String>(); 210 List<String> lineData = new ArrayList<String>(); 211 Set<String> loopWarnings = new HashSet<String>(); // used only to reduce logging statements 212 213 String category = null; 214 215 boolean foundHeader = false; 216 217 while ( (line = buf.readLine ()) != null ){ 218 219 if (line.isEmpty() || line.startsWith(COMMENT_CHAR)) continue; 220 221 if (!foundHeader) { 222 // the first non-comment line is a data_PDBCODE line, test if this looks like a mmcif file 223 if (line.startsWith(MMCIF_TOP_HEADER)){ 224 foundHeader = true; 225 continue; 226 } else { 227 triggerDocumentEnd(); 228 throw new IOException("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'"); 229 } 230 } 231 232 logger.debug(inLoop + " " + line); 233 234 if (line.startsWith(MMCIF_TOP_HEADER)){ 235 // either first line in file, or beginning of new section (data block in CIF parlance) 236 if ( inLoop) { 237 //System.out.println("new data and in loop: " + line); 238 inLoop = false; 239 inLoopData = false; 240 lineData.clear(); 241 loopFields.clear(); 242 } 243 244 } 245 246 247 if ( inLoop) { 248 249 250 if ( line.startsWith(LOOP_START)){ 251 loopFields.clear(); 252 inLoop = true; 253 inLoopData = false; 254 continue; 255 } 256 257 if ( line.matches("\\s*"+FIELD_LINE+"\\w+.*")) { 258 259 if (inLoopData && line.startsWith(FIELD_LINE)) { 260 logger.debug("Found a field line after reading loop data. Toggling to inLoop=false"); 261 inLoop = false; 262 inLoopData = false; 263 loopFields.clear(); 264 265 266 // a boring normal line 267 List<String> data = processLine(line, buf, 2); 268 269 if ( data.size() < 1){ 270 // this can happen if empty lines at end of file 271 lineData.clear(); 272 continue; 273 } 274 String key = data.get(0); 275 int pos = key.indexOf("."); 276 if ( pos < 0 ) { 277 // looks like a chem_comp file 278 // line should start with data, otherwise something is wrong! 279 if (! line.startsWith(MMCIF_TOP_HEADER)){ 280 logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line+"'"); 281 triggerDocumentEnd(); 282 return; 283 } 284 // ignore the first line... 285 category=null; 286 lineData.clear(); 287 continue; 288 } 289 category = key.substring(0,pos); 290 String value = data.get(1); 291 loopFields.add(key.substring(pos+1,key.length())); 292 lineData.add(value); 293 294 logger.debug("Found data for category {}: {}", key, value); 295 continue; 296 } 297 298 // found another field. 299 String txt = line.trim(); 300 if ( txt.indexOf('.') > -1){ 301 302 String[] spl = txt.split("\\."); 303 category = spl[0]; 304 String attribute = spl[1]; 305 loopFields.add(attribute); 306 logger.debug("Found category: {}, attribute: {}",category, attribute); 307 if ( spl.length > 2){ 308 logger.warn("Found nested attribute in {}, not supported yet!",txt); 309 } 310 311 } else { 312 category = txt; 313 logger.debug("Found category without attribute: {}",category); 314 } 315 316 317 } else { 318 319 // in loop and we found a data line 320 lineData = processLine(line, buf, loopFields.size()); 321 logger.debug("Found a loop data line with {} data fields", lineData.size()); 322 logger.debug("Data fields: {}", lineData.toString()); 323 if ( lineData.size() != loopFields.size()){ 324 logger.warn("Expected {} data fields, but found {} in line: {}",loopFields.size(),lineData.size(),line); 325 326 } 327 328 endLineChecks(category, loopFields, lineData, loopWarnings); 329 330 lineData.clear(); 331 332 inLoopData = true; 333 } 334 335 } else { 336 // not in loop 337 338 if ( line.startsWith(LOOP_START)){ 339 if ( category != null) 340 endLineChecks(category, loopFields, lineData, loopWarnings); 341 342 resetBuffers(loopFields, lineData, loopWarnings); 343 category = null; 344 inLoop = true; 345 inLoopData = false; 346 logger.debug("Detected LOOP_START: '{}'. Toggling to inLoop=true", LOOP_START); 347 continue; 348 } else { 349 logger.debug("Normal line "); 350 inLoop = false; 351 352 // a boring normal line 353 List<String> data = processLine(line, buf, 2); 354 355 if ( data.size() < 1){ 356 // this can happen if empty lines at end of file 357 lineData.clear(); 358 continue; 359 } 360 String key = data.get(0); 361 int pos = key.indexOf("."); 362 if ( pos < 0 ) { 363 // looks like a chem_comp file 364 // line should start with data, otherwise something is wrong! 365 if (! line.startsWith(MMCIF_TOP_HEADER)){ 366 logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line+"'"); 367 triggerDocumentEnd(); 368 return; 369 } 370 // ignore the first line... 371 category=null; 372 lineData.clear(); 373 continue; 374 } 375 376 if (category!=null && !key.substring(0,pos).equals(category)) { 377 // we've changed category: need to flush the previous one 378 endLineChecks(category, loopFields, lineData, loopWarnings); 379 resetBuffers(loopFields, lineData, loopWarnings); 380 } 381 382 category = key.substring(0,pos); 383 384 String value = data.get(1); 385 loopFields.add(key.substring(pos+1,key.length())); 386 lineData.add(value); 387 388 logger.debug("Found data for category {}: {}", key, value); 389 390 } 391 } 392 } 393 394 if (category!=null && lineData.size()>0 && lineData.size()==loopFields.size()) { 395 // the last category in the file will still be missing, we add it now 396 endLineChecks(category, loopFields, lineData, loopWarnings); 397 resetBuffers(loopFields, lineData, loopWarnings); 398 } 399 400 if (struct != null){ 401 triggerStructData(struct); 402 } 403 404 triggerDocumentEnd(); 405 406 } 407 408 private void resetBuffers(List<String> loopFields, List<String> lineData, Set<String> loopWarnings) { 409 loopFields.clear(); 410 lineData.clear(); 411 loopWarnings.clear(); 412 } 413 414 private List<String> processSingleLine(String line){ 415 416 List<String> data = new ArrayList<String>(); 417 418 if ( line.trim().length() == 0){ 419 return data; 420 } 421 422 if ( line.trim().length() == 1){ 423 if ( line.startsWith(STRING_LIMIT)) 424 return data; 425 } 426 boolean inString = false; // semicolon (;) quoting 427 boolean inS1 = false; // single quote (') quoting 428 boolean inS2 = false; // double quote (") quoting 429 String word = ""; 430 431 for (int i=0; i< line.length(); i++ ){ 432 433 Character c = line.charAt(i); 434 435 Character nextC = null; 436 if (i < line.length() - 1) 437 nextC = line.charAt(i+1); 438 439 Character prevC = null; 440 if (i>0) 441 prevC = line.charAt(i-1); 442 443 if (c == ' ') { 444 445 if ( ! inString){ 446 if ( ! word.equals("")) 447 data.add(word.trim()); 448 word = ""; 449 } else { 450 // we are in a string, add the space 451 word += c; 452 } 453 454 } else if (c == S1 ) { 455 456 if ( inString){ 457 458 boolean wordEnd = false; 459 if (! inS2) { 460 if (nextC==null || Character.isWhitespace(nextC)){ 461 i++; 462 wordEnd = true; 463 } 464 } 465 466 467 if ( wordEnd ) { 468 469 // at end of string 470 if ( ! word.equals("")) 471 data.add(word.trim()); 472 word = ""; 473 inString = false; 474 inS1 = false; 475 } else { 476 word += c; 477 } 478 479 } else if (prevC==null || prevC==' ') { 480 // the beginning of a new string 481 inString = true; 482 inS1 = true; 483 } else { 484 word += c; 485 } 486 } else if ( c == S2 ){ 487 if ( inString){ 488 489 boolean wordEnd = false; 490 if (! inS1) { 491 if (nextC==null || Character.isWhitespace(nextC)){ 492 i++; 493 wordEnd = true; 494 } 495 } 496 497 if ( wordEnd ) { 498 499 // at end of string 500 if ( ! word.equals("")) 501 data.add(word.trim()); 502 word = ""; 503 inString = false; 504 inS2 = false; 505 } else { 506 word += c; 507 } 508 } else if (prevC==null || prevC==' ') { 509 // the beginning of a new string 510 inString = true; 511 inS2 = true; 512 } else { 513 word += c; 514 } 515 } else { 516 word += c; 517 } 518 519 } 520 if ( ! word.trim().equals("")) 521 data.add(word); 522 523 524 return data; 525 526 } 527 528 /** 529 * Get the content of a cif entry 530 * 531 * @param line 532 * @param buf 533 * @return 534 */ 535 private List<String> processLine(String line, 536 BufferedReader buf, 537 int fieldLength) 538 throws IOException{ 539 540 //System.out.println("XX processLine " + fieldLength + " " + line); 541 // go through the line and process each character 542 List<String> lineData = new ArrayList<String>(); 543 544 boolean inString = false; 545 546 StringBuilder bigWord = null; 547 548 while ( true ){ 549 550 if ( line.startsWith(STRING_LIMIT)){ 551 if (! inString){ 552 553 inString = true; 554 if ( line.length() > 1) 555 bigWord = new StringBuilder(line.substring(1)); 556 else 557 bigWord = new StringBuilder(""); 558 559 560 } else { 561 // the end of a word 562 lineData.add(bigWord.toString()); 563 bigWord = null; 564 inString = false; 565 566 } 567 } else { 568 if ( inString ) 569 bigWord.append(line); 570 else { 571 572 List<String> dat = processSingleLine(line); 573 574 for (String d : dat){ 575 lineData.add(d); 576 } 577 } 578 } 579 580 //System.out.println("in process line : " + lineData.size() + " " + fieldLength); 581 582 if ( lineData.size() > fieldLength){ 583 584 logger.warn("wrong data length ("+lineData.size()+ 585 ") should be ("+fieldLength+") at line " + line + " got lineData: " + lineData); 586 return lineData; 587 } 588 589 if ( lineData.size() == fieldLength) 590 return lineData; 591 592 593 line = buf.readLine(); 594 if ( line == null) 595 break; 596 } 597 return lineData; 598 599 } 600 601 602 603 private void endLineChecks(String category,List<String> loopFields, List<String> lineData, Set<String> loopWarnings ) throws IOException{ 604 605 logger.debug("Processing category {}, with fields: {}",category,loopFields.toString()); 606 // System.out.println("parsed the following data: " +category + " fields: "+ 607 // loopFields + " DATA: " + 608 // lineData); 609 610 if ( loopFields.size() != lineData.size()){ 611 logger.warn("looks like we got a problem with nested string quote characters:"); 612 throw new IOException("data length ("+ lineData.size() + 613 ") != fields length ("+loopFields.size()+ 614 ") category: " +category + " fields: "+ 615 loopFields + " DATA: " + 616 lineData ); 617 } 618 619 if ( category.equals("_entity")){ 620 621 Entity e = (Entity) buildObject( 622 Entity.class.getName(), 623 loopFields,lineData, loopWarnings); 624 triggerNewEntity(e); 625 626 } else if (category.equals("_entity_poly")) { 627 EntityPoly ep = (EntityPoly) buildObject(EntityPoly.class.getName(), loopFields, lineData, loopWarnings); 628 triggerNewEntityPoly(ep); 629 630 } else if ( category.equals("_struct")){ 631 632 struct = (Struct) buildObject( 633 Struct.class.getName(), 634 loopFields, lineData, loopWarnings); 635 636 } else if ( category.equals("_atom_site")){ 637 638 AtomSite a = (AtomSite) buildObject( 639 AtomSite.class.getName(), 640 loopFields, lineData, loopWarnings); 641 triggerNewAtomSite(a); 642 643 } else if ( category.equals("_database_PDB_rev")){ 644 DatabasePDBrev dbrev = (DatabasePDBrev) buildObject( 645 DatabasePDBrev.class.getName(), 646 loopFields, lineData, loopWarnings); 647 648 triggerNewDatabasePDBrev(dbrev); 649 650 } else if ( category.equals("_database_PDB_rev_record")) { 651 DatabasePdbrevRecord dbrev = (DatabasePdbrevRecord) buildObject( 652 DatabasePdbrevRecord.class.getName(), 653 loopFields, lineData, loopWarnings); 654 655 triggerNewDatabasePDBrevRecord(dbrev); 656 657 // MMCIF version 5 dates 658 } else if ( category.equals("_pdbx_audit_revision_history")) { 659 PdbxAuditRevisionHistory history = (PdbxAuditRevisionHistory) buildObject( 660 PdbxAuditRevisionHistory.class.getName(), 661 loopFields, lineData, loopWarnings); 662 663 triggerNewPdbxAuditRevisionHistory(history); 664 665 // MMCIF version 5 dates 666 } else if ( category.equals("_pdbx_database_status")) { 667 PdbxDatabaseStatus status = (PdbxDatabaseStatus) buildObject( 668 PdbxDatabaseStatus.class.getName(), 669 loopFields, lineData, loopWarnings); 670 671 triggerNewPdbxDatabaseStatus(status); 672 673 }else if ( category.equals("_database_PDB_remark")) { 674 DatabasePDBremark remark = (DatabasePDBremark) buildObject( 675 DatabasePDBremark.class.getName(), 676 loopFields, lineData, loopWarnings); 677 678 triggerNewDatabasePDBremark(remark); 679 680 } else if ( category.equals("_exptl")){ 681 Exptl exptl = (Exptl) buildObject( 682 Exptl.class.getName(), 683 loopFields,lineData, loopWarnings); 684 685 triggerExptl(exptl); 686 687 } else if ( category.equals("_cell")){ 688 Cell cell = (Cell) buildObject( 689 Cell.class.getName(), 690 loopFields,lineData, loopWarnings); 691 692 triggerNewCell(cell); 693 694 } else if ( category.equals("_symmetry")){ 695 Symmetry symmetry = (Symmetry) buildObject( 696 Symmetry.class.getName(), 697 loopFields,lineData, loopWarnings); 698 699 triggerNewSymmetry(symmetry); 700 } else if ( category.equals("_struct_ncs_oper")) { 701 702 StructNcsOper sNcsOper = (StructNcsOper) buildObject( 703 StructNcsOper.class.getName(), 704 loopFields, lineData, loopWarnings); 705 triggerNewStructNcsOper(sNcsOper); 706 } else if ( category.equals("_atom_sites")) { 707 708 AtomSites atomSites = (AtomSites) buildObject( 709 AtomSites.class.getName(), 710 loopFields, lineData, loopWarnings); 711 triggerNewAtomSites(atomSites); 712 713 } else if ( category.equals("_struct_ref")){ 714 StructRef sref = (StructRef) buildObject( 715 StructRef.class.getName(), 716 loopFields,lineData, loopWarnings); 717 718 triggerNewStrucRef(sref); 719 720 } else if ( category.equals("_struct_ref_seq")){ 721 StructRefSeq sref = (StructRefSeq) buildObject( 722 StructRefSeq.class.getName(), 723 loopFields,lineData, loopWarnings); 724 725 triggerNewStrucRefSeq(sref); 726 } else if ( category.equals("_struct_ref_seq_dif")) { 727 StructRefSeqDif sref = (StructRefSeqDif) buildObject( 728 StructRefSeqDif.class.getName(), 729 loopFields, lineData, loopWarnings); 730 731 triggerNewStrucRefSeqDif(sref); 732 } else if ( category.equals("_struct_site_gen")) { 733 StructSiteGen sref = (StructSiteGen) buildObject( 734 StructSiteGen.class.getName(), 735 loopFields, lineData, loopWarnings); 736 737 triggerNewStructSiteGen(sref); 738 } else if ( category.equals("_struct_site")) { 739 StructSite sref = (StructSite) buildObject( 740 StructSite.class.getName(), 741 loopFields, lineData, loopWarnings); 742 triggerNewStructSite(sref); 743 } else if ( category.equals("_entity_poly_seq")){ 744 EntityPolySeq exptl = (EntityPolySeq) buildObject( 745 EntityPolySeq.class.getName(), 746 loopFields,lineData, loopWarnings); 747 748 triggerNewEntityPolySeq(exptl); 749 } else if ( category.equals("_entity_src_gen")){ 750 EntitySrcGen entitySrcGen = (EntitySrcGen) buildObject( 751 EntitySrcGen.class.getName(), 752 loopFields,lineData, loopWarnings); 753 triggerNewEntitySrcGen(entitySrcGen); 754 } else if ( category.equals("_entity_src_nat")){ 755 EntitySrcNat entitySrcNat = (EntitySrcNat) buildObject( 756 EntitySrcNat.class.getName(), 757 loopFields,lineData, loopWarnings); 758 triggerNewEntitySrcNat(entitySrcNat); 759 } else if ( category.equals("_pdbx_entity_src_syn")){ 760 EntitySrcSyn entitySrcSyn = (EntitySrcSyn) buildObject( 761 EntitySrcSyn.class.getName(), 762 loopFields,lineData, loopWarnings); 763 triggerNewEntitySrcSyn(entitySrcSyn); 764 } else if ( category.equals("_struct_asym")){ 765 StructAsym sasym = (StructAsym) buildObject( 766 StructAsym.class.getName(), 767 loopFields,lineData, loopWarnings); 768 769 triggerNewStructAsym(sasym); 770 771 } else if ( category.equals("_pdbx_poly_seq_scheme")){ 772 PdbxPolySeqScheme ppss = (PdbxPolySeqScheme) buildObject( 773 PdbxPolySeqScheme.class.getName(), 774 loopFields,lineData, loopWarnings); 775 776 triggerNewPdbxPolySeqScheme(ppss); 777 778 } else if ( category.equals("_pdbx_nonpoly_scheme")){ 779 PdbxNonPolyScheme ppss = (PdbxNonPolyScheme) buildObject( 780 PdbxNonPolyScheme.class.getName(), 781 loopFields,lineData, loopWarnings); 782 783 triggerNewPdbxNonPolyScheme(ppss); 784 785 } else if ( category.equals("_pdbx_entity_nonpoly")){ 786 PdbxEntityNonPoly pen = (PdbxEntityNonPoly) buildObject( 787 PdbxEntityNonPoly.class.getName(), 788 loopFields,lineData, loopWarnings 789 ); 790 triggerNewPdbxEntityNonPoly(pen); 791 } else if ( category.equals("_struct_keywords")){ 792 StructKeywords kw = (StructKeywords)buildObject( 793 StructKeywords.class.getName(), 794 loopFields,lineData, loopWarnings 795 ); 796 triggerNewStructKeywords(kw); 797 } else if (category.equals("_refine")){ 798 Refine r = (Refine)buildObject( 799 Refine.class.getName(), 800 loopFields,lineData, loopWarnings 801 ); 802 triggerNewRefine(r); 803 } else if (category.equals("_chem_comp")){ 804 ChemComp c = (ChemComp)buildObject( 805 ChemComp.class.getName(), 806 loopFields, lineData, loopWarnings 807 ); 808 triggerNewChemComp(c); 809 } else if (category.equals("_audit_author")) { 810 AuditAuthor aa = (AuditAuthor)buildObject( 811 AuditAuthor.class.getName(), 812 loopFields, lineData, loopWarnings); 813 triggerNewAuditAuthor(aa); 814 } else if (category.equals("_pdbx_chem_comp_descriptor")) { 815 ChemCompDescriptor ccd = (ChemCompDescriptor) buildObject( 816 ChemCompDescriptor.class.getName(), 817 loopFields, lineData, loopWarnings); 818 triggerNewChemCompDescriptor(ccd); 819 } else if (category.equals("_pdbx_struct_oper_list")) { 820 821 PdbxStructOperList structOper = (PdbxStructOperList) buildObject( 822 PdbxStructOperList.class.getName(), 823 loopFields, lineData, loopWarnings 824 ); 825 triggerNewPdbxStructOper(structOper); 826 827 } else if (category.equals("_pdbx_struct_assembly")) { 828 PdbxStructAssembly sa = (PdbxStructAssembly) buildObject( 829 PdbxStructAssembly.class.getName(), 830 loopFields, lineData, loopWarnings); 831 triggerNewPdbxStructAssembly(sa); 832 833 } else if (category.equals("_pdbx_struct_assembly_gen")) { 834 PdbxStructAssemblyGen sa = (PdbxStructAssemblyGen) buildObject( 835 PdbxStructAssemblyGen.class.getName(), 836 loopFields, lineData, loopWarnings); 837 triggerNewPdbxStructAssemblyGen(sa); 838 } else if ( category.equals("_chem_comp_atom")){ 839 ChemCompAtom atom = (ChemCompAtom)buildObject( 840 ChemCompAtom.class.getName(), 841 loopFields,lineData, loopWarnings); 842 triggerNewChemCompAtom(atom); 843 844 }else if ( category.equals("_chem_comp_bond")){ 845 ChemCompBond bond = (ChemCompBond)buildObject( 846 ChemCompBond.class.getName(), 847 loopFields,lineData, loopWarnings); 848 triggerNewChemCompBond(bond); 849 } else if ( category.equals("_pdbx_chem_comp_identifier")){ 850 PdbxChemCompIdentifier id = (PdbxChemCompIdentifier)buildObject( 851 PdbxChemCompIdentifier.class.getName(), 852 loopFields,lineData, loopWarnings); 853 triggerNewPdbxChemCompIdentifier(id); 854 } else if ( category.equals("_pdbx_chem_comp_descriptor")){ 855 PdbxChemCompDescriptor id = (PdbxChemCompDescriptor)buildObject( 856 PdbxChemCompDescriptor.class.getName(), 857 loopFields,lineData, loopWarnings); 858 triggerNewPdbxChemCompDescriptor(id); 859 } else if ( category.equals("_struct_conn")){ 860 StructConn id = (StructConn)buildObject( 861 StructConn.class.getName(), 862 loopFields,lineData, loopWarnings); 863 triggerNewStructConn(id); 864 865 } else { 866 867 logger.debug("Using a generic bean for category {}",category); 868 869 // trigger a generic bean that can deal with all missing data types... 870 triggerGeneric(category,loopFields,lineData); 871 } 872 873 874 } 875 876 877// private PdbxStructOperList getPdbxStructOperList(List<String> loopFields, 878// List<String> lineData) { 879// PdbxStructOperList so = new PdbxStructOperList(); 880// 881// //System.out.println(loopFields); 882// //System.out.println(lineData); 883// 884// String id = lineData.get(loopFields.indexOf("id")); 885// so.setId(id); 886// so.setType(lineData.get(loopFields.indexOf("type"))); 887// Matrix matrix = new Matrix(3,3); 888// for (int i = 1 ; i <=3 ; i++){ 889// for (int j =1 ; j <= 3 ; j++){ 890// String max = String.format("matrix[%d][%d]",j,i); 891// 892// String val = lineData.get(loopFields.indexOf(max)); 893// Double d = Double.parseDouble(val); 894// matrix.set(j-1,i-1,d); 895// // matrix.set(i-1,j-1,d); 896// } 897// } 898// 899// double[] coords =new double[3]; 900// 901// for ( int i = 1; i <=3 ; i++){ 902// String v = String.format("vector[%d]",i); 903// String val = lineData.get(loopFields.indexOf(v)); 904// Double d = Double.parseDouble(val); 905// coords[i-1] = d; 906// } 907// 908// so.setMatrix(matrix); 909// so.setVector(coords); 910// 911// 912// 913// return so; 914// } 915 916 public void triggerNewPdbxStructOper(PdbxStructOperList structOper) { 917 for(MMcifConsumer c : consumers){ 918 c.newPdbxStructOperList(structOper); 919 } 920 921 } 922 923 public void triggerNewStructNcsOper(StructNcsOper sNcsOper) { 924 for(MMcifConsumer c : consumers){ 925 c.newStructNcsOper(sNcsOper); 926 } 927 928 } 929 930 public void triggerNewAtomSites(AtomSites atomSites) { 931 for(MMcifConsumer c : consumers){ 932 c.newAtomSites(atomSites); 933 } 934 } 935 936 /** 937 * Populates a bean object from the {@link org.biojava.nbio.structure.io.mmcif.model} package, 938 * from the data read from a CIF file. 939 * It uses reflection to lookup the field and setter method names given the category 940 * found in the CIF file. 941 * <p> 942 * Due to limitations in variable names in java, not all fields can have names 943 * exactly as defined in the CIF categories. In those cases the {@link CIFLabel} tag 944 * can be used in the field names to give the appropriate name that corresponds to the 945 * CIF category, which is the name that will be then looked up here. 946 * The {@link IgnoreField} tag can also be used to exclude fields from being looked up. 947 * @param className 948 * @param loopFields 949 * @param lineData 950 * @param warnings 951 * @return 952 */ 953 private Object buildObject(String className, List<String> loopFields, List<String> lineData, Set<String> warnings) { 954 955 Object o = null; 956 Class<?> c = null; 957 958 try { 959 // build up the Entity object from the line data... 960 c = Class.forName(className); 961 962 o = c.newInstance(); 963 964 } catch (InstantiationException|ClassNotFoundException|IllegalAccessException e){ 965 logger.error( "Error while constructing {}: {}", className, e.getMessage()); 966 return null; 967 } 968 969 // these methods get the fields but also looking at the IgnoreField and CIFLabel annotations 970 Field[] fields = MMCIFFileTools.getFields(c); 971 String[] names = MMCIFFileTools.getFieldNames(fields); 972 973 // let's build a map of all methods so that we can look up the setter methods later 974 Method[] methods = c.getMethods(); 975 976 Map<String,Method> methodMap = new HashMap<String, Method>(); 977 for (Method m : methods) { 978 methodMap.put(m.getName(),m); 979 } 980 981 // and a map of all the fields so that we can lookup them up later 982 Map<String, Field> names2fields = new HashMap<>(); 983 for (int i=0;i<fields.length;i++) { 984 names2fields.put(names[i], fields[i]); 985 } 986 987 int pos = -1 ; 988 for (String key: loopFields){ 989 pos++; 990 991 String val = lineData.get(pos); 992 993 // we first start looking up the field which can be annotated with a CIFLabel if they 994 // need alternative names (e.g. for field _symmetry.space_group_name_H-M, since hyphen is not allowed in var names in java) 995 Field field = names2fields.get(key); 996 997 if (field == null) { 998 produceWarning(key, val, c, warnings); 999 continue; 1000 } 1001 // now we need to find the corresponding setter 1002 // note that we can't use the field directly and then call Field.set() because many setters 1003 // have more functionality than just setting the value (e.g. some setters in ChemComp) 1004 1005 // building up the setter method name: need to upper case the first letter, leave the rest untouched 1006 String setterMethodName = "set" + field.getName().substring(0,1).toUpperCase() + field.getName().substring(1, field.getName().length()); 1007 1008 Method setter = methodMap.get(setterMethodName); 1009 1010 if (setter==null) { 1011 produceWarning(key, val, c, warnings); 1012 continue; 1013 } 1014 1015 1016 1017 // now we populate the object with the values by invoking the corresponding setter method, 1018 // note that all of the mmCif container classes have only one argument (they are beans) 1019 Class<?>[] pType = setter.getParameterTypes(); 1020 1021 1022 try { 1023 if ( pType[0].getName().equals(Integer.class.getName())) { 1024 if ( val != null && ! val.equals("?") && !val.equals(".")) { 1025 1026 Integer intVal = Integer.parseInt(val); 1027 setter.invoke(o, intVal); 1028 1029 } 1030 } else { 1031 // default val is a String 1032 setter.invoke(o, val); 1033 } 1034 } catch (IllegalAccessException|InvocationTargetException e) { 1035 logger.error("Could not invoke setter {} with value {} for class {}", setterMethodName, val, className); 1036 } 1037 1038 } 1039 1040 return o; 1041 } 1042 1043 private void produceWarning(String key, String val, Class<?> c, Set<String> warnings) { 1044 1045 String warning = "Trying to set field " + key + " in "+ c.getName() +" found in file, but no corresponding field could be found in model class (value:" + val + ")"; 1046 String warnkey = key+"-"+c.getName(); 1047 // Suppress duplicate warnings or attempts to store empty data 1048 if( val.equals("?") || val.equals(".") || ( warnings != null && warnings.contains(warnkey)) ) { 1049 logger.debug(warning); 1050 } else { 1051 logger.info(warning); 1052 } 1053 1054 if(warnings != null) { 1055 warnings.add(warnkey); 1056 } 1057 1058 } 1059 1060 public void triggerGeneric(String category, List<String> loopFields, List<String> lineData){ 1061 for(MMcifConsumer c : consumers){ 1062 c.newGenericData(category, loopFields, lineData); 1063 } 1064 } 1065 1066 public void triggerNewEntity(Entity entity){ 1067 for(MMcifConsumer c : consumers){ 1068 c.newEntity(entity); 1069 } 1070 } 1071 1072 public void triggerNewEntityPoly(EntityPoly entityPoly) { 1073 for(MMcifConsumer c : consumers){ 1074 c.newEntityPoly(entityPoly); 1075 } 1076 } 1077 1078 public void triggerNewEntityPolySeq(EntityPolySeq epolseq){ 1079 for(MMcifConsumer c : consumers){ 1080 c.newEntityPolySeq(epolseq); 1081 } 1082 } 1083 public void triggerNewEntitySrcGen(EntitySrcGen entitySrcGen){ 1084 for(MMcifConsumer c : consumers){ 1085 c.newEntitySrcGen(entitySrcGen); 1086 } 1087 } 1088 public void triggerNewEntitySrcNat(EntitySrcNat entitySrcNat){ 1089 for(MMcifConsumer c : consumers){ 1090 c.newEntitySrcNat(entitySrcNat); 1091 } 1092 } 1093 public void triggerNewEntitySrcSyn(EntitySrcSyn entitySrcSyn){ 1094 for(MMcifConsumer c : consumers){ 1095 c.newEntitySrcSyn(entitySrcSyn); 1096 } 1097 } 1098 public void triggerNewChemComp(ChemComp cc){ 1099 1100 for(MMcifConsumer c : consumers){ 1101 c.newChemComp(cc); 1102 } 1103 } 1104 public void triggerNewStructAsym(StructAsym sasym){ 1105 for(MMcifConsumer c : consumers){ 1106 c.newStructAsym(sasym); 1107 } 1108 } 1109 1110 private void triggerStructData(Struct struct){ 1111 for(MMcifConsumer c : consumers){ 1112 c.setStruct(struct); 1113 } 1114 } 1115 1116 private void triggerNewAtomSite(AtomSite atom){ 1117 for(MMcifConsumer c : consumers){ 1118 c.newAtomSite(atom); 1119 } 1120 } 1121 1122 private void triggerNewAuditAuthor(AuditAuthor aa){ 1123 for(MMcifConsumer c : consumers){ 1124 c.newAuditAuthor(aa); 1125 } 1126 } 1127 1128 private void triggerNewPdbxAuditRevisionHistory(PdbxAuditRevisionHistory history) { 1129 for(MMcifConsumer c : consumers){ 1130 c.newPdbxAuditRevisionHistory(history); 1131 } 1132 } 1133 1134 private void triggerNewPdbxDatabaseStatus(PdbxDatabaseStatus status) { 1135 for(MMcifConsumer c : consumers){ 1136 c.newPdbxDatabaseStatus(status); 1137 } 1138 } 1139 1140 private void triggerNewDatabasePDBrev(DatabasePDBrev dbrev){ 1141 for(MMcifConsumer c : consumers){ 1142 c.newDatabasePDBrev(dbrev); 1143 } 1144 } 1145 private void triggerNewDatabasePDBrevRecord(DatabasePdbrevRecord dbrev){ 1146 for(MMcifConsumer c : consumers){ 1147 c.newDatabasePDBrevRecord(dbrev); 1148 } 1149 } 1150 1151 private void triggerNewDatabasePDBremark(DatabasePDBremark remark){ 1152 for(MMcifConsumer c : consumers){ 1153 c.newDatabasePDBremark(remark); 1154 } 1155 } 1156 1157 private void triggerExptl(Exptl exptl){ 1158 for(MMcifConsumer c : consumers){ 1159 c.newExptl(exptl); 1160 } 1161 } 1162 1163 private void triggerNewCell(Cell cell) { 1164 for(MMcifConsumer c : consumers){ 1165 c.newCell(cell); 1166 } 1167 } 1168 1169 private void triggerNewSymmetry(Symmetry symmetry) { 1170 for(MMcifConsumer c : consumers){ 1171 c.newSymmetry(symmetry); 1172 } 1173 } 1174 1175 private void triggerNewStrucRef(StructRef sref){ 1176 for(MMcifConsumer c : consumers){ 1177 c.newStructRef(sref); 1178 } 1179 } 1180 1181 private void triggerNewStrucRefSeq(StructRefSeq sref){ 1182 for(MMcifConsumer c : consumers){ 1183 c.newStructRefSeq(sref); 1184 } 1185 } 1186 1187 private void triggerNewStrucRefSeqDif(StructRefSeqDif sref){ 1188 for(MMcifConsumer c : consumers){ 1189 c.newStructRefSeqDif(sref); 1190 } 1191 } 1192 1193 private void triggerNewPdbxPolySeqScheme(PdbxPolySeqScheme ppss){ 1194 for(MMcifConsumer c : consumers){ 1195 c.newPdbxPolySeqScheme(ppss); 1196 } 1197 } 1198 private void triggerNewPdbxNonPolyScheme(PdbxNonPolyScheme ppss){ 1199 for(MMcifConsumer c : consumers){ 1200 c.newPdbxNonPolyScheme(ppss); 1201 } 1202 } 1203 public void triggerNewPdbxEntityNonPoly(PdbxEntityNonPoly pen){ 1204 for (MMcifConsumer c: consumers){ 1205 c.newPdbxEntityNonPoly(pen); 1206 } 1207 } 1208 public void triggerNewStructKeywords(StructKeywords kw){ 1209 for (MMcifConsumer c: consumers){ 1210 c.newStructKeywords(kw); 1211 } 1212 } 1213 public void triggerNewRefine(Refine r){ 1214 for (MMcifConsumer c: consumers){ 1215 c.newRefine(r); 1216 } 1217 } 1218 public void triggerDocumentStart(){ 1219 for(MMcifConsumer c : consumers){ 1220 c.documentStart(); 1221 } 1222 } 1223 public void triggerDocumentEnd(){ 1224 for(MMcifConsumer c : consumers){ 1225 c.documentEnd(); 1226 } 1227 } 1228 public void triggerNewChemCompDescriptor(ChemCompDescriptor ccd) { 1229 for(MMcifConsumer c : consumers){ 1230 c.newChemCompDescriptor(ccd); 1231 } 1232 } 1233 private void triggerNewPdbxStructAssembly(PdbxStructAssembly sa) { 1234 for(MMcifConsumer c : consumers){ 1235 c.newPdbxStrucAssembly(sa); 1236 } 1237 } 1238 private void triggerNewPdbxStructAssemblyGen(PdbxStructAssemblyGen sa) { 1239 for(MMcifConsumer c : consumers){ 1240 c.newPdbxStrucAssemblyGen(sa); 1241 } 1242 } 1243 1244 private void triggerNewChemCompAtom(ChemCompAtom atom) { 1245 for(MMcifConsumer c : consumers){ 1246 c.newChemCompAtom(atom); 1247 } 1248 } 1249 1250 private void triggerNewChemCompBond(ChemCompBond bond) { 1251 for(MMcifConsumer c : consumers){ 1252 c.newChemCompBond(bond); 1253 } 1254 } 1255 1256 private void triggerNewPdbxChemCompIdentifier(PdbxChemCompIdentifier id) { 1257 for(MMcifConsumer c : consumers){ 1258 c.newPdbxChemCompIndentifier(id); 1259 } 1260 } 1261 private void triggerNewPdbxChemCompDescriptor(PdbxChemCompDescriptor id) { 1262 for(MMcifConsumer c : consumers){ 1263 c.newPdbxChemCompDescriptor(id); 1264 } 1265 } 1266 private void triggerNewStructConn(StructConn id) { 1267 for(MMcifConsumer c : consumers){ 1268 c.newStructConn(id); 1269 } 1270 } 1271 private void triggerNewStructSiteGen(StructSiteGen id) { 1272 for (MMcifConsumer c : consumers) { 1273 c.newStructSiteGen(id); 1274 } 1275 } 1276 private void triggerNewStructSite(StructSite id) { 1277 for (MMcifConsumer c : consumers) { 1278 c.newStructSite(id); 1279 } 1280 } 1281}