001/* 002 * This code may be freely distributed and modified under the 003 * terms of the GNU Lesser General Public Licence. This should 004 * be distributed with the code. If you do not have a copy, 005 * see: 006 * 007 * http://www.gnu.org/copyleft/lesser.html 008 * 009 * Copyright for this code is held jointly by the individual 010 * authors. These should be listed in @author doc comments. 011 * 012 * For more information on the BioJava project and its aims, 013 * or to join the biojava-l mailing list, visit the home page 014 * at: 015 * 016 * http://www.biojava.org/ 017 * 018 * Created on 26.04.2004 019 * @author Andreas Prlic 020 * 021 */ 022package org.biojava.nbio.structure.io; 023 024import java.io.IOException; 025import java.text.DateFormat; 026import java.text.DecimalFormat; 027import java.text.NumberFormat; 028import java.text.SimpleDateFormat; 029import java.util.ArrayList; 030import java.util.List; 031import java.util.Locale; 032import java.util.Map; 033 034import org.biojava.nbio.core.util.XMLWriter; 035import org.biojava.nbio.structure.Atom; 036import org.biojava.nbio.structure.Chain; 037import org.biojava.nbio.structure.DBRef; 038import org.biojava.nbio.structure.Element; 039import org.biojava.nbio.structure.Group; 040import org.biojava.nbio.structure.GroupType; 041import org.biojava.nbio.structure.PDBHeader; 042import org.biojava.nbio.structure.Site; 043import org.biojava.nbio.structure.Structure; 044import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools; 045import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser; 046import org.biojava.nbio.structure.io.mmcif.model.AtomSite; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050 051/** Methods to convert a structure object into different file formats. 052 * @author Andreas Prlic 053 * @since 1.4 054 */ 055public class FileConvert { 056 057 private static final Logger logger = LoggerFactory.getLogger(FileConvert.class); 058 059 060 061 private Structure structure ; 062 063 private boolean printConnections; 064 065 // Locale should be english, e.g. in DE separator is "," -> PDB files have "." ! 066 public static DecimalFormat d3 = (DecimalFormat)NumberFormat.getInstance(Locale.US); 067 static { 068 d3.setMaximumIntegerDigits(4); 069 d3.setMinimumFractionDigits(3); 070 d3.setMaximumFractionDigits(3); 071 } 072 public static DecimalFormat d2 = (DecimalFormat)NumberFormat.getInstance(Locale.US); 073 static { 074 d2.setMaximumIntegerDigits(3); 075 d2.setMinimumFractionDigits(2); 076 d2.setMaximumFractionDigits(2); 077 } 078 079 private static final String newline = System.getProperty("line.separator"); 080 081 /** 082 * Constructs a FileConvert object. 083 * 084 * @param struc a Structure object 085 */ 086 public FileConvert(Structure struc) { 087 structure = struc ; 088 printConnections = true; 089 } 090 091 /** 092 * Returns if the Connections should be added 093 * default is true; 094 * @return if the printConnections flag is set 095 */ 096 public boolean doPrintConnections() { 097 return printConnections; 098 } 099 100 /** enable/disable printing of connections 101 * connections are sometimes buggy in PDB files 102 * so there are some cases where one might turn this off. 103 * @param printConnections 104 */ 105 public void setPrintConnections(boolean printConnections) { 106 this.printConnections = printConnections; 107 } 108 109 /** prints the connections in PDB style 110 * 111 * Thanks to Tamas Horvath for this one 112 */ 113 private String printPDBConnections(){ 114 115 116 StringBuffer str = new StringBuffer(); 117 118 // TODO this needs to be rewritten so that the data comes from Atom.getBonds(). Structure.getConnections will be removed in upcoming releases (after 4.2) - JD 2016-03-03 119 120 List<Map<String, Integer>> cons = structure.getConnections(); 121 for (int cnr = 0; cnr<cons.size();cnr++){ 122 Map<String,Integer> con = cons.get(cnr); 123 Integer as = con.get("atomserial"); 124 125 String atomserial = ""; 126 127 String bond1 = ""; 128 String bond2 = ""; 129 String bond3 = ""; 130 String bond4 = ""; 131 String hyd1 = ""; 132 String hyd2 = ""; 133 String salt1 = ""; 134 String hyd3 = ""; 135 String hyd4 = ""; 136 String salt2 = ""; 137 138 139 140 if (con.containsKey("bond1")) bond1 = con.get("bond1").toString(); 141 if (con.containsKey("bond2")) bond2 = con.get("bond2").toString(); 142 if (con.containsKey("bond3")) bond3 = con.get("bond3").toString(); 143 if (con.containsKey("bond4")) bond4 = con.get("bond4").toString(); 144 if (con.containsKey("hyd1")) hyd1 = con.get("hyd1").toString(); 145 if (con.containsKey("hyd2")) hyd2 = con.get("hyd2").toString(); 146 if (con.containsKey("salt1")) salt1 = con.get("salt1").toString(); 147 if (con.containsKey("hyd3")) hyd3 = con.get("hyd3").toString(); 148 if (con.containsKey("hyd4")) hyd4 = con.get("hyd4").toString(); 149 if (con.containsKey("salt2")) salt2 = con.get("salt2").toString(); 150 151 atomserial = String.format("%5d",as) ; 152 bond1 = String.format("%5s",bond1) ; 153 bond2 = String.format("%5s",bond2) ; 154 bond3 = String.format("%5s",bond3) ; 155 bond4 = String.format("%5s",bond4) ; 156 hyd1 = String.format("%5s",hyd1) ; 157 hyd2 = String.format("%5s",hyd2) ; 158 salt1 = String.format("%5s",salt1) ; 159 hyd3 = String.format("%5s",hyd3) ; 160 hyd4 = String.format("%5s",hyd4) ; 161 salt2 = String.format("%5s",salt2) ; 162 163 String connectLine = "CONECT" + atomserial + bond1 + bond2 + bond3 + 164 bond4 + hyd1 + hyd2 + salt1 + hyd3 + hyd4 + salt2; 165 166 str.append(connectLine).append(newline); 167 } 168 return str.toString(); 169 } 170 171 /** Convert a structure into a PDB file. 172 * @return a String representing a PDB file. 173 */ 174 public String toPDB() { 175 176 177 StringBuffer str = new StringBuffer(); 178 //int i = 0 ; 179 180 181 182 // TODO: print all the PDB header informaton in PDB style 183 // some objects (PDBHeader, Compound) are still missing 184 // 185 186 PDBHeader header = structure.getPDBHeader(); 187 header.toPDB(str); 188 189 190 //REMARK 800 191 if (!structure.getSites().isEmpty()) { 192 str.append("REMARK 800 ").append(newline); 193 str.append("REMARK 800 SITE ").append(newline); 194 for (Site site : structure.getSites()) { 195 site.remark800toPDB(str); 196 } 197 } 198 //DBREF 199 for (DBRef dbref : structure.getDBRefs()){ 200 dbref.toPDB(str); 201 str.append(newline); 202 } 203 //SSBOND 204 List<SSBondImpl> ssbonds = SSBondImpl.getSsBondListFromBondList(structure.getSSBonds()); 205 for (SSBondImpl ssbond : ssbonds){ 206 ssbond.toPDB(str); 207 str.append(newline); 208 } 209 //SITE 210 for (Site site : structure.getSites()) { 211 try { 212 site.toPDB(str); 213 } catch (Exception e){ 214 e.printStackTrace(); 215 } 216 } 217 218 // 219 // print the atom records 220 // 221 222 // do for all models 223 int nrModels = structure.nrModels() ; 224 if ( structure.isNmr()) { 225 str.append("EXPDTA NMR, "+ nrModels+" STRUCTURES"+newline) ; 226 } 227 for (int m = 0 ; m < nrModels ; m++) { 228 List<Chain> model = structure.getModel(m); 229 // todo support NMR structures ... 230 if ( nrModels>1 ) { 231 str.append("MODEL " + (m+1)+ newline); 232 } 233 // do for all chains 234 int nrChains = model.size(); 235 for ( int c =0; c<nrChains;c++) { 236 Chain chain = model.get(c); 237 //String chainID = chain.getChainID(); 238 //if ( chainID.equals(DEFAULTCHAIN) ) chainID = " "; 239 // do for all groups 240 int nrGroups = chain.getAtomLength(); 241 for ( int h=0; h<nrGroups;h++){ 242 243 Group g= chain.getAtomGroup(h); 244 245 246 toPDB(g,str); 247 248 249 } 250 // End any chains with a "TER" record. 251 if (nrGroups > 0) str.append("TER").append(newline); 252 } 253 254 if ( nrModels>1) { 255 str.append("ENDMDL").append(newline); 256 } 257 258 259 260 } 261 262 if ( doPrintConnections() ) 263 str.append(printPDBConnections()); 264 265 return str.toString() ; 266 } 267 268 private static void toPDB(Group g, StringBuffer str) { 269 // iterate over all atoms ... 270 // format output ... 271 int groupsize = g.size(); 272 273 for ( int atompos = 0 ; atompos < groupsize; atompos++) { 274 Atom a = null ; 275 276 a = g.getAtom(atompos); 277 if ( a == null) 278 continue ; 279 280 toPDB(a, str); 281 282 283 //line = record + serial + " " + fullname +altLoc 284 //+ leftResName + " " + chainID + resseq 285 //+ " " + x+y+z 286 //+ occupancy + tempfactor; 287 //str.append(line + newline); 288 //System.out.println(line); 289 } 290 if ( g.hasAltLoc()){ 291 for (Group alt : g.getAltLocs() ) { 292 toPDB(alt,str); 293 } 294 } 295 296 } 297 298 /** Prints the content of an Atom object as a PDB formatted line. 299 * 300 * @param a 301 * @return 302 */ 303 public static String toPDB(Atom a){ 304 StringBuffer w = new StringBuffer(); 305 306 toPDB(a,w); 307 308 return w.toString(); 309 310 } 311 312 public static String toPDB(Atom a, String chainId) { 313 StringBuffer w = new StringBuffer(); 314 315 toPDB(a,w, chainId); 316 317 return w.toString(); 318 } 319 320 321 /** 322 * Convert a Chain object to PDB representation 323 * 324 * @param chain 325 * @return 326 */ 327 public static String toPDB(Chain chain){ 328 StringBuffer w = new StringBuffer(); 329 int nrGroups = chain.getAtomLength(); 330 331 for ( int h=0; h<nrGroups;h++){ 332 333 Group g= chain.getAtomGroup(h); 334 335 336 toPDB(g,w); 337 338 339 } 340 341 return w.toString(); 342 } 343 344 /** 345 * Convert a Group object to PDB representation 346 * 347 * @param g 348 * @return 349 */ 350 public static String toPDB(Group g){ 351 StringBuffer w = new StringBuffer(); 352 toPDB(g,w); 353 return w.toString(); 354 } 355 356 /** 357 * Print ATOM record in the following syntax 358 * <pre> 359 * ATOM 1 N ASP A 15 110.964 24.941 59.191 1.00 83.44 N 360 * 361 * COLUMNS DATA TYPE FIELD DEFINITION 362 * --------------------------------------------------------------------------------- 363 * 1 - 6 Record name "ATOM " 364 * 7 - 11 Integer serial Atom serial number. 365 * 13 - 16 Atom name Atom name. 366 * 17 Character altLoc Alternate location indicator. 367 * 18 - 20 Residue name resName Residue name. 368 * 22 Character chainID Chain identifier. 369 * 23 - 26 Integer resSeq Residue sequence number. 370 * 27 AChar iCode Code for insertion of residues. 371 * 31 - 38 Real(8.3) x Orthogonal coordinates for X in 372 * Angstroms. 373 * 39 - 46 Real(8.3) y Orthogonal coordinates for Y in 374 * Angstroms. 375 * 47 - 54 Real(8.3) z Orthogonal coordinates for Z in 376 * Angstroms. 377 * 55 - 60 Real(6.2) occupancy Occupancy. 378 * 61 - 66 Real(6.2) tempFactor Temperature factor. 379 * 73 - 76 LString(4) segID Segment identifier, left-justified. 380 * 77 - 78 LString(2) element Element symbol, right-justified. 381 * 79 - 80 LString(2) charge Charge on the atom. 382 * </pre> 383 * @param a 384 * @param str 385 * @param chainID the chain ID that the Atom will have in the output string 386 */ 387 public static void toPDB(Atom a, StringBuffer str, String chainID) { 388 389 Group g = a.getGroup(); 390 391 GroupType type = g.getType() ; 392 393 String record = "" ; 394 if ( type.equals(GroupType.HETATM) ) { 395 record = "HETATM"; 396 } else { 397 record = "ATOM "; 398 } 399 400 401 // format output ... 402 String resName = g.getPDBName(); 403 String pdbcode = g.getResidueNumber().toString(); 404 405 406 int seri = a.getPDBserial() ; 407 String serial = String.format("%5d",seri); 408 String fullName = formatAtomName(a); 409 410 Character altLoc = a.getAltLoc(); 411 if ( altLoc == null) 412 altLoc = ' '; 413 414 String resseq = "" ; 415 if ( hasInsertionCode(pdbcode) ) 416 resseq = String.format("%5s",pdbcode); 417 else 418 resseq = String.format("%4s",pdbcode)+" "; 419 420 String x = String.format("%8s",d3.format(a.getX())); 421 String y = String.format("%8s",d3.format(a.getY())); 422 String z = String.format("%8s",d3.format(a.getZ())); 423 String occupancy = String.format("%6s",d2.format(a.getOccupancy())) ; 424 String tempfactor = String.format("%6s",d2.format(a.getTempFactor())); 425 426 427 String leftResName = String.format("%3s",resName); 428 429 StringBuffer s = new StringBuffer(); 430 s.append(record); 431 s.append(serial); 432 s.append(" "); 433 s.append(fullName); 434 s.append(altLoc); 435 s.append(leftResName); 436 s.append(" "); 437 s.append(chainID); 438 s.append(resseq); 439 s.append(" "); 440 s.append(x); 441 s.append(y); 442 s.append(z); 443 s.append(occupancy); 444 s.append(tempfactor); 445 446 Element e = a.getElement(); 447 448 String eString = e.toString().toUpperCase(); 449 450 if ( e.equals(Element.R)) { 451 eString = "X"; 452 } 453 str.append(String.format("%-76s%2s", s.toString(),eString)); 454 str.append(newline); 455 456 } 457 458 public static void toPDB(Atom a, StringBuffer str) { 459 toPDB(a,str,a.getGroup().getChainId()); 460 } 461 462 463 /** test if pdbserial has an insertion code */ 464 private static boolean hasInsertionCode(String pdbserial) { 465 try { 466 Integer.parseInt(pdbserial) ; 467 } catch (NumberFormatException e) { 468 return true ; 469 } 470 return false ; 471 } 472 473 474 /** 475 * Convert a protein Structure to a DAS Structure XML response . 476 * @param xw a XMLWriter object 477 * @throws IOException ... 478 * 479 */ 480 public void toDASStructure(XMLWriter xw) 481 throws IOException 482 { 483 484 /*xmlns="http://www.sanger.ac.uk/xml/das/2004/06/17/dasalignment.xsd" xmlns:align="http://www.sanger.ac.uk/xml/das/2004/06/17/alignment.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance" xsd:schemaLocation="http://www.sanger.ac.uk/xml/das/2004/06/17/dasalignment.xsd http://www.sanger.ac.uk/xml/das//2004/06/17/dasalignment.xsd"*/ 485 486 if ( structure == null){ 487 System.err.println("can not convert structure null"); 488 return; 489 } 490 491 PDBHeader header = structure.getPDBHeader(); 492 493 xw.openTag("object"); 494 xw.attribute("dbAccessionId",structure.getPDBCode()); 495 xw.attribute("intObjectId" ,structure.getPDBCode()); 496 // missing modification date 497 DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yy",Locale.US); 498 String modificationDate = dateFormat.format(header.getModDate()); 499 xw.attribute("objectVersion",modificationDate); 500 xw.attribute("type","protein structure"); 501 xw.attribute("dbSource","PDB"); 502 xw.attribute("dbVersion","20070116"); 503 xw.attribute("dbCoordSys","PDBresnum,Protein Structure"); 504 505 // do we need object details ??? 506 xw.closeTag("object"); 507 508 509 // do for all models 510 for (int modelnr = 0;modelnr<structure.nrModels();modelnr++){ 511 512 // do for all chains: 513 for (int chainnr = 0;chainnr<structure.size(modelnr);chainnr++){ 514 Chain chain = structure.getChain(modelnr,chainnr); 515 xw.openTag("chain"); 516 xw.attribute("id",chain.getChainID()); 517 xw.attribute("SwissprotId",chain.getSwissprotId() ); 518 if (structure.nrModels()>1){ 519 xw.attribute("model",Integer.toString(modelnr+1)); 520 } 521 522 //do for all groups: 523 for (int groupnr =0; 524 groupnr<chain.getAtomLength() 525 ;groupnr++){ 526 Group gr = chain.getAtomGroup(groupnr); 527 xw.openTag("group"); 528 xw.attribute("name",gr.getPDBName()); 529 xw.attribute("type",gr.getType().toString()); 530 xw.attribute("groupID",gr.getResidueNumber().toString()); 531 532 533 // do for all atoms: 534 //Atom[] atoms = gr.getAtoms(); 535 List<Atom> atoms = gr.getAtoms(); 536 for (int atomnr=0;atomnr<atoms.size();atomnr++){ 537 Atom atom = atoms.get(atomnr); 538 xw.openTag("atom"); 539 xw.attribute("atomID",Integer.toString(atom.getPDBserial())); 540 xw.attribute("atomName",formatAtomName(atom)); 541 xw.attribute("x",Double.toString(atom.getX())); 542 xw.attribute("y",Double.toString(atom.getY())); 543 xw.attribute("z",Double.toString(atom.getZ())); 544 xw.closeTag("atom"); 545 } 546 xw.closeTag("group") ; 547 } 548 549 xw.closeTag("chain"); 550 } 551 } 552 553 554 if ( doPrintConnections() ) { 555 // do connectivity for all chains: 556 557 List<Map<String,Integer>> cons = structure.getConnections(); 558 for (int cnr = 0; cnr<cons.size();cnr++){ 559 560 561 /* 562 the HashMap for a single CONECT line contains the following fields: 563 <ul> 564 <li>atomserial (mandatory) : Atom serial number 565 <li>bond1 .. bond4 (optional): Serial number of bonded atom 566 <li>hydrogen1 .. hydrogen4 (optional):Serial number of hydrogen bonded atom 567 <li>salt1 .. salt2 (optional): Serial number of salt bridged atom 568 </ul> 569 */ 570 571 Map<String, Integer> con = cons.get(cnr); 572 Integer as = con.get("atomserial"); 573 int atomserial = as.intValue(); 574 575 576 List<Integer> atomids = new ArrayList<Integer>() ; 577 578 // test salt and hydrogen first // 579 if (con.containsKey("salt1")) atomids.add(con.get("salt1")); 580 if (con.containsKey("salt2")) atomids.add(con.get("salt2")); 581 582 if (atomids.size()!=0){ 583 addConnection(xw,"salt",atomserial,atomids); 584 atomids = new ArrayList<Integer>() ; 585 } 586 if (con.containsKey("hydrogen1")) atomids.add(con.get("hydrogen1")); 587 if (con.containsKey("hydrogen2")) atomids.add(con.get("hydrogen2")); 588 if (con.containsKey("hydrogen3")) atomids.add(con.get("hydrogen3")); 589 if (con.containsKey("hydrogen4")) atomids.add(con.get("hydrogen4")); 590 if (atomids.size()!=0){ 591 addConnection(xw,"hydrogen",atomserial,atomids); 592 atomids = new ArrayList<Integer>() ; 593 } 594 595 if (con.containsKey("bond1")) atomids.add(con.get("bond1")); 596 if (con.containsKey("bond2")) atomids.add(con.get("bond2")); 597 if (con.containsKey("bond3")) atomids.add(con.get("bond3")); 598 if (con.containsKey("bond4")) atomids.add(con.get("bond4")); 599 600 if (atomids.size()!=0){ 601 addConnection(xw,"bond",atomserial,atomids); 602 } 603 } 604 } 605 } 606 607 private void addConnection(XMLWriter xw,String connType, int atomserial, List<Integer> atomids){ 608 try{ 609 xw.openTag("connect"); 610 xw.attribute("atomSerial",Integer.toString(atomserial)); 611 xw.attribute("type",connType); 612 for (int i=0;i<atomids.size();i++){ 613 Integer atomid = atomids.get(i); 614 if ( atomid == null) 615 continue; 616 int aid = atomid.intValue(); 617 xw.openTag("atomID"); 618 xw.attribute("atomID",Integer.toString(aid)); 619 xw.closeTag("atomID"); 620 } 621 xw.closeTag("connect"); 622 } catch( Exception e) { 623 e.printStackTrace(); 624 } 625 } 626 627 private static String formatAtomName(Atom a) { 628 629 String fullName = null; 630 String name = a.getName(); 631 Element element = a.getElement(); 632 633 // RULES FOR ATOM NAME PADDING: 4 columns in total: 13, 14, 15, 16 634 635 // if length 4: nothing to do 636 if (name.length()==4) 637 fullName = name; 638 639 // if length 3: they stay at 14 640 else if (name.length()==3) 641 fullName = " "+name; 642 643 // for length 2 it depends: 644 // carbon, oxygens, nitrogens, phosphorous stay at column 14 645 // elements with 2 letters (e.g. NA, FE) will go to column 13 646 else if (name.length()==2) { 647 if (element == Element.C || element == Element.N || element == Element.O || element == Element.P || element == Element.S) 648 fullName = " "+name+" "; 649 else 650 fullName = name+" "; 651 } 652 653 // for length 1 (e.g. K but also C, O) they stay in column 14 654 else if (name.length()==1) 655 fullName = " "+name+" "; 656 657 //if (fullName.length()!=4) 658 // logger.warn("Atom name "+fullName+"to be written in PDB format does not have length 4. Formatting will be incorrect"); 659 660 return fullName; 661 } 662 663 664 public String toMMCIF() { 665 666 StringBuilder str = new StringBuilder(); 667 668 str.append(SimpleMMcifParser.MMCIF_TOP_HEADER+"BioJava_mmCIF_file"+newline); 669 670 if (structure.getPDBHeader()!=null & structure.getPDBHeader().getCrystallographicInfo()!=null && 671 structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup()!=null && 672 structure.getPDBHeader().getCrystallographicInfo().getCrystalCell()!=null) { 673 674 str.append(MMCIFFileTools.toMMCIF("_cell", 675 MMCIFFileTools.convertCrystalCellToCell(structure.getPDBHeader().getCrystallographicInfo().getCrystalCell()))); 676 str.append(MMCIFFileTools.toMMCIF("_symmetry", 677 MMCIFFileTools.convertSpaceGroupToSymmetry(structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup()))); 678 679 } 680 681 682 str.append(getAtomSiteHeader()); 683 684 List<AtomSite> list = MMCIFFileTools.convertStructureToAtomSites(structure); 685 686 687 str.append(MMCIFFileTools.toMMCIF(list,AtomSite.class)); 688 689 return str.toString(); 690 } 691 692 public static String toMMCIF(Chain chain, String chainId, String internalChainId, boolean writeHeader) { 693 StringBuilder str = new StringBuilder(); 694 695 if (writeHeader) 696 str.append(getAtomSiteHeader()); 697 698 699 List<AtomSite> list = MMCIFFileTools.convertChainToAtomSites(chain, 1, chainId, internalChainId); 700 701 str.append(MMCIFFileTools.toMMCIF(list,AtomSite.class)); 702 return str.toString(); 703 } 704 705 public static String toMMCIF(Chain chain, boolean writeHeader) { 706 StringBuilder sb = new StringBuilder(); 707 sb.append(SimpleMMcifParser.MMCIF_TOP_HEADER+"BioJava_mmCIF_file"+newline); 708 sb.append(toMMCIF(chain, chain.getChainID(),chain.getInternalChainID(),writeHeader)); 709 return sb.toString(); 710 } 711 712 public static String getAtomSiteHeader() { 713 String header; 714 try { 715 header = MMCIFFileTools.toLoopMmCifHeaderString("_atom_site", AtomSite.class.getName()); 716 717 } catch (ClassNotFoundException e) { 718 logger.error("Class not found, will not have a header for this MMCIF category: "+e.getMessage()); 719 header = ""; 720 } 721 722 return header; 723 } 724}