/*
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 * Created on 26.04.2004
 * @author Andreas Prlic
 *
 */
package org.biojava.nbio.structure.io;

import java.io.IOException;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.core.util.XMLWriter;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.DBRef;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.PDBHeader;
import org.biojava.nbio.structure.Site;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Methods to convert a structure object into different file formats.
 *
 * <p>The PDB writers in this class emit fixed-width records. All column widths are
 * produced through format specifiers rather than through runs of blanks in string
 * literals, so that the layout is explicit in the code.
 *
 * <p>NOTE: {@link #d2} and {@link #d3} are shared {@link DecimalFormat} instances;
 * {@code DecimalFormat} is documented by the JDK as not synchronized, so concurrent
 * callers of the static {@code toPDB} methods need external synchronization.
 *
 * @author Andreas Prlic
 * @since 1.4
 */
public class FileConvert {

    private static final Logger logger = LoggerFactory.getLogger(FileConvert.class);

    private static final String newline = System.getProperty("line.separator");

    private Structure structure;

    private boolean printConnections;

    // Locale should be english, e.g. in DE separator is "," -> PDB files have "." !
    /** Formatter for coordinates: exactly 3 fraction digits, no grouping. */
    public static DecimalFormat d3 = (DecimalFormat) NumberFormat.getInstance(Locale.US);
    static {
        d3.setMaximumIntegerDigits(4);
        d3.setMinimumFractionDigits(3);
        d3.setMaximumFractionDigits(3);
        d3.setGroupingUsed(false);
    }

    /** Formatter for occupancy and temperature factor: exactly 2 fraction digits, no grouping. */
    public static DecimalFormat d2 = (DecimalFormat) NumberFormat.getInstance(Locale.US);
    static {
        d2.setMaximumIntegerDigits(3);
        d2.setMinimumFractionDigits(2);
        d2.setMaximumFractionDigits(2);
        d2.setGroupingUsed(false);
    }

    /**
     * Constructs a FileConvert object. Printing of connections is enabled by default.
     *
     * @param struc a Structure object
     */
    public FileConvert(Structure struc) {
        structure = struc;
        printConnections = true;
    }

    /**
     * Returns if the Connections should be added
     * default is true;
     *
     * @return if the printConnections flag is set
     */
    public boolean doPrintConnections() {
        return printConnections;
    }

    /**
     * Enable/disable printing of connections.
     * Connections are sometimes buggy in PDB files
     * so there are some cases where one might turn this off.
     *
     * @param printConnections true to append CONECT records in {@link #toPDB()}
     */
    public void setPrintConnections(boolean printConnections) {
        this.printConnections = printConnections;
    }

    /**
     * Left-justifies the given record content in an 80-column line and terminates it
     * with the platform line separator. Content longer than 80 columns is not truncated.
     */
    private static String pdbLine(String content) {
        return String.format("%-80s", content) + newline;
    }

    /**
     * Prints the connections in PDB style
     *
     * Rewritten since 5.0 to use {@link Bond}s
     * Will produce strictly one CONECT record per bond (won't group several bonds in one line)
     *
     * @return all CONECT records, each padded to 80 columns
     */
    private String printPDBConnections() {

        StringBuilder str = new StringBuilder();

        for (Chain c : structure.getChains()) {
            for (Group g : c.getAtomGroups()) {
                for (Atom a : g.getAtoms()) {
                    if (a.getBonds() == null) {
                        continue;
                    }
                    for (Bond b : a.getBonds()) {
                        // columns 1-6 record name, 7-11 and 12-16 the two atom serials
                        String record = String.format(Locale.US, "CONECT%5d%5d",
                                b.getAtomA().getPDBserial(), b.getAtomB().getPDBserial());
                        str.append(pdbLine(record));
                    }
                }
            }
        }

        return str.toString();
    }

    /**
     * Convert a structure into a PDB file.
     *
     * <p>Writes, in order: the header (if present), REMARK 800, DBREF, SSBOND and SITE
     * records, then for every model the polymer chains (each closed by a TER record),
     * the non-polymer chains and the water chains (each set closed by a single TER
     * record), and finally the CONECT records if {@link #doPrintConnections()} is set.
     *
     * @return a String representing a PDB file.
     */
    public String toPDB() {

        StringBuffer str = new StringBuffer();

        // TODO: print all the PDB header informaton in PDB style
        // some objects (PDBHeader, Compound) are still missing

        // toMMCIF() treats the header as optional, so do the same here
        PDBHeader header = structure.getPDBHeader();
        if (header != null) {
            header.toPDB(str);
        }

        //REMARK 800
        if (!structure.getSites().isEmpty()) {
            str.append(pdbLine("REMARK 800"));
            str.append(pdbLine("REMARK 800 SITE"));
            for (Site site : structure.getSites()) {
                site.remark800toPDB(str);
            }
        }
        //DBREF
        for (DBRef dbref : structure.getDBRefs()) {
            dbref.toPDB(str);
            str.append(newline);
        }
        //SSBOND
        List<SSBondImpl> ssbonds = SSBondImpl.getSsBondListFromBondList(structure.getSSBonds());
        for (SSBondImpl ssbond : ssbonds) {
            ssbond.toPDB(str);
            str.append(newline);
        }
        //SITE
        for (Site site : structure.getSites()) {
            try {
                site.toPDB(str);
            } catch (Exception e) {
                // best effort: a broken SITE record must not abort the whole file
                logger.error("Could not write SITE record, skipping it", e);
            }
        }

        //
        // print the atom records
        //

        // do for all models
        int nrModels = structure.nrModels();
        if (structure.isNmr()) {
            // technique starts in column 11
            str.append(pdbLine("EXPDTA    NMR, " + nrModels + " STRUCTURES"));
        }
        for (int m = 0; m < nrModels; m++) {

            if (nrModels > 1) {
                // model serial is right-justified in columns 11-14
                str.append(pdbLine(String.format(Locale.US, "MODEL     %4d", m + 1)));
            }

            // End any polymeric chain with a "TER" record
            for (Chain chain : structure.getPolyChains(m)) {
                if (appendGroups(chain, str) > 0) {
                    str.append(pdbLine("TER"));
                }
            }

            appendChainsWithSharedTer(structure.getNonPolyChains(m), str);
            appendChainsWithSharedTer(structure.getWaterChains(m), str);

            if (nrModels > 1) {
                str.append(pdbLine("ENDMDL"));
            }
        }

        if (doPrintConnections()) {
            str.append(printPDBConnections());
        }

        return str.toString();
    }

    /**
     * Appends the atom records of all atom groups of the chain.
     *
     * @return the number of atom groups of the chain
     */
    private static int appendGroups(Chain chain, StringBuffer str) {
        int nrGroups = chain.getAtomLength();
        for (int h = 0; h < nrGroups; h++) {
            toPDB(chain.getAtomGroup(h), str);
        }
        return nrGroups;
    }

    /**
     * Appends the atom records of all given chains, followed by one single TER record
     * if at least one group was written.
     */
    private static void appendChainsWithSharedTer(List<Chain> chains, StringBuffer str) {
        int written = 0;
        for (Chain chain : chains) {
            written += appendGroups(chain, str);
        }
        if (written > 0) {
            str.append(pdbLine("TER"));
        }
    }

    /**
     * Appends the records of all atoms of the group, followed (recursively) by the
     * records of its alternate location groups. Null atoms are skipped.
     */
    private static void toPDB(Group g, StringBuffer str) {
        int groupsize = g.size();

        for (int atompos = 0; atompos < groupsize; atompos++) {
            Atom a = g.getAtom(atompos);
            if (a != null) {
                toPDB(a, str);
            }
        }

        if (g.hasAltLoc()) {
            for (Group alt : g.getAltLocs()) {
                toPDB(alt, str);
            }
        }
    }

    /**
     * Prints the content of an Atom object as a PDB formatted line.
     *
     * @param a the atom to print
     * @return the ATOM/HETATM record including the trailing line separator
     */
    public static String toPDB(Atom a) {
        StringBuffer w = new StringBuffer();
        toPDB(a, w);
        return w.toString();
    }

    /**
     * Prints the content of an Atom object as a PDB formatted line, using the given
     * chain identifier instead of the one of the atom's chain.
     *
     * @param a the atom to print
     * @param chainId the chain ID that the Atom will have in the output string
     * @return the ATOM/HETATM record including the trailing line separator
     */
    public static String toPDB(Atom a, String chainId) {
        StringBuffer w = new StringBuffer();
        toPDB(a, w, chainId);
        return w.toString();
    }

    /**
     * Convert a Chain object to PDB representation
     *
     * @param chain the chain whose atom groups are printed
     * @return the concatenated atom records of the chain (no TER record)
     */
    public static String toPDB(Chain chain) {
        StringBuffer w = new StringBuffer();
        appendGroups(chain, w);
        return w.toString();
    }

    /**
     * Convert a Group object to PDB representation
     *
     * @param g the group to print
     * @return the atom records of the group and of its alternate locations
     */
    public static String toPDB(Group g) {
        StringBuffer w = new StringBuffer();
        toPDB(g, w);
        return w.toString();
    }

    /**
     * Print ATOM record in the following syntax
     * <pre>
     * ATOM      1  N   ASP A  15     110.964  24.941  59.191  1.00 83.44           N
     *
     * COLUMNS        DATA TYPE       FIELD         DEFINITION
     * ---------------------------------------------------------------------------------
     *  1 -  6        Record name     "ATOM  "
     *  7 - 11        Integer         serial        Atom serial number.
     * 13 - 16        Atom            name          Atom name.
     * 17             Character       altLoc        Alternate location indicator.
     * 18 - 20        Residue name    resName       Residue name.
     * 22             Character       chainID       Chain identifier.
     * 23 - 26        Integer         resSeq        Residue sequence number.
     * 27             AChar           iCode         Code for insertion of residues.
     * 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in
     *                                              Angstroms.
     * 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in
     *                                              Angstroms.
     * 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in
     *                                              Angstroms.
     * 55 - 60        Real(6.2)       occupancy     Occupancy.
     * 61 - 66        Real(6.2)       tempFactor    Temperature factor.
     * 73 - 76        LString(4)      segID         Segment identifier, left-justified.
     * 77 - 78        LString(2)      element       Element symbol, right-justified.
     * 79 - 80        LString(2)      charge        Charge on the atom.
     * </pre>
     * The segment identifier and the charge are not written; the line ends after the
     * element symbol in column 78.
     *
     * @param a the atom to print
     * @param str the buffer the record and a line separator are appended to
     * @param chainID the chain ID that the Atom will have in the output string
     */
    public static void toPDB(Atom a, StringBuffer str, String chainID) {

        Group g = a.getGroup();

        // columns 1-6: the record name is always 6 characters wide
        String record = GroupType.HETATM.equals(g.getType()) ? "HETATM" : "ATOM  ";

        String resName = g.getPDBName();
        String pdbcode = g.getResidueNumber().toString();

        String serial = String.format(Locale.US, "%5d", a.getPDBserial());
        String fullName = formatAtomName(a);

        Character altLoc = a.getAltLoc();
        if (altLoc == null) {
            altLoc = ' ';
        }

        // columns 23-27: residue number (4) plus insertion code (1)
        String resseq;
        if (hasInsertionCode(pdbcode)) {
            resseq = String.format("%5s", pdbcode);
        } else {
            resseq = String.format("%4s", pdbcode) + " ";
        }

        String x = String.format("%8s", d3.format(a.getX()));
        String y = String.format("%8s", d3.format(a.getY()));
        String z = String.format("%8s", d3.format(a.getZ()));
        String occupancy = String.format("%6s", d2.format(a.getOccupancy()));
        String tempfactor = String.format("%6s", d2.format(a.getTempFactor()));

        String leftResName = String.format("%3s", resName);

        StringBuilder s = new StringBuilder();
        s.append(record);
        s.append(serial);
        s.append(String.format("%1s", ""));   // column 12 is blank
        s.append(fullName);
        s.append(altLoc);
        s.append(leftResName);
        s.append(String.format("%1s", ""));   // column 21 is blank
        s.append(chainID);
        s.append(resseq);
        s.append(String.format("%3s", ""));   // columns 28-30 are blank
        s.append(x);
        s.append(y);
        s.append(z);
        s.append(occupancy);
        s.append(tempfactor);

        Element e = a.getElement();

        // upper-case independently of the default locale (e.g. "Ti" under a Turkish locale)
        String eString = e.toString().toUpperCase(Locale.US);

        if (e.equals(Element.R)) {
            eString = "X";
        }

        // pad to column 76, element symbol right-justified in columns 77-78
        str.append(String.format("%-76s%2s", s.toString(), eString));
        str.append(newline);
    }

    /**
     * Appends the ATOM/HETATM record of the atom, using the name of the atom's chain
     * as chain identifier.
     *
     * @param a the atom to print
     * @param str the buffer the record and a line separator are appended to
     */
    public static void toPDB(Atom a, StringBuffer str) {
        toPDB(a, str, a.getGroup().getChain().getName());
    }

    /** test if pdbserial has an insertion code, i.e. can not be parsed as an integer */
    private static boolean hasInsertionCode(String pdbserial) {
        try {
            Integer.parseInt(pdbserial);
        } catch (NumberFormatException e) {
            return true;
        }
        return false;
    }

    /**
     * Convert a protein Structure to a DAS Structure XML response .
     * Since 5.0, bond (CONECT records) information is not supported anymore.
     * If the structure is null, a warning is logged and nothing is written.
     *
     * @param xw a XMLWriter object
     * @throws IOException ...
     */
    public void toDASStructure(XMLWriter xw) throws IOException {

        if (structure == null) {
            logger.warn("can not convert structure null");
            return;
        }

        PDBHeader header = structure.getPDBHeader();

        xw.openTag("object");
        xw.attribute("dbAccessionId", structure.getPDBCode());
        xw.attribute("intObjectId", structure.getPDBCode());
        // the modification date may be missing; omit the attribute instead of failing
        if (header != null && header.getModDate() != null) {
            DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yy", Locale.US);
            xw.attribute("objectVersion", dateFormat.format(header.getModDate()));
        }
        xw.attribute("type", "protein structure");
        xw.attribute("dbSource", "PDB");
        xw.attribute("dbVersion", "20070116");
        xw.attribute("dbCoordSys", "PDBresnum,Protein Structure");

        // do we need object details ???
        xw.closeTag("object");

        int nrModels = structure.nrModels();

        // do for all models
        for (int modelnr = 0; modelnr < nrModels; modelnr++) {

            // do for all chains:
            for (int chainnr = 0; chainnr < structure.size(modelnr); chainnr++) {
                Chain chain = structure.getChainByIndex(modelnr, chainnr);
                xw.openTag("chain");
                xw.attribute("id", chain.getId());
                xw.attribute("SwissprotId", chain.getSwissprotId());
                if (nrModels > 1) {
                    xw.attribute("model", Integer.toString(modelnr + 1));
                }

                //do for all groups:
                for (int groupnr = 0; groupnr < chain.getAtomLength(); groupnr++) {
                    Group gr = chain.getAtomGroup(groupnr);
                    xw.openTag("group");
                    xw.attribute("name", gr.getPDBName());
                    xw.attribute("type", gr.getType().toString());
                    xw.attribute("groupID", gr.getResidueNumber().toString());

                    // do for all atoms:
                    for (Atom atom : gr.getAtoms()) {
                        xw.openTag("atom");
                        xw.attribute("atomID", Integer.toString(atom.getPDBserial()));
                        xw.attribute("atomName", formatAtomName(atom));
                        xw.attribute("x", Double.toString(atom.getX()));
                        xw.attribute("y", Double.toString(atom.getY()));
                        xw.attribute("z", Double.toString(atom.getZ()));
                        xw.closeTag("atom");
                    }
                    xw.closeTag("group");
                }

                xw.closeTag("chain");
            }
        }

        // connections are not supported anymore since 5.0, whatever doPrintConnections() says
    }

    /**
     * Pads an atom name to the 4 columns (13-16) of the PDB atom name field.
     *
     * <p>Names of length 0 or longer than 4 can not be laid out correctly; a warning
     * is logged and the name is returned padded to at least 4 columns.
     *
     * @return the padded atom name, never null
     */
    private static String formatAtomName(Atom a) {

        String name = a.getName();
        Element element = a.getElement();

        // RULES FOR ATOM NAME PADDING: 4 columns in total: 13, 14, 15, 16

        switch (name.length()) {
            case 4:
                // if length 4: nothing to do
                return name;

            case 3:
                // if length 3: they stay at 14
                return String.format(" %-3s", name);

            case 2:
                // for length 2 it depends:
                // carbon, oxygens, nitrogens, phosphorous stay at column 14
                // elements with 2 letters (e.g. NA, FE) will go to column 13
                if (element == Element.C || element == Element.N || element == Element.O
                        || element == Element.P || element == Element.S) {
                    return String.format(" %-3s", name);
                }
                return String.format("%-4s", name);

            case 1:
                // for length 1 (e.g. K but also C, O) they stay in column 14
                return String.format(" %-3s", name);

            default:
                logger.warn("Atom name '{}' to be written in PDB format does not have 1 to 4 characters. "
                        + "Formatting will be incorrect", name);
                return String.format("%-4s", name);
        }
    }

    /**
     * Convert the structure into an mmCIF file: the top header, the _cell and
     * _symmetry categories (only if the header carries both a space group and a
     * crystal cell) and the _atom_site loop.
     *
     * @return a String representing an mmCIF file
     */
    public String toMMCIF() {

        StringBuilder str = new StringBuilder();

        str.append(SimpleMMcifParser.MMCIF_TOP_HEADER + "BioJava_mmCIF_file" + newline);

        PDBHeader header = structure.getPDBHeader();

        if (header != null && header.getCrystallographicInfo() != null
                && header.getCrystallographicInfo().getSpaceGroup() != null
                && header.getCrystallographicInfo().getCrystalCell() != null) {

            str.append(MMCIFFileTools.toMMCIF("_cell",
                    MMCIFFileTools.convertCrystalCellToCell(header.getCrystallographicInfo().getCrystalCell())));
            str.append(MMCIFFileTools.toMMCIF("_symmetry",
                    MMCIFFileTools.convertSpaceGroupToSymmetry(header.getCrystallographicInfo().getSpaceGroup())));
        }

        str.append(getAtomSiteHeader());

        List<AtomSite> list = MMCIFFileTools.convertStructureToAtomSites(structure);

        str.append(MMCIFFileTools.toMMCIF(list, AtomSite.class));

        return str.toString();
    }

    /**
     * Convert a chain into its mmCIF _atom_site rows (model number 1).
     *
     * @param chain the chain to convert
     * @param authId the author chain identifier passed on to the atom site conversion
     * @param asymId the asym identifier passed on to the atom site conversion
     * @param writeHeader if true the _atom_site loop header precedes the rows
     * @return the _atom_site rows, without the mmCIF top header
     */
    public static String toMMCIF(Chain chain, String authId, String asymId, boolean writeHeader) {
        StringBuilder str = new StringBuilder();

        if (writeHeader) {
            str.append(getAtomSiteHeader());
        }

        List<AtomSite> list = MMCIFFileTools.convertChainToAtomSites(chain, 1, authId, asymId);

        str.append(MMCIFFileTools.toMMCIF(list, AtomSite.class));
        return str.toString();
    }

    /**
     * Convert a chain into an mmCIF file, using the chain's name as author id and the
     * chain's id as asym id. The mmCIF top header is always written.
     *
     * @param chain the chain to convert
     * @param writeHeader if true the _atom_site loop header precedes the rows
     * @return a String representing an mmCIF file
     */
    public static String toMMCIF(Chain chain, boolean writeHeader) {
        StringBuilder sb = new StringBuilder();
        sb.append(SimpleMMcifParser.MMCIF_TOP_HEADER + "BioJava_mmCIF_file" + newline);
        sb.append(toMMCIF(chain, chain.getName(), chain.getId(), writeHeader));
        return sb.toString();
    }

    /**
     * Builds the loop header of the _atom_site category.
     *
     * @return the header, or an empty string if the AtomSite class could not be resolved
     */
    public static String getAtomSiteHeader() {
        try {
            return MMCIFFileTools.toLoopMmCifHeaderString("_atom_site", AtomSite.class.getName());
        } catch (ClassNotFoundException e) {
            logger.error("Class not found, will not have a header for this MMCIF category: " + e.getMessage(), e);
            return "";
        }
    }
}