/*
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 * Created on 26.04.2004
 * @author Andreas Prlic
 *
 */
package org.biojava.nbio.structure.io;

import java.io.IOException;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.core.util.XMLWriter;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.DBRef;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.PDBHeader;
import org.biojava.nbio.structure.Site;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.cif.CifStructureConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Methods to convert a structure object into different file formats.
 *
 * <p>NOTE(review): the static {@link #d3} and {@link #d2} formatters are shared by all
 * static {@code toPDB} methods. {@link DecimalFormat} is documented by the JDK as not
 * synchronized, so concurrent callers may need external synchronization - confirm
 * against how this class is used.
 *
 * @author Andreas Prlic
 * @since 1.4
 */
public class FileConvert {

    private static final Logger logger = LoggerFactory.getLogger(FileConvert.class);

    /** The structure that the instance methods serialize. */
    private Structure structure;

    /** Whether {@link #toPDB()} appends CONECT records. */
    private boolean printConnections;

    // Locale should be english, e.g. in DE separator is "," -> PDB files have "." !

    /** Formatter for coordinates: at most 4 integer digits, exactly 3 fraction digits, no grouping. */
    public static DecimalFormat d3 = (DecimalFormat) NumberFormat.getInstance(Locale.US);
    static {
        d3.setMaximumIntegerDigits(4);
        d3.setMinimumFractionDigits(3);
        d3.setMaximumFractionDigits(3);
        d3.setGroupingUsed(false);
    }

    /** Formatter for occupancy and temperature factor: at most 3 integer digits, exactly 2 fraction digits. */
    public static DecimalFormat d2 = (DecimalFormat) NumberFormat.getInstance(Locale.US);
    static {
        d2.setMaximumIntegerDigits(3);
        d2.setMinimumFractionDigits(2);
        d2.setMaximumFractionDigits(2);
        d2.setGroupingUsed(false);
    }

    private static final String newline = System.getProperty("line.separator");

    /**
     * Constructs a FileConvert object. Printing of connections is enabled by default.
     *
     * @param struc a Structure object
     */
    public FileConvert(Structure struc) {
        structure = struc;
        printConnections = true;
    }

    /**
     * Returns if the Connections should be added
     * default is true;
     *
     * @return if the printConnections flag is set
     */
    public boolean doPrintConnections() {
        return printConnections;
    }

    /**
     * Enable/disable printing of connections.
     * Connections are sometimes buggy in PDB files
     * so there are some cases where one might turn this off.
     *
     * @param printConnections true if {@link #toPDB()} should append CONECT records
     */
    public void setPrintConnections(boolean printConnections) {
        this.printConnections = printConnections;
    }

    /**
     * Pads a record to the fixed 80-column width used for the TER, ENDMDL and CONECT lines.
     *
     * @param record the record text, without line terminator
     * @return the record left-justified in a field of 80 characters
     */
    private static String padRecord(String record) {
        return String.format("%-80s", record);
    }

    /**
     * Prints the connections in PDB style
     *
     * Rewritten since 5.0 to use {@link Bond}s
     * Will produce strictly one CONECT record per bond (won't group several bonds in one line)
     *
     * <p>NOTE(review): bonds are collected by walking every atom's bond list; if a bond is
     * registered on both of its atoms it will be written once per atom - confirm against
     * the Bond implementation.
     *
     * @return all CONECT records, each padded to 80 columns and terminated by a newline
     */
    private String printPDBConnections() {

        StringBuilder str = new StringBuilder();

        for (Chain c : structure.getChains()) {
            for (Group g : c.getAtomGroups()) {
                for (Atom a : g.getAtoms()) {
                    if (a.getBonds() == null) {
                        continue;
                    }
                    for (Bond b : a.getBonds()) {
                        // Locale.US keeps the serial numbers in ASCII digits on every platform.
                        String record = String.format(Locale.US, "CONECT%5d%5d",
                                b.getAtomA().getPDBserial(), b.getAtomB().getPDBserial());
                        str.append(padRecord(record)).append(newline);
                    }
                }
            }
        }

        return str.toString();
    }

    /**
     * Appends the PDB representation of every atom group of a chain.
     *
     * @param chain the chain whose atom groups are written
     * @param str the buffer to append to
     * @return the number of groups that were written
     */
    private static int appendGroups(Chain chain, StringBuffer str) {
        int nrGroups = chain.getAtomLength();
        for (int h = 0; h < nrGroups; h++) {
            toPDB(chain.getAtomGroup(h), str);
        }
        return nrGroups;
    }

    /**
     * Convert a structure into a PDB file.
     *
     * <p>Writes, in this order: the header, REMARK 800, DBREF, SSBOND and SITE records,
     * then for every model the polymer chains (each closed by TER), the non-polymer
     * chains and the water chains (each set closed by a single TER), and finally the
     * CONECT records if {@link #doPrintConnections()} is true.
     *
     * <p>NOTE(review): the REMARK 800, EXPDTA and MODEL lines are emitted as plain
     * concatenated text and are not padded to fixed columns - confirm the expected
     * layout against the wwPDB format description.
     *
     * @return a String representing a PDB file.
     */
    public String toPDB() {

        StringBuffer str = new StringBuffer();

        // TODO: print all the PDB header information in PDB style
        // some objects (PDBHeader, Compound) are still missing

        PDBHeader header = structure.getPDBHeader();
        header.toPDB(str);

        // REMARK 800
        if (!structure.getSites().isEmpty()) {
            str.append("REMARK 800 ").append(newline);
            str.append("REMARK 800 SITE ").append(newline);
            for (Site site : structure.getSites()) {
                site.remark800toPDB(str);
            }
        }

        // DBREF
        for (DBRef dbref : structure.getDBRefs()) {
            dbref.toPDB(str);
            str.append(newline);
        }

        // SSBOND
        List<SSBondImpl> ssbonds = SSBondImpl.getSsBondListFromBondList(structure.getSSBonds());
        for (SSBondImpl ssbond : ssbonds) {
            ssbond.toPDB(str);
            str.append(newline);
        }

        // SITE
        for (Site site : structure.getSites()) {
            try {
                site.toPDB(str);
            } catch (Exception e) {
                // Best effort: a site that cannot be written must not abort the whole export.
                logger.warn("Could not write SITE record, skipping it", e);
            }
        }

        //
        // print the atom records
        //

        // do for all models
        int nrModels = structure.nrModels();
        if (structure.isNmr()) {
            str.append("EXPDTA NMR, ").append(nrModels).append(" STRUCTURES").append(newline);
        }

        for (int m = 0; m < nrModels; m++) {

            if (nrModels > 1) {
                str.append("MODEL ").append(m + 1).append(newline);
            }

            // End any non-empty polymeric chain with its own "TER" record
            for (Chain chain : structure.getPolyChains(m)) {
                if (appendGroups(chain, str) > 0) {
                    str.append(padRecord("TER")).append(newline);
                }
            }

            // All non-polymer chains of the model share a single closing "TER" record
            boolean nonPolyGroupsExist = false;
            for (Chain chain : structure.getNonPolyChains(m)) {
                if (appendGroups(chain, str) > 0) {
                    nonPolyGroupsExist = true;
                }
            }
            if (nonPolyGroupsExist) {
                str.append(padRecord("TER")).append(newline);
            }

            // Same for the water chains
            boolean waterGroupsExist = false;
            for (Chain chain : structure.getWaterChains(m)) {
                if (appendGroups(chain, str) > 0) {
                    waterGroupsExist = true;
                }
            }
            if (waterGroupsExist) {
                str.append(padRecord("TER")).append(newline);
            }

            if (nrModels > 1) {
                str.append(padRecord("ENDMDL")).append(newline);
            }
        }

        if (doPrintConnections()) {
            str.append(printPDBConnections());
        }

        return str.toString();
    }

    /**
     * Appends the PDB lines of all atoms of a group, followed by the atoms of its
     * alternate-location groups (recursively).
     *
     * @param g the group to write
     * @param str the buffer to append to
     */
    private static void toPDB(Group g, StringBuffer str) {
        int groupsize = g.size();

        for (int atompos = 0; atompos < groupsize; atompos++) {
            Atom a = g.getAtom(atompos);
            if (a == null) {
                continue;
            }
            toPDB(a, str);
        }

        if (g.hasAltLoc()) {
            for (Group alt : g.getAltLocs()) {
                toPDB(alt, str);
            }
        }
    }

    /**
     * Prints the content of an Atom object as a PDB formatted line, using the name of
     * the atom's own chain as chain identifier.
     *
     * @param a the atom to write
     * @return the PDB line for the atom, including the trailing line separator
     */
    public static String toPDB(Atom a) {
        StringBuffer w = new StringBuffer();
        toPDB(a, w);
        return w.toString();
    }

    /**
     * Prints the content of an Atom object as a PDB formatted line with an explicit
     * chain identifier.
     *
     * @param a the atom to write
     * @param chainId the chain ID that the Atom will have in the output string
     * @return the PDB line for the atom, including the trailing line separator
     */
    public static String toPDB(Atom a, String chainId) {
        StringBuffer w = new StringBuffer();
        toPDB(a, w, chainId);
        return w.toString();
    }

    /**
     * Convert a Chain object to PDB representation
     *
     * @param chain the chain whose atom groups are written
     * @return the concatenated PDB lines of all atoms in the chain (no TER record)
     */
    public static String toPDB(Chain chain) {
        StringBuffer w = new StringBuffer();
        appendGroups(chain, w);
        return w.toString();
    }

    /**
     * Convert a Group object to PDB representation
     *
     * @param g the group to write
     * @return the concatenated PDB lines of all atoms in the group and its alternate locations
     */
    public static String toPDB(Group g) {
        StringBuffer w = new StringBuffer();
        toPDB(g, w);
        return w.toString();
    }

    /**
     * Print ATOM record in the following syntax
     * <pre>
     * ATOM      1  N   ASP A  15     110.964  24.941  59.191  1.00 83.44           N
     *
     * COLUMNS        DATA TYPE       FIELD         DEFINITION
     * ---------------------------------------------------------------------------------
     *  1 -  6        Record name     "ATOM  "
     *  7 - 11        Integer         serial        Atom serial number.
     * 13 - 16        Atom            name          Atom name.
     * 17             Character       altLoc        Alternate location indicator.
     * 18 - 20        Residue name    resName       Residue name.
     * 22             Character       chainID       Chain identifier.
     * 23 - 26        Integer         resSeq        Residue sequence number.
     * 27             AChar           iCode         Code for insertion of residues.
     * 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in
     *                                              Angstroms.
     * 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in
     *                                              Angstroms.
     * 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in
     *                                              Angstroms.
     * 55 - 60        Real(6.2)       occupancy     Occupancy.
     * 61 - 66        Real(6.2)       tempFactor    Temperature factor.
     * 73 - 76        LString(4)      segID         Segment identifier, left-justified.
     * 77 - 78        LString(2)      element       Element symbol, right-justified.
     * 79 - 80        LString(2)      charge        Charge on the atom.
     * </pre>
     *
     * <p>Groups of type {@link GroupType#HETATM} are written as HETATM records, all
     * others as ATOM records. The segment identifier and charge are not written.
     *
     * @param a the atom to write
     * @param str the buffer the line (plus line separator) is appended to
     * @param chainID the chain ID that the Atom will have in the output string
     */
    public static void toPDB(Atom a, StringBuffer str, String chainID) {

        Group g = a.getGroup();
        GroupType type = g.getType();

        // Columns 1-6: the record name is always six characters wide, so "ATOM" is padded.
        String record = String.format("%-6s", type.equals(GroupType.HETATM) ? "HETATM" : "ATOM");

        String resName = g.getPDBName();
        String pdbcode = g.getResidueNumber().toString();

        // Locale.US keeps the serial number in ASCII digits regardless of the default locale.
        String serial = String.format(Locale.US, "%5d", a.getPDBserial());
        String fullName = formatAtomName(a);

        Character altLoc = a.getAltLoc();
        if (altLoc == null) {
            altLoc = ' ';
        }

        // Columns 23-27: residue number right-justified in 23-26, insertion code (if any) in 27.
        String resseq;
        if (hasInsertionCode(pdbcode)) {
            resseq = String.format("%5s", pdbcode);
        } else {
            resseq = String.format("%4s", pdbcode) + " ";
        }

        String x = String.format("%8s", d3.format(a.getX()));
        String y = String.format("%8s", d3.format(a.getY()));
        String z = String.format("%8s", d3.format(a.getZ()));
        String occupancy = String.format("%6s", d2.format(a.getOccupancy()));
        String tempfactor = String.format("%6s", d2.format(a.getTempFactor()));

        String leftResName = String.format("%3s", resName);

        StringBuilder s = new StringBuilder(80);
        s.append(record);                   // columns  1-6
        s.append(serial);                   // columns  7-11
        s.append(' ');                      // column  12 is blank
        s.append(fullName);                 // columns 13-16
        s.append(altLoc);                   // column  17
        s.append(leftResName);              // columns 18-20
        s.append(' ');                      // column  21 is blank
        s.append(chainID);                  // column  22
        s.append(resseq);                   // columns 23-27
        s.append(String.format("%3s", "")); // columns 28-30 are blank
        s.append(x);                        // columns 31-38
        s.append(y);                        // columns 39-46
        s.append(z);                        // columns 47-54
        s.append(occupancy);                // columns 55-60
        s.append(tempfactor);               // columns 61-66

        Element e = a.getElement();

        // Explicit locale: the default locale must not change the element symbol
        // (e.g. the Turkish dotted capital I).
        String eString = e.toString().toUpperCase(Locale.US);

        if (e.equals(Element.R)) {
            eString = "X";
        }

        // Pad up to column 76, then write the element symbol right-justified in columns 77-78.
        str.append(String.format("%-76s%2s", s.toString(), eString));
        str.append(newline);
    }

    /**
     * Appends the PDB line of an atom, using the name of the atom's own chain as
     * chain identifier.
     *
     * @param a the atom to write; it must belong to a group that belongs to a chain
     * @param str the buffer the line (plus line separator) is appended to
     */
    public static void toPDB(Atom a, StringBuffer str) {
        toPDB(a, str, a.getGroup().getChain().getName());
    }

    /**
     * Test if pdbserial has an insertion code.
     *
     * @param pdbserial the textual residue number
     * @return true if the text cannot be parsed as a plain integer
     */
    private static boolean hasInsertionCode(String pdbserial) {
        try {
            Integer.parseInt(pdbserial);
        } catch (NumberFormatException e) {
            return true;
        }
        return false;
    }

    /**
     * Convert a protein Structure to a DAS Structure XML response .
     * Since 5.0, bond (CONECT records) information is not supported anymore.
     * Does nothing except log an error if this converter holds no structure.
     *
     * @param xw a XMLWriter object
     * @throws IOException if writing to {@code xw} fails
     */
    public void toDASStructure(XMLWriter xw) throws IOException {

        if (structure == null) {
            logger.error("can not convert structure null");
            return;
        }

        PDBHeader header = structure.getPDBHeader();

        xw.openTag("object");
        xw.attribute("dbAccessionId", structure.getPDBCode());
        xw.attribute("intObjectId", structure.getPDBCode());
        // the modification date is used as the object version
        DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yy", Locale.US);
        String modificationDate = dateFormat.format(header.getModDate());
        xw.attribute("objectVersion", modificationDate);
        xw.attribute("type", "protein structure");
        xw.attribute("dbSource", "PDB");
        xw.attribute("dbVersion", "20070116");
        xw.attribute("dbCoordSys", "PDBresnum,Protein Structure");

        // do we need object details ???
        xw.closeTag("object");

        int nrModels = structure.nrModels();

        // do for all models
        for (int modelnr = 0; modelnr < nrModels; modelnr++) {

            // do for all chains:
            for (int chainnr = 0; chainnr < structure.size(modelnr); chainnr++) {
                Chain chain = structure.getChainByIndex(modelnr, chainnr);
                xw.openTag("chain");
                xw.attribute("id", chain.getId());
                if (nrModels > 1) {
                    // the model attribute is 1-based and only written for multi-model structures
                    xw.attribute("model", Integer.toString(modelnr + 1));
                }

                // do for all groups:
                for (int groupnr = 0; groupnr < chain.getAtomLength(); groupnr++) {
                    Group gr = chain.getAtomGroup(groupnr);
                    xw.openTag("group");
                    xw.attribute("name", gr.getPDBName());
                    xw.attribute("type", gr.getType().toString());
                    xw.attribute("groupID", gr.getResidueNumber().toString());

                    // do for all atoms:
                    for (Atom atom : gr.getAtoms()) {
                        xw.openTag("atom");
                        xw.attribute("atomID", Integer.toString(atom.getPDBserial()));
                        xw.attribute("atomName", formatAtomName(atom));
                        xw.attribute("x", Double.toString(atom.getX()));
                        xw.attribute("y", Double.toString(atom.getY()));
                        xw.attribute("z", Double.toString(atom.getZ()));
                        xw.closeTag("atom");
                    }
                    xw.closeTag("group");
                }

                xw.closeTag("chain");
            }
        }

        // Connections are not supported anymore since 5.0, whatever the printConnections flag says.
    }

    /**
     * Pads an atom name to the four-column atom name field (columns 13-16) of a PDB record.
     *
     * <p>Names longer than four characters cannot be represented; they are returned
     * unchanged and a warning is logged.
     *
     * @param a the atom whose name is formatted
     * @return the atom name, padded to four characters when it has four or fewer
     */
    private static String formatAtomName(Atom a) {

        String name = a.getName();
        Element element = a.getElement();
        int length = name.length();

        // RULES FOR ATOM NAME PADDING: 4 columns in total: 13, 14, 15, 16

        if (length > 4) {
            logger.warn("Atom name {} to be written in PDB format is longer than 4 characters."
                    + " Formatting will be incorrect", name);
            return name;
        }

        // For length 2 it depends on the element:
        // carbon, nitrogen, oxygen, phosphorus and sulfur names stay at column 14,
        // names of elements with 2 letters (e.g. NA, FE) go to column 13.
        boolean oneLetterElement = element == Element.C || element == Element.N
                || element == Element.O || element == Element.P || element == Element.S;

        // Length 4 fills the whole field; length 2 of a two-letter element starts at column 13.
        boolean startsAtColumn13 = length == 4 || (length == 2 && !oneLetterElement);

        if (startsAtColumn13) {
            return String.format("%-4s", name);
        }

        // Everything else (length 3, length 2 of C/N/O/P/S, length 1 such as K, C, O)
        // starts at column 14: one leading blank, then the name left-justified in 3 columns.
        return String.format(" %-3s", name);
    }

    /**
     * Convert this structure to its CIF representation.
     *
     * @return a String representing this structure as CIF
     */
    public String toMMCIF() {
        return CifStructureConverter.toText(this.structure);
    }

    /**
     * Convert a chain to its CIF representation.
     *
     * @param chain data
     * @return a String representing this chain as CIF
     */
    public static String toMMCIF(Chain chain) {
        return CifStructureConverter.toText(chain);
    }
}