001/*
002 * This code may be freely distributed and modified under the
003 * terms of the GNU Lesser General Public Licence.  This should
004 * be distributed with the code.  If you do not have a copy,
005 * see:
006 *
007 *      http://www.gnu.org/copyleft/lesser.html
008 *
009 * Copyright for this code is held jointly by the individual
010 * authors.  These should be listed in @author doc comments.
011 *
012 * For more information on the BioJava project and its aims,
013 * or to join the biojava-l mailing list, visit the home page
014 * at:
015 *
016 *      http://www.biojava.org/
017 *
018 * Created on 26.04.2004
019 * @author Andreas Prlic
020 *
021 */
022package org.biojava.nbio.structure.io;
023
024import java.io.IOException;
025import java.text.DateFormat;
026import java.text.DecimalFormat;
027import java.text.NumberFormat;
028import java.text.SimpleDateFormat;
029import java.util.List;
030import java.util.Locale;
031
032import org.biojava.nbio.core.util.XMLWriter;
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.Bond;
035import org.biojava.nbio.structure.Chain;
036import org.biojava.nbio.structure.DBRef;
037import org.biojava.nbio.structure.Element;
038import org.biojava.nbio.structure.Group;
039import org.biojava.nbio.structure.GroupType;
040import org.biojava.nbio.structure.PDBHeader;
041import org.biojava.nbio.structure.Site;
042import org.biojava.nbio.structure.Structure;
043import org.biojava.nbio.structure.io.cif.CifStructureConverter;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047
048/**
049 * Methods to convert a structure object into different file formats.
050 * @author Andreas Prlic
051 * @since 1.4
052 */
053public class FileConvert {
054
055        private static final Logger logger = LoggerFactory.getLogger(FileConvert.class);
056
057
058
059        private Structure structure ;
060
061        private boolean printConnections;
062
063        // Locale should be english, e.g. in DE separator is "," -> PDB files have "." !
064        public static DecimalFormat d3 = (DecimalFormat)NumberFormat.getInstance(Locale.US);
065        static {
066                d3.setMaximumIntegerDigits(4);
067                d3.setMinimumFractionDigits(3);
068                d3.setMaximumFractionDigits(3);
069                d3.setGroupingUsed(false);
070        }
071        public static DecimalFormat d2 = (DecimalFormat)NumberFormat.getInstance(Locale.US);
072        static {
073                d2.setMaximumIntegerDigits(3);
074                d2.setMinimumFractionDigits(2);
075                d2.setMaximumFractionDigits(2);
076                d2.setGroupingUsed(false);
077        }
078
079        private static final String newline = System.getProperty("line.separator");
080
081        /**
082         * Constructs a FileConvert object.
083         *
084         * @param struc  a Structure object
085         */
086        public FileConvert(Structure struc) {
087                structure = struc ;
088                printConnections = true;
089        }
090
091        /**
092         * Returns if the Connections should be added
093         * default is true;
094         * @return if the printConnections flag is set
095         */
096        public boolean doPrintConnections() {
097                return printConnections;
098        }
099
100        /** enable/disable printing of connections
101         * connections are sometimes buggy in PDB files
102         * so there are some cases where one might turn this off.
103         * @param printConnections
104         */
105        public void setPrintConnections(boolean printConnections) {
106                this.printConnections = printConnections;
107        }
108
109        /**
110         * Prints the connections in PDB style
111         *
112         * Rewritten since 5.0 to use {@link Bond}s
113         * Will produce strictly one CONECT record per bond (won't group several bonds in one line)
114         */
115        private String printPDBConnections(){
116
117                StringBuilder str = new StringBuilder();
118
119                for (Chain c:structure.getChains()) {
120                        for (Group g:c.getAtomGroups()) {
121                                for (Atom a:g.getAtoms()) {
122                                        if (a.getBonds()!=null) {
123                                                for (Bond b:a.getBonds()) {                             //7890123456789012345678901234567890123456789012345678901234567890
124                                                        str.append(String.format("CONECT%5d%5d                                                                "+newline, b.getAtomA().getPDBserial(), b.getAtomB().getPDBserial()));
125                                                }
126                                        }
127                                }
128                        }
129                }
130
131                return str.toString();
132        }
133
134        /** Convert a structure into a PDB file.
135         * @return a String representing a PDB file.
136         */
137        public String toPDB() {
138
139
140                StringBuffer str = new StringBuffer();
141                //int i = 0 ;
142
143
144
145                // TODO: print all the PDB header informaton in PDB style
146                // some objects (PDBHeader, Compound) are still missing
147                //
148
149                PDBHeader header = structure.getPDBHeader();
150                header.toPDB(str);
151
152
153                //REMARK 800
154                if (!structure.getSites().isEmpty()) {
155                        str.append("REMARK 800                                                                      ").append(newline);
156                        str.append("REMARK 800 SITE                                                                 ").append(newline);
157                        for (Site site : structure.getSites()) {
158                                site.remark800toPDB(str);
159                        }
160                }
161                //DBREF
162                for (DBRef dbref : structure.getDBRefs()){
163                        dbref.toPDB(str);
164                        str.append(newline);
165                }
166                //SSBOND
167                List<SSBondImpl> ssbonds = SSBondImpl.getSsBondListFromBondList(structure.getSSBonds());
168                for (SSBondImpl ssbond : ssbonds){
169                        ssbond.toPDB(str);
170                        str.append(newline);
171                }
172                //SITE
173                for (Site site : structure.getSites()) {
174                        try {
175                                site.toPDB(str);
176                        } catch (Exception e){
177                                e.printStackTrace();
178                        }
179                }
180
181                //
182                // print the atom records
183                //
184
185                // do for all models
186                int nrModels = structure.nrModels() ;
187                if ( structure.isNmr()) {
188                        str.append("EXPDTA    NMR, "+ nrModels+" STRUCTURES"+newline) ;
189                }
190                for (int m = 0 ; m < nrModels ; m++) {
191
192
193                        if ( nrModels>1 ) {
194                                str.append("MODEL      " + (m+1)+ newline);
195                        }
196
197                        List<Chain> polyChains = structure.getPolyChains(m);
198                        List<Chain> nonPolyChains = structure.getNonPolyChains(m);
199                        List<Chain> waterChains = structure.getWaterChains(m);
200
201                        for (Chain chain : polyChains) {
202
203                                // do for all groups
204                                int nrGroups = chain.getAtomLength();
205                                for ( int h=0; h<nrGroups;h++){
206
207                                        Group g= chain.getAtomGroup(h);
208
209                                        toPDB(g,str);
210
211                                }
212                                // End any polymeric chain with a "TER" record
213                                if (nrGroups > 0) str.append(String.format("%-80s","TER")).append(newline);
214
215                        }
216
217                        boolean nonPolyGroupsExist = false;
218                        for (Chain chain : nonPolyChains) {
219
220                                // do for all groups
221                                int nrGroups = chain.getAtomLength();
222                                for ( int h=0; h<nrGroups;h++){
223
224                                        Group g= chain.getAtomGroup(h);
225
226                                        toPDB(g,str);
227
228                                        nonPolyGroupsExist = true;
229                                }
230
231                        }
232                        if (nonPolyGroupsExist) str.append(String.format("%-80s","TER")).append(newline);;
233
234                        boolean waterGroupsExist = false;
235                        for (Chain chain : waterChains) {
236
237                                // do for all groups
238                                int nrGroups = chain.getAtomLength();
239                                for ( int h=0; h<nrGroups;h++){
240
241                                        Group g= chain.getAtomGroup(h);
242
243                                        toPDB(g,str);
244
245                                        waterGroupsExist = true;
246                                }
247
248                        }
249                        if (waterGroupsExist) str.append(String.format("%-80s","TER")).append(newline);;
250
251
252                        if ( nrModels>1) {
253                                str.append(String.format("%-80s","ENDMDL")).append(newline);
254                        }
255
256
257
258                }
259
260                if ( doPrintConnections() )
261                        str.append(printPDBConnections());
262
263                return str.toString() ;
264        }
265
266        private static void toPDB(Group g, StringBuffer str) {
267                // iterate over all atoms ...
268                // format output ...
269                int groupsize  = g.size();
270
271                for ( int atompos = 0 ; atompos < groupsize; atompos++) {
272                        Atom a = null ;
273
274                        a = g.getAtom(atompos);
275                        if ( a == null)
276                                continue ;
277
278                        toPDB(a, str);
279
280
281                        //line = record + serial + " " + fullname +altLoc
282                        //+ leftResName + " " + chainID + resseq
283                        //+ "   " + x+y+z
284                        //+ occupancy + tempfactor;
285                        //str.append(line + newline);
286                        //System.out.println(line);
287                }
288                if ( g.hasAltLoc()){
289                        for (Group alt : g.getAltLocs() ) {
290                                toPDB(alt,str);
291                        }
292                }
293
294        }
295
296        /** Prints the content of an Atom object as a PDB formatted line.
297         *
298         * @param a
299         * @return
300         */
301        public static String toPDB(Atom a){
302                StringBuffer w = new StringBuffer();
303
304                toPDB(a,w);
305
306                return w.toString();
307
308        }
309
310        public static String toPDB(Atom a, String chainId) {
311                StringBuffer w = new StringBuffer();
312
313                toPDB(a,w, chainId);
314
315                return w.toString();
316        }
317
318
319        /**
320         * Convert a Chain object to PDB representation
321         *
322         * @param chain
323         * @return
324         */
325        public static String toPDB(Chain chain){
326                StringBuffer w = new StringBuffer();
327                int nrGroups = chain.getAtomLength();
328
329                for ( int h=0; h<nrGroups;h++){
330
331                        Group g= chain.getAtomGroup(h);
332
333
334                        toPDB(g,w);
335
336
337                }
338
339                return w.toString();
340        }
341
342        /**
343         * Convert a Group object to PDB representation
344         *
345         * @param g
346         * @return
347         */
348        public static String toPDB(Group g){
349                StringBuffer w = new StringBuffer();
350                toPDB(g,w);
351                return w.toString();
352        }
353
354        /**
355         * Print ATOM record in the following syntax
356         * <pre>
357         * ATOM      1  N   ASP A  15     110.964  24.941  59.191  1.00 83.44           N
358         *
359         * COLUMNS        DATA TYPE       FIELD         DEFINITION
360         * ---------------------------------------------------------------------------------
361         * 1 -  6        Record name     "ATOM  "
362         * 7 - 11        Integer         serial        Atom serial number.
363         * 13 - 16        Atom            name          Atom name.
364         * 17             Character       altLoc        Alternate location indicator.
365         * 18 - 20        Residue name    resName       Residue name.
366         * 22             Character       chainID       Chain identifier.
367         * 23 - 26        Integer         resSeq        Residue sequence number.
368         * 27             AChar           iCode         Code for insertion of residues.
369         * 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in
370         * Angstroms.
371         * 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in
372         * Angstroms.
373         * 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in
374         * Angstroms.
375         * 55 - 60        Real(6.2)       occupancy     Occupancy.
376         * 61 - 66        Real(6.2)       tempFactor    Temperature factor.
377         * 73 - 76        LString(4)      segID         Segment identifier, left-justified.
378         * 77 - 78        LString(2)      element       Element symbol, right-justified.
379         * 79 - 80        LString(2)      charge        Charge on the atom.
380         * </pre>
381         * @param a
382         * @param str
383         * @param chainID the chain ID that the Atom will have in the output string
384         */
385        public static void toPDB(Atom a, StringBuffer str, String chainID) {
386
387                Group g = a.getGroup();
388
389                GroupType type = g.getType() ;
390
391                String record = "" ;
392                if ( type.equals(GroupType.HETATM) ) {
393                        record = "HETATM";
394                } else {
395                        record = "ATOM  ";
396                }
397
398
399                // format output ...
400                String resName = g.getPDBName();
401                String pdbcode = g.getResidueNumber().toString();
402
403
404                int    seri       = a.getPDBserial()        ;
405                String serial     = String.format("%5d",seri);
406                String fullName   = formatAtomName(a);
407
408                Character  altLoc = a.getAltLoc();
409                if ( altLoc == null)
410                        altLoc = ' ';
411
412                String resseq = "" ;
413                if ( hasInsertionCode(pdbcode) )
414                        resseq     = String.format("%5s",pdbcode);
415                else
416                        resseq     = String.format("%4s",pdbcode)+" ";
417
418                String x          = String.format("%8s",d3.format(a.getX()));
419                String y          = String.format("%8s",d3.format(a.getY()));
420                String z          = String.format("%8s",d3.format(a.getZ()));
421                String occupancy  = String.format("%6s",d2.format(a.getOccupancy())) ;
422                String tempfactor = String.format("%6s",d2.format(a.getTempFactor()));
423
424
425                String leftResName = String.format("%3s",resName);
426
427                StringBuffer s = new StringBuffer();
428                s.append(record);
429                s.append(serial);
430                s.append(" ");
431                s.append(fullName);
432                s.append(altLoc);
433                s.append(leftResName);
434                s.append(" ");
435                s.append(chainID);
436                s.append(resseq);
437                s.append("   ");
438                s.append(x);
439                s.append(y);
440                s.append(z);
441                s.append(occupancy);
442                s.append(tempfactor);
443
444                Element e = a.getElement();
445
446                String eString = e.toString().toUpperCase();
447
448                if ( e.equals(Element.R)) {
449                        eString = "X";
450                }
451                str.append(String.format("%-76s%2s", s.toString(),eString));
452                str.append(newline);
453
454        }
455
456        public static void toPDB(Atom a, StringBuffer str) {
457                toPDB(a,str,a.getGroup().getChain().getName());
458        }
459
460
461        /** test if pdbserial has an insertion code */
462        private static boolean hasInsertionCode(String pdbserial) {
463                try {
464                        Integer.parseInt(pdbserial) ;
465                } catch (NumberFormatException e) {
466                        return true ;
467                }
468                return false ;
469        }
470
471
472        /**
473         * Convert a protein Structure to a DAS Structure XML response .
474         * Since 5.0, bond (CONECT records) information is not supported anymore.
475         * @param xw  a XMLWriter object
476         * @throws IOException ...
477         *
478         */
479        public void toDASStructure(XMLWriter xw)
480                        throws IOException
481        {
482
483                /*xmlns="http://www.sanger.ac.uk/xml/das/2004/06/17/dasalignment.xsd" xmlns:align="http://www.sanger.ac.uk/xml/das/2004/06/17/alignment.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance" xsd:schemaLocation="http://www.sanger.ac.uk/xml/das/2004/06/17/dasalignment.xsd http://www.sanger.ac.uk/xml/das//2004/06/17/dasalignment.xsd"*/
484
485                if ( structure == null){
486                        System.err.println("can not convert structure null");
487                        return;
488                }
489
490                PDBHeader header = structure.getPDBHeader();
491
492                xw.openTag("object");
493                xw.attribute("dbAccessionId",structure.getPDBCode());
494                xw.attribute("intObjectId"  ,structure.getPDBCode());
495                // missing modification date
496                DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yy",Locale.US);
497                String modificationDate = dateFormat.format(header.getModDate());
498                xw.attribute("objectVersion",modificationDate);
499                xw.attribute("type","protein structure");
500                xw.attribute("dbSource","PDB");
501                xw.attribute("dbVersion","20070116");
502                xw.attribute("dbCoordSys","PDBresnum,Protein Structure");
503
504                // do we need object details ???
505                xw.closeTag("object");
506
507
508                // do for all models
509                for (int modelnr = 0;modelnr<structure.nrModels();modelnr++){
510
511                        // do for all chains:
512                        for (int chainnr = 0;chainnr<structure.size(modelnr);chainnr++){
513                                Chain chain = structure.getChainByIndex(modelnr,chainnr);
514                                xw.openTag("chain");
515                                xw.attribute("id",chain.getId());
516                                if (structure.nrModels()>1){
517                                        xw.attribute("model",Integer.toString(modelnr+1));
518                                }
519
520                                //do for all groups:
521                                for (int groupnr =0;
522                                                groupnr<chain.getAtomLength()
523                                                ;groupnr++){
524                                        Group gr = chain.getAtomGroup(groupnr);
525                                        xw.openTag("group");
526                                        xw.attribute("name",gr.getPDBName());
527                                        xw.attribute("type",gr.getType().toString());
528                                        xw.attribute("groupID",gr.getResidueNumber().toString());
529
530
531                                        // do for all atoms:
532                                        //Atom[] atoms  = gr.getAtoms();
533                                        List<Atom> atoms =  gr.getAtoms();
534                                        for (int atomnr=0;atomnr<atoms.size();atomnr++){
535                                                Atom atom = atoms.get(atomnr);
536                                                xw.openTag("atom");
537                                                xw.attribute("atomID",Integer.toString(atom.getPDBserial()));
538                                                xw.attribute("atomName",formatAtomName(atom));
539                                                xw.attribute("x",Double.toString(atom.getX()));
540                                                xw.attribute("y",Double.toString(atom.getY()));
541                                                xw.attribute("z",Double.toString(atom.getZ()));
542                                                xw.closeTag("atom");
543                                        }
544                                        xw.closeTag("group") ;
545                                }
546
547                                xw.closeTag("chain");
548                        }
549                }
550
551
552                if ( doPrintConnections() ) {
553                        // not supported anymore since 5.0
554                }
555        }
556
557        private static String formatAtomName(Atom a) {
558
559                String fullName = null;
560                String name = a.getName();
561                Element element = a.getElement();
562
563                // RULES FOR ATOM NAME PADDING: 4 columns in total: 13, 14, 15, 16
564
565                // if length 4: nothing to do
566                if (name.length()==4)
567                        fullName = name;
568
569                // if length 3: they stay at 14
570                else if (name.length()==3)
571                        fullName = " "+name;
572
573                // for length 2 it depends:
574                //    carbon, oxygens, nitrogens, phosphorous stay at column 14
575                //    elements with 2 letters (e.g. NA, FE) will go to column 13
576                else if (name.length()==2) {
577                        if (element == Element.C || element == Element.N || element == Element.O || element == Element.P || element == Element.S)
578                                fullName = " "+name+" ";
579                        else
580                                fullName = name+"  ";
581                }
582
583                // for length 1 (e.g. K but also C, O) they stay in column 14
584                else if (name.length()==1)
585                        fullName = " "+name+"  ";
586
587                //if (fullName.length()!=4)
588                //      logger.warn("Atom name "+fullName+"to be written in PDB format does not have length 4. Formatting will be incorrect");
589
590                return fullName;
591        }
592
593
594        /**
595         * Convert this structure to its CIF representation.
596         * @return a String representing this structure as CIF
597         */
598        public String toMMCIF() {
599                return CifStructureConverter.toText(this.structure);
600        }
601
602        /**
603         * Convert a chain to its CIF representation.
604         * @param chain data
605         * @return a String representing this chain as CIF
606         */
607        public static String toMMCIF(Chain chain) {
608                return CifStructureConverter.toText(chain);
609        }
610}