001/*
002 * This code may be freely distributed and modified under the
003 * terms of the GNU Lesser General Public Licence.  This should
004 * be distributed with the code.  If you do not have a copy,
005 * see:
006 *
007 *      http://www.gnu.org/copyleft/lesser.html
008 *
009 * Copyright for this code is held jointly by the individual
010 * authors.  These should be listed in @author doc comments.
011 *
012 * For more information on the BioJava project and its aims,
013 * or to join the biojava-l mailing list, visit the home page
014 * at:
015 *
016 *      http://www.biojava.org/
017 *
018 * Created on 26.04.2004
019 * @author Andreas Prlic
020 *
021 */
022package org.biojava.nbio.structure.io;
023
024import java.io.IOException;
025import java.text.DateFormat;
026import java.text.DecimalFormat;
027import java.text.NumberFormat;
028import java.text.SimpleDateFormat;
029import java.util.List;
030import java.util.Locale;
031
032import org.biojava.nbio.core.util.XMLWriter;
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.Bond;
035import org.biojava.nbio.structure.Chain;
036import org.biojava.nbio.structure.DBRef;
037import org.biojava.nbio.structure.Element;
038import org.biojava.nbio.structure.Group;
039import org.biojava.nbio.structure.GroupType;
040import org.biojava.nbio.structure.PDBHeader;
041import org.biojava.nbio.structure.Site;
042import org.biojava.nbio.structure.Structure;
043import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools;
044import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
045import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049
050/**
051 * Methods to convert a structure object into different file formats.
052 * @author Andreas Prlic
053 * @since 1.4
054 */
055public class FileConvert {
056
057        private static final Logger logger = LoggerFactory.getLogger(FileConvert.class);
058
059
060
061        private Structure structure ;
062
063        private boolean printConnections;
064
065        // Locale should be english, e.g. in DE separator is "," -> PDB files have "." !
066        public static DecimalFormat d3 = (DecimalFormat)NumberFormat.getInstance(Locale.US);
067        static {
068                d3.setMaximumIntegerDigits(4);
069                d3.setMinimumFractionDigits(3);
070                d3.setMaximumFractionDigits(3);
071                d3.setGroupingUsed(false);
072        }
073        public static DecimalFormat d2 = (DecimalFormat)NumberFormat.getInstance(Locale.US);
074        static {
075                d2.setMaximumIntegerDigits(3);
076                d2.setMinimumFractionDigits(2);
077                d2.setMaximumFractionDigits(2);
078                d2.setGroupingUsed(false);
079        }
080
081        private static final String newline = System.getProperty("line.separator");
082
083        /**
084         * Constructs a FileConvert object.
085         *
086         * @param struc  a Structure object
087         */
088        public FileConvert(Structure struc) {
089                structure = struc ;
090                printConnections = true;
091        }
092
093        /**
094         * Returns if the Connections should be added
095         * default is true;
096         * @return if the printConnections flag is set
097         */
098        public boolean doPrintConnections() {
099                return printConnections;
100        }
101
102        /** enable/disable printing of connections
103         * connections are sometimes buggy in PDB files
104         * so there are some cases where one might turn this off.
105         * @param printConnections
106         */
107        public void setPrintConnections(boolean printConnections) {
108                this.printConnections = printConnections;
109        }
110
111        /**
112         * Prints the connections in PDB style
113         *
114         * Rewritten since 5.0 to use {@link Bond}s
115         * Will produce strictly one CONECT record per bond (won't group several bonds in one line)
116         */
117        private String printPDBConnections(){
118
119                StringBuilder str = new StringBuilder();
120
121                for (Chain c:structure.getChains()) {
122                        for (Group g:c.getAtomGroups()) {
123                                for (Atom a:g.getAtoms()) {
124                                        if (a.getBonds()!=null) {
125                                                for (Bond b:a.getBonds()) {                             //7890123456789012345678901234567890123456789012345678901234567890
126                                                        str.append(String.format("CONECT%5d%5d                                                                "+newline, b.getAtomA().getPDBserial(), b.getAtomB().getPDBserial()));
127                                                }
128                                        }
129                                }
130                        }
131                }
132
133                return str.toString();
134        }
135
136        /** Convert a structure into a PDB file.
137         * @return a String representing a PDB file.
138         */
139        public String toPDB() {
140
141
142                StringBuffer str = new StringBuffer();
143                //int i = 0 ;
144
145
146
147                // TODO: print all the PDB header informaton in PDB style
148                // some objects (PDBHeader, Compound) are still missing
149                //
150
151                PDBHeader header = structure.getPDBHeader();
152                header.toPDB(str);
153
154
155                //REMARK 800
156                if (!structure.getSites().isEmpty()) {
157                        str.append("REMARK 800                                                                      ").append(newline);
158                        str.append("REMARK 800 SITE                                                                 ").append(newline);
159                        for (Site site : structure.getSites()) {
160                                site.remark800toPDB(str);
161                        }
162                }
163                //DBREF
164                for (DBRef dbref : structure.getDBRefs()){
165                        dbref.toPDB(str);
166                        str.append(newline);
167                }
168                //SSBOND
169                List<SSBondImpl> ssbonds = SSBondImpl.getSsBondListFromBondList(structure.getSSBonds());
170                for (SSBondImpl ssbond : ssbonds){
171                        ssbond.toPDB(str);
172                        str.append(newline);
173                }
174                //SITE
175                for (Site site : structure.getSites()) {
176                        try {
177                                site.toPDB(str);
178                        } catch (Exception e){
179                                e.printStackTrace();
180                        }
181                }
182
183                //
184                // print the atom records
185                //
186
187                // do for all models
188                int nrModels = structure.nrModels() ;
189                if ( structure.isNmr()) {
190                        str.append("EXPDTA    NMR, "+ nrModels+" STRUCTURES"+newline) ;
191                }
192                for (int m = 0 ; m < nrModels ; m++) {
193
194
195                        if ( nrModels>1 ) {
196                                str.append("MODEL      " + (m+1)+ newline);
197                        }
198
199                        List<Chain> polyChains = structure.getPolyChains(m);
200                        List<Chain> nonPolyChains = structure.getNonPolyChains(m);
201                        List<Chain> waterChains = structure.getWaterChains(m);
202
203                        for (Chain chain : polyChains) {
204
205                                // do for all groups
206                                int nrGroups = chain.getAtomLength();
207                                for ( int h=0; h<nrGroups;h++){
208
209                                        Group g= chain.getAtomGroup(h);
210
211                                        toPDB(g,str);
212
213                                }
214                                // End any polymeric chain with a "TER" record
215                                if (nrGroups > 0) str.append(String.format("%-80s","TER")).append(newline);
216
217                        }
218
219                        boolean nonPolyGroupsExist = false;
220                        for (Chain chain : nonPolyChains) {
221
222                                // do for all groups
223                                int nrGroups = chain.getAtomLength();
224                                for ( int h=0; h<nrGroups;h++){
225
226                                        Group g= chain.getAtomGroup(h);
227
228                                        toPDB(g,str);
229
230                                        nonPolyGroupsExist = true;
231                                }
232
233                        }
234                        if (nonPolyGroupsExist) str.append(String.format("%-80s","TER")).append(newline);;
235
236                        boolean waterGroupsExist = false;
237                        for (Chain chain : waterChains) {
238
239                                // do for all groups
240                                int nrGroups = chain.getAtomLength();
241                                for ( int h=0; h<nrGroups;h++){
242
243                                        Group g= chain.getAtomGroup(h);
244
245                                        toPDB(g,str);
246
247                                        waterGroupsExist = true;
248                                }
249
250                        }
251                        if (waterGroupsExist) str.append(String.format("%-80s","TER")).append(newline);;
252
253
254                        if ( nrModels>1) {
255                                str.append(String.format("%-80s","ENDMDL")).append(newline);
256                        }
257
258
259
260                }
261
262                if ( doPrintConnections() )
263                        str.append(printPDBConnections());
264
265                return str.toString() ;
266        }
267
268        private static void toPDB(Group g, StringBuffer str) {
269                // iterate over all atoms ...
270                // format output ...
271                int groupsize  = g.size();
272
273                for ( int atompos = 0 ; atompos < groupsize; atompos++) {
274                        Atom a = null ;
275
276                        a = g.getAtom(atompos);
277                        if ( a == null)
278                                continue ;
279
280                        toPDB(a, str);
281
282
283                        //line = record + serial + " " + fullname +altLoc
284                        //+ leftResName + " " + chainID + resseq
285                        //+ "   " + x+y+z
286                        //+ occupancy + tempfactor;
287                        //str.append(line + newline);
288                        //System.out.println(line);
289                }
290                if ( g.hasAltLoc()){
291                        for (Group alt : g.getAltLocs() ) {
292                                toPDB(alt,str);
293                        }
294                }
295
296        }
297
298        /** Prints the content of an Atom object as a PDB formatted line.
299         *
300         * @param a
301         * @return
302         */
303        public static String toPDB(Atom a){
304                StringBuffer w = new StringBuffer();
305
306                toPDB(a,w);
307
308                return w.toString();
309
310        }
311
312        public static String toPDB(Atom a, String chainId) {
313                StringBuffer w = new StringBuffer();
314
315                toPDB(a,w, chainId);
316
317                return w.toString();
318        }
319
320
321        /**
322         * Convert a Chain object to PDB representation
323         *
324         * @param chain
325         * @return
326         */
327        public static String toPDB(Chain chain){
328                StringBuffer w = new StringBuffer();
329                int nrGroups = chain.getAtomLength();
330
331                for ( int h=0; h<nrGroups;h++){
332
333                        Group g= chain.getAtomGroup(h);
334
335
336                        toPDB(g,w);
337
338
339                }
340
341                return w.toString();
342        }
343
344        /**
345         * Convert a Group object to PDB representation
346         *
347         * @param g
348         * @return
349         */
350        public static String toPDB(Group g){
351                StringBuffer w = new StringBuffer();
352                toPDB(g,w);
353                return w.toString();
354        }
355
356        /**
357         * Print ATOM record in the following syntax
358         * <pre>
359         * ATOM      1  N   ASP A  15     110.964  24.941  59.191  1.00 83.44           N
360         *
361         * COLUMNS        DATA TYPE       FIELD         DEFINITION
362         * ---------------------------------------------------------------------------------
363         * 1 -  6        Record name     "ATOM  "
364         * 7 - 11        Integer         serial        Atom serial number.
365         * 13 - 16        Atom            name          Atom name.
366         * 17             Character       altLoc        Alternate location indicator.
367         * 18 - 20        Residue name    resName       Residue name.
368         * 22             Character       chainID       Chain identifier.
369         * 23 - 26        Integer         resSeq        Residue sequence number.
370         * 27             AChar           iCode         Code for insertion of residues.
371         * 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in
372         * Angstroms.
373         * 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in
374         * Angstroms.
375         * 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in
376         * Angstroms.
377         * 55 - 60        Real(6.2)       occupancy     Occupancy.
378         * 61 - 66        Real(6.2)       tempFactor    Temperature factor.
379         * 73 - 76        LString(4)      segID         Segment identifier, left-justified.
380         * 77 - 78        LString(2)      element       Element symbol, right-justified.
381         * 79 - 80        LString(2)      charge        Charge on the atom.
382         * </pre>
383         * @param a
384         * @param str
385         * @param chainID the chain ID that the Atom will have in the output string
386         */
387        public static void toPDB(Atom a, StringBuffer str, String chainID) {
388
389                Group g = a.getGroup();
390
391                GroupType type = g.getType() ;
392
393                String record = "" ;
394                if ( type.equals(GroupType.HETATM) ) {
395                        record = "HETATM";
396                } else {
397                        record = "ATOM  ";
398                }
399
400
401                // format output ...
402                String resName = g.getPDBName();
403                String pdbcode = g.getResidueNumber().toString();
404
405
406                int    seri       = a.getPDBserial()        ;
407                String serial     = String.format("%5d",seri);
408                String fullName   = formatAtomName(a);
409
410                Character  altLoc = a.getAltLoc();
411                if ( altLoc == null)
412                        altLoc = ' ';
413
414                String resseq = "" ;
415                if ( hasInsertionCode(pdbcode) )
416                        resseq     = String.format("%5s",pdbcode);
417                else
418                        resseq     = String.format("%4s",pdbcode)+" ";
419
420                String x          = String.format("%8s",d3.format(a.getX()));
421                String y          = String.format("%8s",d3.format(a.getY()));
422                String z          = String.format("%8s",d3.format(a.getZ()));
423                String occupancy  = String.format("%6s",d2.format(a.getOccupancy())) ;
424                String tempfactor = String.format("%6s",d2.format(a.getTempFactor()));
425
426
427                String leftResName = String.format("%3s",resName);
428
429                StringBuffer s = new StringBuffer();
430                s.append(record);
431                s.append(serial);
432                s.append(" ");
433                s.append(fullName);
434                s.append(altLoc);
435                s.append(leftResName);
436                s.append(" ");
437                s.append(chainID);
438                s.append(resseq);
439                s.append("   ");
440                s.append(x);
441                s.append(y);
442                s.append(z);
443                s.append(occupancy);
444                s.append(tempfactor);
445
446                Element e = a.getElement();
447
448                String eString = e.toString().toUpperCase();
449
450                if ( e.equals(Element.R)) {
451                        eString = "X";
452                }
453                str.append(String.format("%-76s%2s", s.toString(),eString));
454                str.append(newline);
455
456        }
457
458        public static void toPDB(Atom a, StringBuffer str) {
459                toPDB(a,str,a.getGroup().getChain().getName());
460        }
461
462
463        /** test if pdbserial has an insertion code */
464        private static boolean hasInsertionCode(String pdbserial) {
465                try {
466                        Integer.parseInt(pdbserial) ;
467                } catch (NumberFormatException e) {
468                        return true ;
469                }
470                return false ;
471        }
472
473
474        /**
475         * Convert a protein Structure to a DAS Structure XML response .
476         * Since 5.0, bond (CONECT records) information is not supported anymore.
477         * @param xw  a XMLWriter object
478         * @throws IOException ...
479         *
480         */
481        public void toDASStructure(XMLWriter xw)
482                        throws IOException
483        {
484
485                /*xmlns="http://www.sanger.ac.uk/xml/das/2004/06/17/dasalignment.xsd" xmlns:align="http://www.sanger.ac.uk/xml/das/2004/06/17/alignment.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance" xsd:schemaLocation="http://www.sanger.ac.uk/xml/das/2004/06/17/dasalignment.xsd http://www.sanger.ac.uk/xml/das//2004/06/17/dasalignment.xsd"*/
486
487                if ( structure == null){
488                        System.err.println("can not convert structure null");
489                        return;
490                }
491
492                PDBHeader header = structure.getPDBHeader();
493
494                xw.openTag("object");
495                xw.attribute("dbAccessionId",structure.getPDBCode());
496                xw.attribute("intObjectId"  ,structure.getPDBCode());
497                // missing modification date
498                DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yy",Locale.US);
499                String modificationDate = dateFormat.format(header.getModDate());
500                xw.attribute("objectVersion",modificationDate);
501                xw.attribute("type","protein structure");
502                xw.attribute("dbSource","PDB");
503                xw.attribute("dbVersion","20070116");
504                xw.attribute("dbCoordSys","PDBresnum,Protein Structure");
505
506                // do we need object details ???
507                xw.closeTag("object");
508
509
510                // do for all models
511                for (int modelnr = 0;modelnr<structure.nrModels();modelnr++){
512
513                        // do for all chains:
514                        for (int chainnr = 0;chainnr<structure.size(modelnr);chainnr++){
515                                Chain chain = structure.getChainByIndex(modelnr,chainnr);
516                                xw.openTag("chain");
517                                xw.attribute("id",chain.getId());
518                                xw.attribute("SwissprotId",chain.getSwissprotId() );
519                                if (structure.nrModels()>1){
520                                        xw.attribute("model",Integer.toString(modelnr+1));
521                                }
522
523                                //do for all groups:
524                                for (int groupnr =0;
525                                                groupnr<chain.getAtomLength()
526                                                ;groupnr++){
527                                        Group gr = chain.getAtomGroup(groupnr);
528                                        xw.openTag("group");
529                                        xw.attribute("name",gr.getPDBName());
530                                        xw.attribute("type",gr.getType().toString());
531                                        xw.attribute("groupID",gr.getResidueNumber().toString());
532
533
534                                        // do for all atoms:
535                                        //Atom[] atoms  = gr.getAtoms();
536                                        List<Atom> atoms =  gr.getAtoms();
537                                        for (int atomnr=0;atomnr<atoms.size();atomnr++){
538                                                Atom atom = atoms.get(atomnr);
539                                                xw.openTag("atom");
540                                                xw.attribute("atomID",Integer.toString(atom.getPDBserial()));
541                                                xw.attribute("atomName",formatAtomName(atom));
542                                                xw.attribute("x",Double.toString(atom.getX()));
543                                                xw.attribute("y",Double.toString(atom.getY()));
544                                                xw.attribute("z",Double.toString(atom.getZ()));
545                                                xw.closeTag("atom");
546                                        }
547                                        xw.closeTag("group") ;
548                                }
549
550                                xw.closeTag("chain");
551                        }
552                }
553
554
555                if ( doPrintConnections() ) {
556                        // not supported anymore since 5.0
557                }
558        }
559
560        private static String formatAtomName(Atom a) {
561
562                String fullName = null;
563                String name = a.getName();
564                Element element = a.getElement();
565
566                // RULES FOR ATOM NAME PADDING: 4 columns in total: 13, 14, 15, 16
567
568                // if length 4: nothing to do
569                if (name.length()==4)
570                        fullName = name;
571
572                // if length 3: they stay at 14
573                else if (name.length()==3)
574                        fullName = " "+name;
575
576                // for length 2 it depends:
577                //    carbon, oxygens, nitrogens, phosphorous stay at column 14
578                //    elements with 2 letters (e.g. NA, FE) will go to column 13
579                else if (name.length()==2) {
580                        if (element == Element.C || element == Element.N || element == Element.O || element == Element.P || element == Element.S)
581                                fullName = " "+name+" ";
582                        else
583                                fullName = name+"  ";
584                }
585
586                // for length 1 (e.g. K but also C, O) they stay in column 14
587                else if (name.length()==1)
588                        fullName = " "+name+"  ";
589
590                //if (fullName.length()!=4)
591                //      logger.warn("Atom name "+fullName+"to be written in PDB format does not have length 4. Formatting will be incorrect");
592
593                return fullName;
594        }
595
596
597        public String toMMCIF() {
598
599                StringBuilder str = new StringBuilder();
600
601                str.append(SimpleMMcifParser.MMCIF_TOP_HEADER+"BioJava_mmCIF_file"+newline);
602
603                if (structure.getPDBHeader()!=null && structure.getPDBHeader().getCrystallographicInfo()!=null &&
604                                structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup()!=null &&
605                                structure.getPDBHeader().getCrystallographicInfo().getCrystalCell()!=null) {
606
607                        str.append(MMCIFFileTools.toMMCIF("_cell",
608                                        MMCIFFileTools.convertCrystalCellToCell(structure.getPDBHeader().getCrystallographicInfo().getCrystalCell())));
609                        str.append(MMCIFFileTools.toMMCIF("_symmetry",
610                                        MMCIFFileTools.convertSpaceGroupToSymmetry(structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup())));
611
612                }
613
614
615                str.append(getAtomSiteHeader());
616
617                List<AtomSite> list =  MMCIFFileTools.convertStructureToAtomSites(structure);
618
619
620                str.append(MMCIFFileTools.toMMCIF(list,AtomSite.class));
621
622                return str.toString();
623        }
624
625        public static String toMMCIF(Chain chain, String authId, String asymId, boolean writeHeader) {
626                StringBuilder str = new StringBuilder();
627
628                if (writeHeader)
629                        str.append(getAtomSiteHeader());
630
631
632                List<AtomSite> list = MMCIFFileTools.convertChainToAtomSites(chain, 1, authId, asymId);
633
634                str.append(MMCIFFileTools.toMMCIF(list,AtomSite.class));
635                return str.toString();
636        }
637
638        public static String toMMCIF(Chain chain, boolean writeHeader) {
639                StringBuilder sb = new StringBuilder();
640                sb.append(SimpleMMcifParser.MMCIF_TOP_HEADER+"BioJava_mmCIF_file"+newline);
641                sb.append(toMMCIF(chain, chain.getName(), chain.getId(),writeHeader));
642                return sb.toString();
643        }
644
645        public static String getAtomSiteHeader() {
646                String header;
647                try {
648                        header = MMCIFFileTools.toLoopMmCifHeaderString("_atom_site", AtomSite.class.getName());
649
650                } catch (ClassNotFoundException e) {
651                        logger.error("Class not found, will not have a header for this MMCIF category: "+e.getMessage());
652                        header = "";
653                }
654
655                return header;
656        }
657}