/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.align.multiple.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.core.util.PrettyXMLWriter;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.ResidueRange;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIdentifier;
import org.biojava.nbio.structure.SubstructureIdentifier;
import org.biojava.nbio.structure.align.multiple.Block;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
import org.biojava.nbio.structure.align.xml.MultipleAlignmentXMLConverter;
041
042/**
043 * This class contains functions for the conversion of {@link MultipleAlignment}
044 * to various String outputs.
045 * <p>
046 * Supported formats: FASTA, FatCat, Aligned Residues, Transformation Matrices,
047 * XML, 3D format.
048 *
049 * @author Aleix Lafita
050 * @since 4.1.0
051 *
052 */
053public class MultipleAlignmentWriter {
054
055        /**
056         * Converts the {@link MultipleAlignment} into a multiple sequence alignment
057         * String in FASTA format.
058         *
059         * @param alignment
060         *            MultipleAlignment
061         * @return String multiple sequence alignment in FASTA format
062         * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
063         */
064        public static String toFASTA(MultipleAlignment alignment) {
065
066                // Get the alignment sequences
067                List<String> alnSequences = MultipleAlignmentTools
068                                .getSequenceAlignment(alignment);
069
070                String fasta = "";
071                for (int st = 0; st < alignment.size(); st++) {
072                        // Add the structure identifier as the head of the FASTA
073                        fasta += ">" + alignment.getEnsemble().getStructureIdentifiers().get(st).getIdentifier()
074                                        + "\n" + alnSequences.get(st) + "\n";
075                }
076                return fasta;
077        }
078
079        /**
080         * Converts the {@link MultipleAlignment} into a FatCat String format.
081         * Includes summary information about the alignment in the top and a
082         * multiple sequence alignment at the bottom.
083         *
084         * @param alignment
085         *            MultipleAlignment
086         * @return String multiple sequence alignment in FASTA format
087         * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
088         */
089        public static String toFatCat(MultipleAlignment alignment) {
090
091                // Initialize the String and put the summary information
092                StringWriter fatcat = new StringWriter();
093                fatcat.append(alignment.toString() + "\n\n");
094
095                // Get the alignment sequences and the mapping
096                List<Integer> mapSeqToStruct = new ArrayList<Integer>();
097                List<String> alnSequences = MultipleAlignmentTools
098                                .getSequenceAlignment(alignment, mapSeqToStruct);
099
100                // Get the String of the Block Numbers for Position
101                String blockNumbers = "";
102                for (int pos = 0; pos < alnSequences.get(0).length(); pos++) {
103                        int blockNr = MultipleAlignmentTools.getBlockForSequencePosition(
104                                        alignment, mapSeqToStruct, pos);
105                        if (blockNr != -1) {
106                                blockNumbers = blockNumbers.concat("" + (blockNr + 1));
107                        } else
108                                blockNumbers = blockNumbers.concat(" ");
109                }
110
111                // Write the Sequence Alignment
112                for (int str = 0; str < alignment.size(); str++) {
113                        if (str < 9) {
114                                fatcat.append("Chain 0" + (str + 1) + ": "
115                                                + alnSequences.get(str) + "\n");
116                        } else {
117                                fatcat.append("Chain " + (str + 1) + ": "
118                                                + alnSequences.get(str) + "\n");
119                        }
120                        if (str != alignment.size() - 1) {
121                                fatcat.append("          " + blockNumbers + "\n");
122                        }
123                }
124                return fatcat.toString();
125        }
126
127        /**
128         * Converts the alignment to its simplest form: a list of groups of aligned
129         * residues. Format is one line per residue group, tab delimited:
130         * <ul>
131         * <li>PDB number (includes insertion code)
132         * <li>Chain
133         * <li>Amino Acid (three letter code)</li>
134         * </ul>
135         * Example: <code>52    A       ALA     102     A       VAL     154     A       THR</code>
136         * <p>
137         * Note that this format loses information about blocks.
138         *
139         * @param multAln
140         *            MultipleAlignment object
141         * @return a String representation of the aligned residues.
142         */
143        public static String toAlignedResidues(MultipleAlignment multAln) {
144                StringWriter residueGroup = new StringWriter();
145
146                // Write structure names & PDB codes
147                for (int str = 0; str < multAln.size(); str++) {
148                        residueGroup.append("#Struct" + (str + 1) + ":\t");
149                        residueGroup.append(multAln.getEnsemble().getStructureIdentifiers()
150                                        .get(str).getIdentifier());
151                        residueGroup.append("\n");
152                }
153                // Whrite header for columns
154                for (int str = 0; str < multAln.size(); str++)
155                        residueGroup.append("#Num" + (str + 1) + "\tChain" + (str + 1)
156                                        + "\tAA" + (str + 1) + "\t");
157                residueGroup.append("\n");
158
159                // Write optimally aligned pairs
160                for (Block b : multAln.getBlocks()) {
161                        for (int res = 0; res < b.length(); res++) {
162                                for (int str = 0; str < multAln.size(); str++) {
163                                        Integer residue = b.getAlignRes().get(str).get(res);
164                                        if (residue == null) {
165                                                residueGroup.append("-");
166                                                residueGroup.append('\t');
167                                                residueGroup.append("-");
168                                                residueGroup.append('\t');
169                                                residueGroup.append("-");
170                                                residueGroup.append('\t');
171                                        } else {
172                                                Atom atom = multAln.getAtomArrays().get(str)[residue];
173
174                                                residueGroup.append(atom.getGroup().getResidueNumber()
175                                                                .toString());
176                                                residueGroup.append('\t');
177                                                residueGroup.append(atom.getGroup().getChain()
178                                                                .getChainID());
179                                                residueGroup.append('\t');
180                                                residueGroup.append(atom.getGroup().getPDBName());
181                                                residueGroup.append('\t');
182                                        }
183                                }
184                                residueGroup.append('\n');
185                        }
186                }
187                return residueGroup.toString();
188        }
189
190        /**
191         * Converts the transformation Matrices of the alignment into a String
192         * output.
193         *
194         * @param afpChain
195         * @return String transformation Matrices
196         */
197        public static String toTransformMatrices(MultipleAlignment alignment) {
198
199                StringBuffer txt = new StringBuffer();
200
201                for (int bs = 0; bs < alignment.getBlockSets().size(); bs++) {
202
203                        List<Matrix4d> btransforms = alignment.getBlockSet(bs)
204                                        .getTransformations();
205                        if (btransforms == null || btransforms.size() < 1)
206                                continue;
207
208                        if (alignment.getBlockSets().size() > 1) {
209                                txt.append("Operations for block ");
210                                txt.append(bs + 1);
211                                txt.append("\n");
212                        }
213
214                        for (int str = 0; str < alignment.size(); str++) {
215                                String origString = "ref";
216
217                                txt.append(String.format("     X"+(str+1)+ " = (%9.6f)*X"+
218                                                origString +" + (%9.6f)*Y"+
219                                                origString +" + (%9.6f)*Z"+
220                                                origString +" + (%12.6f)",
221                                                btransforms.get(str).getElement(0,0),
222                                                btransforms.get(str).getElement(0,1),
223                                                btransforms.get(str).getElement(0,2),
224                                                btransforms.get(str).getElement(0,3)));
225                                txt.append( "\n");
226                                txt.append(String.format("     Y"+(str+1)+" = (%9.6f)*X"+
227                                                origString +" + (%9.6f)*Y"+
228                                                origString +" + (%9.6f)*Z"+
229                                                origString +" + (%12.6f)",
230                                                btransforms.get(str).getElement(1,0),
231                                                btransforms.get(str).getElement(1,1),
232                                                btransforms.get(str).getElement(1,2),
233                                                btransforms.get(str).getElement(1,3)));
234                                txt.append( "\n");
235                                txt.append(String.format("     Z"+(str+1)+" = (%9.6f)*X"+
236                                                origString +" + (%9.6f)*Y"+
237                                                origString +" + (%9.6f)*Z"+
238                                                origString +" + (%12.6f)",
239                                                btransforms.get(str).getElement(2,0),
240                                                btransforms.get(str).getElement(2,1),
241                                                btransforms.get(str).getElement(2,2),
242                                                btransforms.get(str).getElement(2,3)));
243                                txt.append("\n\n");
244                        }
245                }
246                return txt.toString();
247        }
248
249        /**
250         * Converts all the information of a multiple alignment ensemble into an XML
251         * String format. Cached variables, like transformation matrices and scores,
252         * are also converted.
253         *
254         * @param ensemble
255         *            the MultipleAlignmentEnsemble to convert.
256         * @return String XML representation of the ensemble
257         * @throws IOException
258         * @see MultipleAlignmentXMLConverter Helper methods for XML conversion
259         */
260        public static String toXML(MultipleAlignmentEnsemble ensemble)
261                        throws IOException {
262
263                StringWriter result = new StringWriter();
264                PrintWriter writer = new PrintWriter(result);
265                PrettyXMLWriter xml = new PrettyXMLWriter(writer);
266
267                MultipleAlignmentXMLConverter.printXMLensemble(xml, ensemble);
268
269                writer.close();
270
271                return result.toString();
272        }
273
274        /**
275         * Outputs a pairwise alignment in I-TASSER's 3D Format for target-template
276         * alignment. http://zhanglab.ccmb.med.umich.edu/I-TASSER/option4.html
277         *
278         * <p>
279         * The format is closely related to a standard PDB file, but contains only
280         * CA atoms and adds two columns for specifying the alignment:
281         *
282         * <pre>
283         * ATOM   2001  CA  MET     1      41.116 -30.727   6.866  129 THR
284         * ATOM   2002  CA  ALA     2      39.261 -27.408   6.496  130 ARG
285         * ATOM   2003  CA  ALA     3      35.665 -27.370   7.726  131 THR
286         * ATOM   2004  CA  ARG     4      32.662 -25.111   7.172  132 ARG
287         * ATOM   2005  CA  GLY     5      29.121 -25.194   8.602  133 ARG
288         *
289         * Column 1 -30: Atom & Residue records of query sequence.
290         * Column 31-54: Coordinates of atoms in query copied from corresponding atoms in template.
291         * Column 55-59: Corresponding residue number in template based on alignment
292         * Column 60-64: Corresponding residue name in template
293         * </pre>
294         *
295         * <p>
296         * Note that the output is a pairwise alignment. Only the first and second
297         * rows in the MultipleAlignment will be used, others ignored.
298         *
299         * <p>
300         * This method supports topology-independent alignments. The output will
301         * have sequence order matching the query, but include atoms from the
302         * template.
303         *
304         * @param alignment
305         *            A <em>full</em> multiple alignment between proteins
306         * @param queryIndex
307         *            index of the query within the multiple alignment
308         * @param templateIndex
309         *            index of the template within the multiple alignment
310         * @return The file contents as a string
311         * @throws StructureException If an error occurs parsing the alignment's structure names
312         */
313        public static String to3DFormat(MultipleAlignment alignment,
314                        int queryIndex, int templateIndex) throws StructureException {
315                List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays();
316                Atom[] queryAtoms = atomArrays.get(queryIndex);
317                Atom[] templateAtoms = atomArrays.get(templateIndex);
318
319                List<Block> blocks = alignment.getBlocks();
320                MultipleAlignmentTools.sortBlocks(blocks, queryIndex);
321
322                StringBuilder str = new StringBuilder();
323
324                // Gather info about the template structure
325                StructureIdentifier tName = alignment.getEnsemble().getStructureIdentifiers()
326                                .get(templateIndex);
327                SubstructureIdentifier canon = tName.toCanonical();
328                String tPdbId = canon.getPdbId();
329                String tChain = null;
330                for(ResidueRange range : canon.getResidueRanges()) {
331                        tChain = range.getChainId();
332                        break;
333                }
334
335                if (tChain == null) {
336                        // Use the chain of the first template block
337                        for (Integer i : blocks.get(0).getAlignRes().get(templateIndex)) {
338                                if (i != null) {
339                                        tChain = templateAtoms[i].getGroup().getChainId();
340                                        break;
341                                }
342                        }
343                }
344                str.append(String
345                                .format("REMARK Template name:%s:%s\n", tPdbId, tChain));
346                for (Block block : blocks) {
347                        List<Integer> qAlign = block.getAlignRes().get(queryIndex);
348                        List<Integer> tAlign = block.getAlignRes().get(templateIndex);
349                        for (int i = 0; i < block.length(); i++) {
350                                Integer qRes = qAlign.get(i);
351                                Integer tRes = tAlign.get(i);
352
353                                // skip gaps
354                                if (qRes == null || tRes == null)
355                                        continue;
356
357                                // Get PDB-format ATOM records
358                                String qPDB = queryAtoms[qRes].toPDB();
359                                String tPDB = templateAtoms[tRes].toPDB();
360
361                                // merge the two records into 3D format
362                                str.append(qPDB.substring(0, 30)); // up through coordinates
363                                str.append(tPDB.substring(30, 54)); // coordinates
364                                str.append(tPDB.substring(22, 27)); // residue number
365                                str.append(' ');
366                                str.append(tPDB.substring(17, 20));
367                                str.append('\n');
368                        }
369                }
370                return str.toString();
371        }
372
373}