/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.align.multiple.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.core.util.PrettyXMLWriter;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.ResidueRange;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIdentifier;
import org.biojava.nbio.structure.SubstructureIdentifier;
import org.biojava.nbio.structure.align.multiple.Block;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
import org.biojava.nbio.structure.align.xml.MultipleAlignmentXMLConverter;
041
042/**
043 * This class contains functions for the conversion of {@link MultipleAlignment}
044 * to various String outputs.
045 * <p>
046 * Supported formats: FASTA, FatCat, Aligned Residues, Transformation Matrices,
047 * XML, 3D format.
048 *
049 * @author Aleix Lafita
050 * @since 4.1.0
051 *
052 */
053public class MultipleAlignmentWriter {
054
055        /**
056         * Converts the {@link MultipleAlignment} into a multiple sequence alignment
057         * String in FASTA format.
058         *
059         * @param alignment
060         *            MultipleAlignment
061         * @return String multiple sequence alignment in FASTA format
062         * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
063         */
064        public static String toFASTA(MultipleAlignment alignment) {
065
066                // Get the alignment sequences
067                List<String> alnSequences = MultipleAlignmentTools
068                                .getSequenceAlignment(alignment);
069
070                String fasta = "";
071                for (int st = 0; st < alignment.size(); st++) {
072                        // Add the structure identifier as the head of the FASTA
073                        fasta += ">" + alignment.getEnsemble().getStructureIdentifiers().get(st).getIdentifier()
074                                        + "\n" + alnSequences.get(st) + "\n";
075                }
076                return fasta;
077        }
078
079        /**
080         * Converts the {@link MultipleAlignment} into a FatCat String format.
081         * Includes summary information about the alignment in the top and a
082         * multiple sequence alignment at the bottom.
083         *
084         * @param alignment
085         *            MultipleAlignment
086         * @return String multiple sequence alignment in FASTA format
087         * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
088         */
089        public static String toFatCat(MultipleAlignment alignment) {
090
091                // Initialize the String and put the summary information
092                StringWriter fatcat = new StringWriter();
093                fatcat.append(alignment.toString() + "\n\n");
094
095                // Get the alignment sequences and the mapping
096                List<Integer> mapSeqToStruct = new ArrayList<Integer>();
097                List<String> alnSequences = MultipleAlignmentTools
098                                .getSequenceAlignment(alignment, mapSeqToStruct);
099
100                // Get the String of the Block Numbers for Position
101                String blockNumbers = "";
102                for (int pos = 0; pos < alnSequences.get(0).length(); pos++) {
103                        int blockNr = MultipleAlignmentTools.getBlockForSequencePosition(
104                                        alignment, mapSeqToStruct, pos);
105                        if (blockNr != -1) {
106                                blockNumbers = blockNumbers.concat(String.valueOf(blockNr + 1));
107                        } else
108                                blockNumbers = blockNumbers.concat(" ");
109                }
110
111                // Write the Sequence Alignment
112                for (int str = 0; str < alignment.size(); str++) {
113                        if (str < 9) {
114                                fatcat.append("Chain 0" + (str + 1) + ": "
115                                                + alnSequences.get(str) + "\n");
116                        } else {
117                                fatcat.append("Chain " + (str + 1) + ": "
118                                                + alnSequences.get(str) + "\n");
119                        }
120                        if (str != alignment.size() - 1) {
121                                fatcat.append("          " + blockNumbers + "\n");
122                        }
123                }
124                return fatcat.toString();
125        }
126
127        /**
128         * Converts the alignment to its simplest form: a list of groups of aligned
129         * residues. Format is one line per residue group, tab delimited:
130         * <ul>
131         * <li>PDB number (includes insertion code)
132         * <li>Chain
133         * <li>Amino Acid (three letter code)</li>
134         * </ul>
135         * Example: <code>52    A       ALA     102     A       VAL     154     A       THR</code>
136         * <p>
137         * Note that this format loses information about blocks.
138         *
139         * @param multAln
140         *            MultipleAlignment object
141         * @return a String representation of the aligned residues.
142         */
143        public static String toAlignedResidues(MultipleAlignment multAln) {
144                StringWriter residueGroup = new StringWriter();
145
146                // Write structure names & PDB codes
147                for (int str = 0; str < multAln.size(); str++) {
148                        residueGroup.append("#Struct" + (str + 1) + ":\t");
149                        residueGroup.append(multAln.getEnsemble().getStructureIdentifiers()
150                                        .get(str).getIdentifier());
151                        residueGroup.append("\n");
152                }
153                // Whrite header for columns
154                for (int str = 0; str < multAln.size(); str++)
155                        residueGroup.append("#Num" + (str + 1) + "\tChain" + (str + 1)
156                                        + "\tAA" + (str + 1) + "\t");
157                residueGroup.append("\n");
158
159                // Write optimally aligned pairs
160                for (Block b : multAln.getBlocks()) {
161                        for (int res = 0; res < b.length(); res++) {
162                                for (int str = 0; str < multAln.size(); str++) {
163                                        Integer residue = b.getAlignRes().get(str).get(res);
164                                        if (residue == null) {
165                                                residueGroup.append("-");
166                                                residueGroup.append('\t');
167                                                residueGroup.append("-");
168                                                residueGroup.append('\t');
169                                                residueGroup.append("-");
170                                                residueGroup.append('\t');
171                                        } else {
172                                                Atom atom = multAln.getAtomArrays().get(str)[residue];
173
174                                                residueGroup.append(atom.getGroup().getResidueNumber()
175                                                                .toString());
176                                                residueGroup.append('\t');
177                                                residueGroup.append(atom.getGroup().getChain()
178                                // ABradley - I'm assuming Auth Id's here 04/05/16
179                                                                .getName());
180                                                residueGroup.append('\t');
181                                                residueGroup.append(atom.getGroup().getPDBName());
182                                                residueGroup.append('\t');
183                                        }
184                                }
185                                residueGroup.append('\n');
186                        }
187                }
188                return residueGroup.toString();
189        }
190
191        /**
192         * Converts the transformation Matrices of the alignment into a String
193         * output.
194         *
195         * @param afpChain
196         * @return String transformation Matrices
197         */
198        public static String toTransformMatrices(MultipleAlignment alignment) {
199
200                StringBuffer txt = new StringBuffer();
201
202                for (int bs = 0; bs < alignment.getBlockSets().size(); bs++) {
203
204                        List<Matrix4d> btransforms = alignment.getBlockSet(bs)
205                                        .getTransformations();
206                        if (btransforms == null || btransforms.size() < 1)
207                                continue;
208
209                        if (alignment.getBlockSets().size() > 1) {
210                                txt.append("Operations for block ");
211                                txt.append(bs + 1);
212                                txt.append("\n");
213                        }
214
215                        for (int str = 0; str < alignment.size(); str++) {
216                                String origString = "ref";
217
218                                txt.append(String.format("     X"+(str+1)+ " = (%9.6f)*X"+
219                                                origString +" + (%9.6f)*Y"+
220                                                origString +" + (%9.6f)*Z"+
221                                                origString +" + (%12.6f)",
222                                                btransforms.get(str).getElement(0,0),
223                                                btransforms.get(str).getElement(0,1),
224                                                btransforms.get(str).getElement(0,2),
225                                                btransforms.get(str).getElement(0,3)));
226                                txt.append( "\n");
227                                txt.append(String.format("     Y"+(str+1)+" = (%9.6f)*X"+
228                                                origString +" + (%9.6f)*Y"+
229                                                origString +" + (%9.6f)*Z"+
230                                                origString +" + (%12.6f)",
231                                                btransforms.get(str).getElement(1,0),
232                                                btransforms.get(str).getElement(1,1),
233                                                btransforms.get(str).getElement(1,2),
234                                                btransforms.get(str).getElement(1,3)));
235                                txt.append( "\n");
236                                txt.append(String.format("     Z"+(str+1)+" = (%9.6f)*X"+
237                                                origString +" + (%9.6f)*Y"+
238                                                origString +" + (%9.6f)*Z"+
239                                                origString +" + (%12.6f)",
240                                                btransforms.get(str).getElement(2,0),
241                                                btransforms.get(str).getElement(2,1),
242                                                btransforms.get(str).getElement(2,2),
243                                                btransforms.get(str).getElement(2,3)));
244                                txt.append("\n\n");
245                        }
246                }
247                return txt.toString();
248        }
249
250        /**
251         * Converts all the information of a multiple alignment ensemble into an XML
252         * String format. Cached variables, like transformation matrices and scores,
253         * are also converted.
254         *
255         * @param ensemble
256         *            the MultipleAlignmentEnsemble to convert.
257         * @return String XML representation of the ensemble
258         * @throws IOException
259         * @see MultipleAlignmentXMLConverter Helper methods for XML conversion
260         */
261        public static String toXML(MultipleAlignmentEnsemble ensemble)
262                        throws IOException {
263
264                StringWriter result = new StringWriter();
265                PrintWriter writer = new PrintWriter(result);
266                PrettyXMLWriter xml = new PrettyXMLWriter(writer);
267
268                MultipleAlignmentXMLConverter.printXMLensemble(xml, ensemble);
269
270                writer.close();
271
272                return result.toString();
273        }
274
275        /**
276         * Outputs a pairwise alignment in I-TASSER's 3D Format for target-template
277         * alignment. http://zhanglab.ccmb.med.umich.edu/I-TASSER/option4.html
278         *
279         * <p>
280         * The format is closely related to a standard PDB file, but contains only
281         * CA atoms and adds two columns for specifying the alignment:
282         *
283         * <pre>
284         * ATOM   2001  CA  MET     1      41.116 -30.727   6.866  129 THR
285         * ATOM   2002  CA  ALA     2      39.261 -27.408   6.496  130 ARG
286         * ATOM   2003  CA  ALA     3      35.665 -27.370   7.726  131 THR
287         * ATOM   2004  CA  ARG     4      32.662 -25.111   7.172  132 ARG
288         * ATOM   2005  CA  GLY     5      29.121 -25.194   8.602  133 ARG
289         *
290         * Column 1 -30: Atom & Residue records of query sequence.
291         * Column 31-54: Coordinates of atoms in query copied from corresponding atoms in template.
292         * Column 55-59: Corresponding residue number in template based on alignment
293         * Column 60-64: Corresponding residue name in template
294         * </pre>
295         *
296         * <p>
297         * Note that the output is a pairwise alignment. Only the first and second
298         * rows in the MultipleAlignment will be used, others ignored.
299         *
300         * <p>
301         * This method supports topology-independent alignments. The output will
302         * have sequence order matching the query, but include atoms from the
303         * template.
304         *
305         * @param alignment
306         *            A <em>full</em> multiple alignment between proteins
307         * @param queryIndex
308         *            index of the query within the multiple alignment
309         * @param templateIndex
310         *            index of the template within the multiple alignment
311         * @return The file contents as a string
312         * @throws StructureException If an error occurs parsing the alignment's structure names
313         */
314        public static String to3DFormat(MultipleAlignment alignment,
315                        int queryIndex, int templateIndex) throws StructureException {
316                List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays();
317                Atom[] queryAtoms = atomArrays.get(queryIndex);
318                Atom[] templateAtoms = atomArrays.get(templateIndex);
319
320                List<Block> blocks = alignment.getBlocks();
321                MultipleAlignmentTools.sortBlocks(blocks, queryIndex);
322
323                StringBuilder str = new StringBuilder();
324
325                // Gather info about the template structure
326                StructureIdentifier tName = alignment.getEnsemble().getStructureIdentifiers()
327                                .get(templateIndex);
328                SubstructureIdentifier canon = tName.toCanonical();
329                String tPdbId = canon.getPdbId();
330                String tChain = null;
331                for(ResidueRange range : canon.getResidueRanges()) {
332                        tChain = range.getChainName();
333                        break;
334                }
335
336                if (tChain == null) {
337                        // Use the chain of the first template block
338                        for (Integer i : blocks.get(0).getAlignRes().get(templateIndex)) {
339                                if (i != null) {
340                                        tChain = templateAtoms[i].getGroup().getChainId();
341                                        break;
342                                }
343                        }
344                }
345                str.append(String
346                                .format("REMARK Template name:%s:%s\n", tPdbId, tChain));
347                for (Block block : blocks) {
348                        List<Integer> qAlign = block.getAlignRes().get(queryIndex);
349                        List<Integer> tAlign = block.getAlignRes().get(templateIndex);
350                        for (int i = 0; i < block.length(); i++) {
351                                Integer qRes = qAlign.get(i);
352                                Integer tRes = tAlign.get(i);
353
354                                // skip gaps
355                                if (qRes == null || tRes == null)
356                                        continue;
357
358                                // Get PDB-format ATOM records
359                                String qPDB = queryAtoms[qRes].toPDB();
360                                String tPDB = templateAtoms[tRes].toPDB();
361
362                                // merge the two records into 3D format
363                                str.append(qPDB.substring(0, 30)); // up through coordinates
364                                str.append(tPDB.substring(30, 54)); // coordinates
365                                str.append(tPDB.substring(22, 27)); // residue number
366                                str.append(' ');
367                                str.append(tPDB.substring(17, 20));
368                                str.append('\n');
369                        }
370                }
371                return str.toString();
372        }
373
374}