001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.multiple.util;
022
023import java.io.IOException;
024import java.io.PrintWriter;
025import java.io.StringWriter;
026import java.util.ArrayList;
027import java.util.List;
028import java.util.Locale;
029
030import javax.vecmath.Matrix4d;
031
032import org.biojava.nbio.core.util.PrettyXMLWriter;
033import org.biojava.nbio.structure.Atom;
034import org.biojava.nbio.structure.PdbId;
035import org.biojava.nbio.structure.ResidueRange;
036import org.biojava.nbio.structure.StructureException;
037import org.biojava.nbio.structure.StructureIdentifier;
038import org.biojava.nbio.structure.SubstructureIdentifier;
039import org.biojava.nbio.structure.align.multiple.Block;
040import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
041import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
042import org.biojava.nbio.structure.align.xml.MultipleAlignmentXMLConverter;
043
044/**
045 * This class contains functions for the conversion of {@link MultipleAlignment}
046 * to various String outputs.
047 * <p>
048 * Supported formats: FASTA, FatCat, Aligned Residues, Transformation Matrices,
049 * XML, 3D format.
050 *
051 * @author Aleix Lafita
052 * @since 4.1.0
053 *
054 */
055public class MultipleAlignmentWriter {
056
057        /**
058         * Converts the {@link MultipleAlignment} into a multiple sequence alignment
059         * String in FASTA format.
060         *
061         * @param alignment
062         *            MultipleAlignment
063         * @return String multiple sequence alignment in FASTA format
064         * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
065         */
066        public static String toFASTA(MultipleAlignment alignment) {
067
068                // Get the alignment sequences
069                List<String> alnSequences = MultipleAlignmentTools
070                                .getSequenceAlignment(alignment);
071
072                String fasta = "";
073                for (int st = 0; st < alignment.size(); st++) {
074                        // Add the structure identifier as the head of the FASTA
075                        fasta += ">" + alignment.getEnsemble().getStructureIdentifiers().get(st).getIdentifier()
076                                        + "\n" + alnSequences.get(st) + "\n";
077                }
078                return fasta;
079        }
080
081        /**
082         * Converts the {@link MultipleAlignment} into a FatCat String format.
083         * Includes summary information about the alignment in the top and a
084         * multiple sequence alignment at the bottom.
085         *
086         * @param alignment
087         *            MultipleAlignment
088         * @return String multiple sequence alignment in FASTA format
089         * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
090         */
091        public static String toFatCat(MultipleAlignment alignment) {
092
093                // Initialize the String and put the summary information
094                StringWriter fatcat = new StringWriter();
095                fatcat.append(alignment.toString() + "\n\n");
096
097                // Get the alignment sequences and the mapping
098                List<Integer> mapSeqToStruct = new ArrayList<>();
099                List<String> alnSequences = MultipleAlignmentTools
100                                .getSequenceAlignment(alignment, mapSeqToStruct);
101
102                // Get the String of the Block Numbers for Position
103                String blockNumbers = "";
104                for (int pos = 0; pos < alnSequences.get(0).length(); pos++) {
105                        int blockNr = MultipleAlignmentTools.getBlockForSequencePosition(
106                                        alignment, mapSeqToStruct, pos);
107                        if (blockNr != -1) {
108                                blockNumbers = blockNumbers.concat(String.valueOf(blockNr + 1));
109                        } else
110                                blockNumbers = blockNumbers.concat(" ");
111                }
112
113                // Write the Sequence Alignment
114                for (int str = 0; str < alignment.size(); str++) {
115                        if (str < 9) {
116                                fatcat.append("Chain 0" + (str + 1) + ": "
117                                                + alnSequences.get(str) + "\n");
118                        } else {
119                                fatcat.append("Chain " + (str + 1) + ": "
120                                                + alnSequences.get(str) + "\n");
121                        }
122                        if (str != alignment.size() - 1) {
123                                fatcat.append("          " + blockNumbers + "\n");
124                        }
125                }
126                return fatcat.toString();
127        }
128
129        /**
130         * Converts the alignment to its simplest form: a list of groups of aligned
131         * residues. Format is one line per residue group, tab delimited:
132         * <ul>
133         * <li>PDB number (includes insertion code)
134         * <li>Chain
135         * <li>Amino Acid (three letter code)</li>
136         * </ul>
137         * Example: <code>52    A       ALA     102     A       VAL     154     A       THR</code>
138         * <p>
139         * Note that this format loses information about blocks.
140         *
141         * @param multAln
142         *            MultipleAlignment object
143         * @return a String representation of the aligned residues.
144         */
145        public static String toAlignedResidues(MultipleAlignment multAln) {
146                StringWriter residueGroup = new StringWriter();
147
148                // Write structure names & PDB codes
149                for (int str = 0; str < multAln.size(); str++) {
150                        residueGroup.append("#Struct" + (str + 1) + ":\t");
151                        residueGroup.append(multAln.getEnsemble().getStructureIdentifiers()
152                                        .get(str).getIdentifier());
153                        residueGroup.append("\n");
154                }
155                // Whrite header for columns
156                for (int str = 0; str < multAln.size(); str++)
157                        residueGroup.append("#Num" + (str + 1) + "\tChain" + (str + 1)
158                                        + "\tAA" + (str + 1) + "\t");
159                residueGroup.append("\n");
160
161                // Write optimally aligned pairs
162                for (Block b : multAln.getBlocks()) {
163                        for (int res = 0; res < b.length(); res++) {
164                                for (int str = 0; str < multAln.size(); str++) {
165                                        Integer residue = b.getAlignRes().get(str).get(res);
166                                        if (residue == null) {
167                                                residueGroup.append("-");
168                                                residueGroup.append('\t');
169                                                residueGroup.append("-");
170                                                residueGroup.append('\t');
171                                                residueGroup.append("-");
172                                                residueGroup.append('\t');
173                                        } else {
174                                                Atom atom = multAln.getAtomArrays().get(str)[residue];
175
176                                                residueGroup.append(atom.getGroup().getResidueNumber()
177                                                                .toString());
178                                                residueGroup.append('\t');
179                                                residueGroup.append(atom.getGroup().getChain()
180                                // ABradley - I'm assuming Auth Id's here 04/05/16
181                                                                .getName());
182                                                residueGroup.append('\t');
183                                                residueGroup.append(atom.getGroup().getPDBName());
184                                                residueGroup.append('\t');
185                                        }
186                                }
187                                residueGroup.append('\n');
188                        }
189                }
190                return residueGroup.toString();
191        }
192
193        /**
194         * Converts the transformation Matrices of the alignment into a String
195         * output.
196         *
197         * @param alignment
198         * @return String transformation Matrices
199         */
200        public static String toTransformMatrices(MultipleAlignment alignment) {
201
202                StringBuffer txt = new StringBuffer();
203
204                for (int bs = 0; bs < alignment.getBlockSets().size(); bs++) {
205
206                        List<Matrix4d> btransforms = alignment.getBlockSet(bs)
207                                        .getTransformations();
208                        if (btransforms == null || btransforms.size() < 1)
209                                continue;
210
211                        if (alignment.getBlockSets().size() > 1) {
212                                txt.append("Operations for block ");
213                                txt.append(bs + 1);
214                                txt.append("\n");
215                        }
216
217                        for (int str = 0; str < alignment.size(); str++) {
218                                String origString = "ref";
219
220                                txt.append(String.format(Locale.US, "     X"+(str+1)+ " = (%9.6f)*X"+
221                                                origString +" + (%9.6f)*Y"+
222                                                origString +" + (%9.6f)*Z"+
223                                                origString +" + (%12.6f)",
224                                                btransforms.get(str).getElement(0,0),
225                                                btransforms.get(str).getElement(0,1),
226                                                btransforms.get(str).getElement(0,2),
227                                                btransforms.get(str).getElement(0,3)));
228                                txt.append( "\n");
229                                txt.append(String.format(Locale.US, "     Y"+(str+1)+" = (%9.6f)*X"+
230                                                origString +" + (%9.6f)*Y"+
231                                                origString +" + (%9.6f)*Z"+
232                                                origString +" + (%12.6f)",
233                                                btransforms.get(str).getElement(1,0),
234                                                btransforms.get(str).getElement(1,1),
235                                                btransforms.get(str).getElement(1,2),
236                                                btransforms.get(str).getElement(1,3)));
237                                txt.append( "\n");
238                                txt.append(String.format(Locale.US, "     Z"+(str+1)+" = (%9.6f)*X"+
239                                                origString +" + (%9.6f)*Y"+
240                                                origString +" + (%9.6f)*Z"+
241                                                origString +" + (%12.6f)",
242                                                btransforms.get(str).getElement(2,0),
243                                                btransforms.get(str).getElement(2,1),
244                                                btransforms.get(str).getElement(2,2),
245                                                btransforms.get(str).getElement(2,3)));
246                                txt.append("\n\n");
247                        }
248                }
249                return txt.toString();
250        }
251
252        /**
253         * Converts all the information of a multiple alignment ensemble into an XML
254         * String format. Cached variables, like transformation matrices and scores,
255         * are also converted.
256         *
257         * @param ensemble
258         *            the MultipleAlignmentEnsemble to convert.
259         * @return String XML representation of the ensemble
260         * @throws IOException
261         * @see MultipleAlignmentXMLConverter Helper methods for XML conversion
262         */
263        public static String toXML(MultipleAlignmentEnsemble ensemble)
264                        throws IOException {
265
266                StringWriter result = new StringWriter();
267                PrintWriter writer = new PrintWriter(result);
268                PrettyXMLWriter xml = new PrettyXMLWriter(writer);
269
270                MultipleAlignmentXMLConverter.printXMLensemble(xml, ensemble);
271
272                writer.close();
273
274                return result.toString();
275        }
276
277        /**
278         * Outputs a pairwise alignment in I-TASSER's 3D Format for target-template
279         * alignment. http://zhanglab.ccmb.med.umich.edu/I-TASSER/option4.html
280         *
281         * <p>
282         * The format is closely related to a standard PDB file, but contains only
283         * CA atoms and adds two columns for specifying the alignment:
284         *
285         * <pre>
286         * ATOM   2001  CA  MET     1      41.116 -30.727   6.866  129 THR
287         * ATOM   2002  CA  ALA     2      39.261 -27.408   6.496  130 ARG
288         * ATOM   2003  CA  ALA     3      35.665 -27.370   7.726  131 THR
289         * ATOM   2004  CA  ARG     4      32.662 -25.111   7.172  132 ARG
290         * ATOM   2005  CA  GLY     5      29.121 -25.194   8.602  133 ARG
291         *
292         * Column 1 -30: Atom and Residue records of query sequence.
293         * Column 31-54: Coordinates of atoms in query copied from corresponding atoms in template.
294         * Column 55-59: Corresponding residue number in template based on alignment
295         * Column 60-64: Corresponding residue name in template
296         * </pre>
297         *
298         * <p>
299         * Note that the output is a pairwise alignment. Only the first and second
300         * rows in the MultipleAlignment will be used, others ignored.
301         *
302         * <p>
303         * This method supports topology-independent alignments. The output will
304         * have sequence order matching the query, but include atoms from the
305         * template.
306         *
307         * @param alignment
308         *            A <em>full</em> multiple alignment between proteins
309         * @param queryIndex
310         *            index of the query within the multiple alignment
311         * @param templateIndex
312         *            index of the template within the multiple alignment
313         * @return The file contents as a string
314         * @throws StructureException If an error occurs parsing the alignment's structure names
315         */
316        public static String to3DFormat(MultipleAlignment alignment,
317                        int queryIndex, int templateIndex) throws StructureException {
318                List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays();
319                Atom[] queryAtoms = atomArrays.get(queryIndex);
320                Atom[] templateAtoms = atomArrays.get(templateIndex);
321
322                List<Block> blocks = alignment.getBlocks();
323                MultipleAlignmentTools.sortBlocks(blocks, queryIndex);
324
325                StringBuilder str = new StringBuilder();
326
327                // Gather info about the template structure
328                StructureIdentifier tName = alignment.getEnsemble().getStructureIdentifiers()
329                                .get(templateIndex);
330                SubstructureIdentifier canon = tName.toCanonical();
331                PdbId tPdbId = canon.getPdbId();
332                String tChain = null;
333                for(ResidueRange range : canon.getResidueRanges()) {
334                        tChain = range.getChainName();
335                        break;
336                }
337
338                if (tChain == null) {
339                        // Use the chain of the first template block
340                        for (Integer i : blocks.get(0).getAlignRes().get(templateIndex)) {
341                                if (i != null) {
342                                        tChain = templateAtoms[i].getGroup().getChainId();
343                                        break;
344                                }
345                        }
346                }
347                str.append(String
348                                .format("REMARK Template name:%s:%s\n", tPdbId, tChain));
349                for (Block block : blocks) {
350                        List<Integer> qAlign = block.getAlignRes().get(queryIndex);
351                        List<Integer> tAlign = block.getAlignRes().get(templateIndex);
352                        for (int i = 0; i < block.length(); i++) {
353                                Integer qRes = qAlign.get(i);
354                                Integer tRes = tAlign.get(i);
355
356                                // skip gaps
357                                if (qRes == null || tRes == null)
358                                        continue;
359
360                                // Get PDB-format ATOM records
361                                String qPDB = queryAtoms[qRes].toPDB();
362                                String tPDB = templateAtoms[tRes].toPDB();
363
364                                // merge the two records into 3D format
365                                str.append(qPDB.substring(0, 30)); // up through coordinates
366                                str.append(tPDB.substring(30, 54)); // coordinates
367                                str.append(tPDB.substring(22, 27)); // residue number
368                                str.append(' ');
369                                str.append(tPDB.substring(17, 20));
370                                str.append('\n');
371                        }
372                }
373                return str.toString();
374        }
375
376}