001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.multiple.util; 022 023import java.io.IOException; 024import java.io.PrintWriter; 025import java.io.StringWriter; 026import java.util.ArrayList; 027import java.util.List; 028 029import javax.vecmath.Matrix4d; 030 031import org.biojava.nbio.core.util.PrettyXMLWriter; 032import org.biojava.nbio.structure.Atom; 033import org.biojava.nbio.structure.ResidueRange; 034import org.biojava.nbio.structure.StructureException; 035import org.biojava.nbio.structure.StructureIdentifier; 036import org.biojava.nbio.structure.SubstructureIdentifier; 037import org.biojava.nbio.structure.align.multiple.Block; 038import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 039import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 040import org.biojava.nbio.structure.align.xml.MultipleAlignmentXMLConverter; 041 042/** 043 * This class contains functions for the conversion of {@link MultipleAlignment} 044 * to various String outputs. 045 * <p> 046 * Supported formats: FASTA, FatCat, Aligned Residues, Transformation Matrices, 047 * XML, 3D format. 048 * 049 * @author Aleix Lafita 050 * @since 4.1.0 051 * 052 */ 053public class MultipleAlignmentWriter { 054 055 /** 056 * Converts the {@link MultipleAlignment} into a multiple sequence alignment 057 * String in FASTA format. 058 * 059 * @param alignment 060 * MultipleAlignment 061 * @return String multiple sequence alignment in FASTA format 062 * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment) 063 */ 064 public static String toFASTA(MultipleAlignment alignment) { 065 066 // Get the alignment sequences 067 List<String> alnSequences = MultipleAlignmentTools 068 .getSequenceAlignment(alignment); 069 070 String fasta = ""; 071 for (int st = 0; st < alignment.size(); st++) { 072 // Add the structure identifier as the head of the FASTA 073 fasta += ">" + alignment.getEnsemble().getStructureIdentifiers().get(st).getIdentifier() 074 + "\n" + alnSequences.get(st) + "\n"; 075 } 076 return fasta; 077 } 078 079 /** 080 * Converts the {@link MultipleAlignment} into a FatCat String format. 081 * Includes summary information about the alignment in the top and a 082 * multiple sequence alignment at the bottom. 083 * 084 * @param alignment 085 * MultipleAlignment 086 * @return String multiple sequence alignment in FASTA format 087 * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment) 088 */ 089 public static String toFatCat(MultipleAlignment alignment) { 090 091 // Initialize the String and put the summary information 092 StringWriter fatcat = new StringWriter(); 093 fatcat.append(alignment.toString() + "\n\n"); 094 095 // Get the alignment sequences and the mapping 096 List<Integer> mapSeqToStruct = new ArrayList<Integer>(); 097 List<String> alnSequences = MultipleAlignmentTools 098 .getSequenceAlignment(alignment, mapSeqToStruct); 099 100 // Get the String of the Block Numbers for Position 101 String blockNumbers = ""; 102 for (int pos = 0; pos < alnSequences.get(0).length(); pos++) { 103 int blockNr = MultipleAlignmentTools.getBlockForSequencePosition( 104 alignment, mapSeqToStruct, pos); 105 if (blockNr != -1) { 106 blockNumbers = blockNumbers.concat("" + (blockNr + 1)); 107 } else 108 blockNumbers = blockNumbers.concat(" "); 109 } 110 111 // Write the Sequence Alignment 112 for (int str = 0; str < alignment.size(); str++) { 113 if (str < 9) { 114 fatcat.append("Chain 0" + (str + 1) + ": " 115 + alnSequences.get(str) + "\n"); 116 } else { 117 fatcat.append("Chain " + (str + 1) + ": " 118 + alnSequences.get(str) + "\n"); 119 } 120 if (str != alignment.size() - 1) { 121 fatcat.append(" " + blockNumbers + "\n"); 122 } 123 } 124 return fatcat.toString(); 125 } 126 127 /** 128 * Converts the alignment to its simplest form: a list of groups of aligned 129 * residues. Format is one line per residue group, tab delimited: 130 * <ul> 131 * <li>PDB number (includes insertion code) 132 * <li>Chain 133 * <li>Amino Acid (three letter code)</li> 134 * </ul> 135 * Example: <code>52 A ALA 102 A VAL 154 A THR</code> 136 * <p> 137 * Note that this format loses information about blocks. 138 * 139 * @param multAln 140 * MultipleAlignment object 141 * @return a String representation of the aligned residues. 142 */ 143 public static String toAlignedResidues(MultipleAlignment multAln) { 144 StringWriter residueGroup = new StringWriter(); 145 146 // Write structure names & PDB codes 147 for (int str = 0; str < multAln.size(); str++) { 148 residueGroup.append("#Struct" + (str + 1) + ":\t"); 149 residueGroup.append(multAln.getEnsemble().getStructureIdentifiers() 150 .get(str).getIdentifier()); 151 residueGroup.append("\n"); 152 } 153 // Whrite header for columns 154 for (int str = 0; str < multAln.size(); str++) 155 residueGroup.append("#Num" + (str + 1) + "\tChain" + (str + 1) 156 + "\tAA" + (str + 1) + "\t"); 157 residueGroup.append("\n"); 158 159 // Write optimally aligned pairs 160 for (Block b : multAln.getBlocks()) { 161 for (int res = 0; res < b.length(); res++) { 162 for (int str = 0; str < multAln.size(); str++) { 163 Integer residue = b.getAlignRes().get(str).get(res); 164 if (residue == null) { 165 residueGroup.append("-"); 166 residueGroup.append('\t'); 167 residueGroup.append("-"); 168 residueGroup.append('\t'); 169 residueGroup.append("-"); 170 residueGroup.append('\t'); 171 } else { 172 Atom atom = multAln.getAtomArrays().get(str)[residue]; 173 174 residueGroup.append(atom.getGroup().getResidueNumber() 175 .toString()); 176 residueGroup.append('\t'); 177 residueGroup.append(atom.getGroup().getChain() 178 .getChainID()); 179 residueGroup.append('\t'); 180 residueGroup.append(atom.getGroup().getPDBName()); 181 residueGroup.append('\t'); 182 } 183 } 184 residueGroup.append('\n'); 185 } 186 } 187 return residueGroup.toString(); 188 } 189 190 /** 191 * Converts the transformation Matrices of the alignment into a String 192 * output. 193 * 194 * @param afpChain 195 * @return String transformation Matrices 196 */ 197 public static String toTransformMatrices(MultipleAlignment alignment) { 198 199 StringBuffer txt = new StringBuffer(); 200 201 for (int bs = 0; bs < alignment.getBlockSets().size(); bs++) { 202 203 List<Matrix4d> btransforms = alignment.getBlockSet(bs) 204 .getTransformations(); 205 if (btransforms == null || btransforms.size() < 1) 206 continue; 207 208 if (alignment.getBlockSets().size() > 1) { 209 txt.append("Operations for block "); 210 txt.append(bs + 1); 211 txt.append("\n"); 212 } 213 214 for (int str = 0; str < alignment.size(); str++) { 215 String origString = "ref"; 216 217 txt.append(String.format(" X"+(str+1)+ " = (%9.6f)*X"+ 218 origString +" + (%9.6f)*Y"+ 219 origString +" + (%9.6f)*Z"+ 220 origString +" + (%12.6f)", 221 btransforms.get(str).getElement(0,0), 222 btransforms.get(str).getElement(0,1), 223 btransforms.get(str).getElement(0,2), 224 btransforms.get(str).getElement(0,3))); 225 txt.append( "\n"); 226 txt.append(String.format(" Y"+(str+1)+" = (%9.6f)*X"+ 227 origString +" + (%9.6f)*Y"+ 228 origString +" + (%9.6f)*Z"+ 229 origString +" + (%12.6f)", 230 btransforms.get(str).getElement(1,0), 231 btransforms.get(str).getElement(1,1), 232 btransforms.get(str).getElement(1,2), 233 btransforms.get(str).getElement(1,3))); 234 txt.append( "\n"); 235 txt.append(String.format(" Z"+(str+1)+" = (%9.6f)*X"+ 236 origString +" + (%9.6f)*Y"+ 237 origString +" + (%9.6f)*Z"+ 238 origString +" + (%12.6f)", 239 btransforms.get(str).getElement(2,0), 240 btransforms.get(str).getElement(2,1), 241 btransforms.get(str).getElement(2,2), 242 btransforms.get(str).getElement(2,3))); 243 txt.append("\n\n"); 244 } 245 } 246 return txt.toString(); 247 } 248 249 /** 250 * Converts all the information of a multiple alignment ensemble into an XML 251 * String format. Cached variables, like transformation matrices and scores, 252 * are also converted. 253 * 254 * @param ensemble 255 * the MultipleAlignmentEnsemble to convert. 256 * @return String XML representation of the ensemble 257 * @throws IOException 258 * @see MultipleAlignmentXMLConverter Helper methods for XML conversion 259 */ 260 public static String toXML(MultipleAlignmentEnsemble ensemble) 261 throws IOException { 262 263 StringWriter result = new StringWriter(); 264 PrintWriter writer = new PrintWriter(result); 265 PrettyXMLWriter xml = new PrettyXMLWriter(writer); 266 267 MultipleAlignmentXMLConverter.printXMLensemble(xml, ensemble); 268 269 writer.close(); 270 271 return result.toString(); 272 } 273 274 /** 275 * Outputs a pairwise alignment in I-TASSER's 3D Format for target-template 276 * alignment. http://zhanglab.ccmb.med.umich.edu/I-TASSER/option4.html 277 * 278 * <p> 279 * The format is closely related to a standard PDB file, but contains only 280 * CA atoms and adds two columns for specifying the alignment: 281 * 282 * <pre> 283 * ATOM 2001 CA MET 1 41.116 -30.727 6.866 129 THR 284 * ATOM 2002 CA ALA 2 39.261 -27.408 6.496 130 ARG 285 * ATOM 2003 CA ALA 3 35.665 -27.370 7.726 131 THR 286 * ATOM 2004 CA ARG 4 32.662 -25.111 7.172 132 ARG 287 * ATOM 2005 CA GLY 5 29.121 -25.194 8.602 133 ARG 288 * 289 * Column 1 -30: Atom & Residue records of query sequence. 290 * Column 31-54: Coordinates of atoms in query copied from corresponding atoms in template. 291 * Column 55-59: Corresponding residue number in template based on alignment 292 * Column 60-64: Corresponding residue name in template 293 * </pre> 294 * 295 * <p> 296 * Note that the output is a pairwise alignment. Only the first and second 297 * rows in the MultipleAlignment will be used, others ignored. 298 * 299 * <p> 300 * This method supports topology-independent alignments. The output will 301 * have sequence order matching the query, but include atoms from the 302 * template. 303 * 304 * @param alignment 305 * A <em>full</em> multiple alignment between proteins 306 * @param queryIndex 307 * index of the query within the multiple alignment 308 * @param templateIndex 309 * index of the template within the multiple alignment 310 * @return The file contents as a string 311 * @throws StructureException If an error occurs parsing the alignment's structure names 312 */ 313 public static String to3DFormat(MultipleAlignment alignment, 314 int queryIndex, int templateIndex) throws StructureException { 315 List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays(); 316 Atom[] queryAtoms = atomArrays.get(queryIndex); 317 Atom[] templateAtoms = atomArrays.get(templateIndex); 318 319 List<Block> blocks = alignment.getBlocks(); 320 MultipleAlignmentTools.sortBlocks(blocks, queryIndex); 321 322 StringBuilder str = new StringBuilder(); 323 324 // Gather info about the template structure 325 StructureIdentifier tName = alignment.getEnsemble().getStructureIdentifiers() 326 .get(templateIndex); 327 SubstructureIdentifier canon = tName.toCanonical(); 328 String tPdbId = canon.getPdbId(); 329 String tChain = null; 330 for(ResidueRange range : canon.getResidueRanges()) { 331 tChain = range.getChainId(); 332 break; 333 } 334 335 if (tChain == null) { 336 // Use the chain of the first template block 337 for (Integer i : blocks.get(0).getAlignRes().get(templateIndex)) { 338 if (i != null) { 339 tChain = templateAtoms[i].getGroup().getChainId(); 340 break; 341 } 342 } 343 } 344 str.append(String 345 .format("REMARK Template name:%s:%s\n", tPdbId, tChain)); 346 for (Block block : blocks) { 347 List<Integer> qAlign = block.getAlignRes().get(queryIndex); 348 List<Integer> tAlign = block.getAlignRes().get(templateIndex); 349 for (int i = 0; i < block.length(); i++) { 350 Integer qRes = qAlign.get(i); 351 Integer tRes = tAlign.get(i); 352 353 // skip gaps 354 if (qRes == null || tRes == null) 355 continue; 356 357 // Get PDB-format ATOM records 358 String qPDB = queryAtoms[qRes].toPDB(); 359 String tPDB = templateAtoms[tRes].toPDB(); 360 361 // merge the two records into 3D format 362 str.append(qPDB.substring(0, 30)); // up through coordinates 363 str.append(tPDB.substring(30, 54)); // coordinates 364 str.append(tPDB.substring(22, 27)); // residue number 365 str.append(' '); 366 str.append(tPDB.substring(17, 20)); 367 str.append('\n'); 368 } 369 } 370 return str.toString(); 371 } 372 373}