001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.multiple.util; 022 023import java.io.IOException; 024import java.io.PrintWriter; 025import java.io.StringWriter; 026import java.util.ArrayList; 027import java.util.List; 028 029import javax.vecmath.Matrix4d; 030 031import org.biojava.nbio.core.util.PrettyXMLWriter; 032import org.biojava.nbio.structure.Atom; 033import org.biojava.nbio.structure.ResidueRange; 034import org.biojava.nbio.structure.StructureException; 035import org.biojava.nbio.structure.StructureIdentifier; 036import org.biojava.nbio.structure.SubstructureIdentifier; 037import org.biojava.nbio.structure.align.multiple.Block; 038import org.biojava.nbio.structure.align.multiple.MultipleAlignment; 039import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble; 040import org.biojava.nbio.structure.align.xml.MultipleAlignmentXMLConverter; 041 042/** 043 * This class contains functions for the conversion of {@link MultipleAlignment} 044 * to various String outputs. 045 * <p> 046 * Supported formats: FASTA, FatCat, Aligned Residues, Transformation Matrices, 047 * XML, 3D format. 048 * 049 * @author Aleix Lafita 050 * @since 4.1.0 051 * 052 */ 053public class MultipleAlignmentWriter { 054 055 /** 056 * Converts the {@link MultipleAlignment} into a multiple sequence alignment 057 * String in FASTA format. 058 * 059 * @param alignment 060 * MultipleAlignment 061 * @return String multiple sequence alignment in FASTA format 062 * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment) 063 */ 064 public static String toFASTA(MultipleAlignment alignment) { 065 066 // Get the alignment sequences 067 List<String> alnSequences = MultipleAlignmentTools 068 .getSequenceAlignment(alignment); 069 070 String fasta = ""; 071 for (int st = 0; st < alignment.size(); st++) { 072 // Add the structure identifier as the head of the FASTA 073 fasta += ">" + alignment.getEnsemble().getStructureIdentifiers().get(st).getIdentifier() 074 + "\n" + alnSequences.get(st) + "\n"; 075 } 076 return fasta; 077 } 078 079 /** 080 * Converts the {@link MultipleAlignment} into a FatCat String format. 081 * Includes summary information about the alignment in the top and a 082 * multiple sequence alignment at the bottom. 083 * 084 * @param alignment 085 * MultipleAlignment 086 * @return String multiple sequence alignment in FASTA format 087 * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment) 088 */ 089 public static String toFatCat(MultipleAlignment alignment) { 090 091 // Initialize the String and put the summary information 092 StringWriter fatcat = new StringWriter(); 093 fatcat.append(alignment.toString() + "\n\n"); 094 095 // Get the alignment sequences and the mapping 096 List<Integer> mapSeqToStruct = new ArrayList<Integer>(); 097 List<String> alnSequences = MultipleAlignmentTools 098 .getSequenceAlignment(alignment, mapSeqToStruct); 099 100 // Get the String of the Block Numbers for Position 101 String blockNumbers = ""; 102 for (int pos = 0; pos < alnSequences.get(0).length(); pos++) { 103 int blockNr = MultipleAlignmentTools.getBlockForSequencePosition( 104 alignment, mapSeqToStruct, pos); 105 if (blockNr != -1) { 106 blockNumbers = blockNumbers.concat(String.valueOf(blockNr + 1)); 107 } else 108 blockNumbers = blockNumbers.concat(" "); 109 } 110 111 // Write the Sequence Alignment 112 for (int str = 0; str < alignment.size(); str++) { 113 if (str < 9) { 114 fatcat.append("Chain 0" + (str + 1) + ": " 115 + alnSequences.get(str) + "\n"); 116 } else { 117 fatcat.append("Chain " + (str + 1) + ": " 118 + alnSequences.get(str) + "\n"); 119 } 120 if (str != alignment.size() - 1) { 121 fatcat.append(" " + blockNumbers + "\n"); 122 } 123 } 124 return fatcat.toString(); 125 } 126 127 /** 128 * Converts the alignment to its simplest form: a list of groups of aligned 129 * residues. Format is one line per residue group, tab delimited: 130 * <ul> 131 * <li>PDB number (includes insertion code) 132 * <li>Chain 133 * <li>Amino Acid (three letter code)</li> 134 * </ul> 135 * Example: <code>52 A ALA 102 A VAL 154 A THR</code> 136 * <p> 137 * Note that this format loses information about blocks. 138 * 139 * @param multAln 140 * MultipleAlignment object 141 * @return a String representation of the aligned residues. 142 */ 143 public static String toAlignedResidues(MultipleAlignment multAln) { 144 StringWriter residueGroup = new StringWriter(); 145 146 // Write structure names & PDB codes 147 for (int str = 0; str < multAln.size(); str++) { 148 residueGroup.append("#Struct" + (str + 1) + ":\t"); 149 residueGroup.append(multAln.getEnsemble().getStructureIdentifiers() 150 .get(str).getIdentifier()); 151 residueGroup.append("\n"); 152 } 153 // Whrite header for columns 154 for (int str = 0; str < multAln.size(); str++) 155 residueGroup.append("#Num" + (str + 1) + "\tChain" + (str + 1) 156 + "\tAA" + (str + 1) + "\t"); 157 residueGroup.append("\n"); 158 159 // Write optimally aligned pairs 160 for (Block b : multAln.getBlocks()) { 161 for (int res = 0; res < b.length(); res++) { 162 for (int str = 0; str < multAln.size(); str++) { 163 Integer residue = b.getAlignRes().get(str).get(res); 164 if (residue == null) { 165 residueGroup.append("-"); 166 residueGroup.append('\t'); 167 residueGroup.append("-"); 168 residueGroup.append('\t'); 169 residueGroup.append("-"); 170 residueGroup.append('\t'); 171 } else { 172 Atom atom = multAln.getAtomArrays().get(str)[residue]; 173 174 residueGroup.append(atom.getGroup().getResidueNumber() 175 .toString()); 176 residueGroup.append('\t'); 177 residueGroup.append(atom.getGroup().getChain() 178 // ABradley - I'm assuming Auth Id's here 04/05/16 179 .getName()); 180 residueGroup.append('\t'); 181 residueGroup.append(atom.getGroup().getPDBName()); 182 residueGroup.append('\t'); 183 } 184 } 185 residueGroup.append('\n'); 186 } 187 } 188 return residueGroup.toString(); 189 } 190 191 /** 192 * Converts the transformation Matrices of the alignment into a String 193 * output. 194 * 195 * @param afpChain 196 * @return String transformation Matrices 197 */ 198 public static String toTransformMatrices(MultipleAlignment alignment) { 199 200 StringBuffer txt = new StringBuffer(); 201 202 for (int bs = 0; bs < alignment.getBlockSets().size(); bs++) { 203 204 List<Matrix4d> btransforms = alignment.getBlockSet(bs) 205 .getTransformations(); 206 if (btransforms == null || btransforms.size() < 1) 207 continue; 208 209 if (alignment.getBlockSets().size() > 1) { 210 txt.append("Operations for block "); 211 txt.append(bs + 1); 212 txt.append("\n"); 213 } 214 215 for (int str = 0; str < alignment.size(); str++) { 216 String origString = "ref"; 217 218 txt.append(String.format(" X"+(str+1)+ " = (%9.6f)*X"+ 219 origString +" + (%9.6f)*Y"+ 220 origString +" + (%9.6f)*Z"+ 221 origString +" + (%12.6f)", 222 btransforms.get(str).getElement(0,0), 223 btransforms.get(str).getElement(0,1), 224 btransforms.get(str).getElement(0,2), 225 btransforms.get(str).getElement(0,3))); 226 txt.append( "\n"); 227 txt.append(String.format(" Y"+(str+1)+" = (%9.6f)*X"+ 228 origString +" + (%9.6f)*Y"+ 229 origString +" + (%9.6f)*Z"+ 230 origString +" + (%12.6f)", 231 btransforms.get(str).getElement(1,0), 232 btransforms.get(str).getElement(1,1), 233 btransforms.get(str).getElement(1,2), 234 btransforms.get(str).getElement(1,3))); 235 txt.append( "\n"); 236 txt.append(String.format(" Z"+(str+1)+" = (%9.6f)*X"+ 237 origString +" + (%9.6f)*Y"+ 238 origString +" + (%9.6f)*Z"+ 239 origString +" + (%12.6f)", 240 btransforms.get(str).getElement(2,0), 241 btransforms.get(str).getElement(2,1), 242 btransforms.get(str).getElement(2,2), 243 btransforms.get(str).getElement(2,3))); 244 txt.append("\n\n"); 245 } 246 } 247 return txt.toString(); 248 } 249 250 /** 251 * Converts all the information of a multiple alignment ensemble into an XML 252 * String format. Cached variables, like transformation matrices and scores, 253 * are also converted. 254 * 255 * @param ensemble 256 * the MultipleAlignmentEnsemble to convert. 257 * @return String XML representation of the ensemble 258 * @throws IOException 259 * @see MultipleAlignmentXMLConverter Helper methods for XML conversion 260 */ 261 public static String toXML(MultipleAlignmentEnsemble ensemble) 262 throws IOException { 263 264 StringWriter result = new StringWriter(); 265 PrintWriter writer = new PrintWriter(result); 266 PrettyXMLWriter xml = new PrettyXMLWriter(writer); 267 268 MultipleAlignmentXMLConverter.printXMLensemble(xml, ensemble); 269 270 writer.close(); 271 272 return result.toString(); 273 } 274 275 /** 276 * Outputs a pairwise alignment in I-TASSER's 3D Format for target-template 277 * alignment. http://zhanglab.ccmb.med.umich.edu/I-TASSER/option4.html 278 * 279 * <p> 280 * The format is closely related to a standard PDB file, but contains only 281 * CA atoms and adds two columns for specifying the alignment: 282 * 283 * <pre> 284 * ATOM 2001 CA MET 1 41.116 -30.727 6.866 129 THR 285 * ATOM 2002 CA ALA 2 39.261 -27.408 6.496 130 ARG 286 * ATOM 2003 CA ALA 3 35.665 -27.370 7.726 131 THR 287 * ATOM 2004 CA ARG 4 32.662 -25.111 7.172 132 ARG 288 * ATOM 2005 CA GLY 5 29.121 -25.194 8.602 133 ARG 289 * 290 * Column 1 -30: Atom & Residue records of query sequence. 291 * Column 31-54: Coordinates of atoms in query copied from corresponding atoms in template. 292 * Column 55-59: Corresponding residue number in template based on alignment 293 * Column 60-64: Corresponding residue name in template 294 * </pre> 295 * 296 * <p> 297 * Note that the output is a pairwise alignment. Only the first and second 298 * rows in the MultipleAlignment will be used, others ignored. 299 * 300 * <p> 301 * This method supports topology-independent alignments. The output will 302 * have sequence order matching the query, but include atoms from the 303 * template. 304 * 305 * @param alignment 306 * A <em>full</em> multiple alignment between proteins 307 * @param queryIndex 308 * index of the query within the multiple alignment 309 * @param templateIndex 310 * index of the template within the multiple alignment 311 * @return The file contents as a string 312 * @throws StructureException If an error occurs parsing the alignment's structure names 313 */ 314 public static String to3DFormat(MultipleAlignment alignment, 315 int queryIndex, int templateIndex) throws StructureException { 316 List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays(); 317 Atom[] queryAtoms = atomArrays.get(queryIndex); 318 Atom[] templateAtoms = atomArrays.get(templateIndex); 319 320 List<Block> blocks = alignment.getBlocks(); 321 MultipleAlignmentTools.sortBlocks(blocks, queryIndex); 322 323 StringBuilder str = new StringBuilder(); 324 325 // Gather info about the template structure 326 StructureIdentifier tName = alignment.getEnsemble().getStructureIdentifiers() 327 .get(templateIndex); 328 SubstructureIdentifier canon = tName.toCanonical(); 329 String tPdbId = canon.getPdbId(); 330 String tChain = null; 331 for(ResidueRange range : canon.getResidueRanges()) { 332 tChain = range.getChainName(); 333 break; 334 } 335 336 if (tChain == null) { 337 // Use the chain of the first template block 338 for (Integer i : blocks.get(0).getAlignRes().get(templateIndex)) { 339 if (i != null) { 340 tChain = templateAtoms[i].getGroup().getChainId(); 341 break; 342 } 343 } 344 } 345 str.append(String 346 .format("REMARK Template name:%s:%s\n", tPdbId, tChain)); 347 for (Block block : blocks) { 348 List<Integer> qAlign = block.getAlignRes().get(queryIndex); 349 List<Integer> tAlign = block.getAlignRes().get(templateIndex); 350 for (int i = 0; i < block.length(); i++) { 351 Integer qRes = qAlign.get(i); 352 Integer tRes = tAlign.get(i); 353 354 // skip gaps 355 if (qRes == null || tRes == null) 356 continue; 357 358 // Get PDB-format ATOM records 359 String qPDB = queryAtoms[qRes].toPDB(); 360 String tPDB = templateAtoms[tRes].toPDB(); 361 362 // merge the two records into 3D format 363 str.append(qPDB.substring(0, 30)); // up through coordinates 364 str.append(tPDB.substring(30, 54)); // coordinates 365 str.append(tPDB.substring(22, 27)); // residue number 366 str.append(' '); 367 str.append(tPDB.substring(17, 20)); 368 str.append('\n'); 369 } 370 } 371 return str.toString(); 372 } 373 374}