/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.align.multiple.util;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.core.util.PrettyXMLWriter;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.PdbId;
import org.biojava.nbio.structure.ResidueRange;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIdentifier;
import org.biojava.nbio.structure.SubstructureIdentifier;
import org.biojava.nbio.structure.align.multiple.Block;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsemble;
import org.biojava.nbio.structure.align.xml.MultipleAlignmentXMLConverter;

/**
 * This class contains functions for the conversion of {@link MultipleAlignment}
 * to various String outputs.
 * <p>
 * Supported formats: FASTA, FatCat, Aligned Residues, Transformation Matrices,
 * XML, 3D format.
 *
 * @author Aleix Lafita
 * @since 4.1.0
 *
 */
public class MultipleAlignmentWriter {

    /**
     * Format of one row of a transformation matrix. Arguments, in order: the
     * axis letter, the 1-based structure number, the three rotation
     * coefficients and the translation component of that row.
     */
    private static final String TRANSFORM_ROW_FORMAT =
            " %c%d = (%9.6f)*Xref + (%9.6f)*Yref + (%9.6f)*Zref + (%12.6f)";

    /** Placeholder columns (number, chain, amino acid) written for a gap. */
    private static final String GAP_COLUMNS = "-\t-\t-\t";

    /**
     * Converts the {@link MultipleAlignment} into a multiple sequence alignment
     * String in FASTA format.
     *
     * @param alignment
     *            MultipleAlignment
     * @return String multiple sequence alignment in FASTA format
     * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment)
     */
    public static String toFASTA(MultipleAlignment alignment) {

        // Get the alignment sequences
        List<String> alnSequences = MultipleAlignmentTools
                .getSequenceAlignment(alignment);

        // A builder avoids re-copying the whole output for every structure
        StringBuilder fasta = new StringBuilder();
        for (int st = 0; st < alignment.size(); st++) {
            // The structure identifier is the header line of each record
            fasta.append('>')
                    .append(alignment.getEnsemble().getStructureIdentifiers()
                            .get(st).getIdentifier())
                    .append('\n')
                    .append(alnSequences.get(st))
                    .append('\n');
        }
        return fasta.toString();
    }

    /**
     * Converts the {@link MultipleAlignment} into a FatCat String format.
     * Includes summary information about the alignment in the top and a
     * multiple sequence alignment at the bottom. Between every two
     * consecutive sequence rows a row with the (1-based) block number of each
     * aligned column is written, indented to line up with the sequences.
     *
     * @param alignment
     *            MultipleAlignment
     * @return String multiple sequence alignment in FatCat format
     * @see MultipleAlignmentTools#getSequenceAlignment(MultipleAlignment, List)
     */
    public static String toFatCat(MultipleAlignment alignment) {

        // Initialize the output and put the summary information
        StringBuilder fatcat = new StringBuilder();
        fatcat.append(alignment.toString()).append("\n\n");

        // Get the alignment sequences and the mapping
        List<Integer> mapSeqToStruct = new ArrayList<>();
        List<String> alnSequences = MultipleAlignmentTools
                .getSequenceAlignment(alignment, mapSeqToStruct);

        // An alignment without sequences has no columns to annotate
        int alnLength = alnSequences.isEmpty() ? 0 : alnSequences.get(0).length();

        // Build the row of block numbers, one entry per alignment column
        StringBuilder blockNumbers = new StringBuilder(alnLength);
        for (int pos = 0; pos < alnLength; pos++) {
            int blockNr = MultipleAlignmentTools.getBlockForSequencePosition(
                    alignment, mapSeqToStruct, pos);
            if (blockNr != -1) {
                blockNumbers.append(blockNr + 1);
            } else {
                blockNumbers.append(' ');
            }
        }

        // Write the sequence alignment
        int size = alignment.size();
        for (int str = 0; str < size; str++) {
            // Zero-pad single digit chain numbers: "Chain 01: " .. "Chain 10: "
            String label = (str < 9 ? "Chain 0" : "Chain ") + (str + 1) + ": ";
            fatcat.append(label).append(alnSequences.get(str)).append('\n');

            if (str != size - 1) {
                // Indent by the label width so that the block numbers sit
                // directly under the sequence columns they describe
                appendSpaces(fatcat, label.length());
                fatcat.append(blockNumbers).append('\n');
            }
        }
        return fatcat.toString();
    }

    /**
     * Converts the alignment to its simplest form: a list of groups of aligned
     * residues. Format is one line per residue group, tab delimited:
     * <ul>
     * <li>PDB number (includes insertion code)</li>
     * <li>Chain</li>
     * <li>Amino Acid (three letter code)</li>
     * </ul>
     * Example: <code>52 A ALA 102 A VAL 154 A THR</code>
     * <p>
     * Gaps are written as <code>-</code> in each of the three columns.
     * <p>
     * Note that this format loses information about blocks.
     *
     * @param multAln
     *            MultipleAlignment object
     * @return a String representation of the aligned residues.
     */
    public static String toAlignedResidues(MultipleAlignment multAln) {
        StringBuilder residueGroup = new StringBuilder();

        // Write structure names & PDB codes
        for (int str = 0; str < multAln.size(); str++) {
            residueGroup.append("#Struct").append(str + 1).append(":\t");
            residueGroup.append(multAln.getEnsemble().getStructureIdentifiers()
                    .get(str).getIdentifier());
            residueGroup.append('\n');
        }

        // Write header for columns
        for (int str = 0; str < multAln.size(); str++) {
            int nr = str + 1;
            residueGroup.append("#Num").append(nr)
                    .append("\tChain").append(nr)
                    .append("\tAA").append(nr)
                    .append('\t');
        }
        residueGroup.append('\n');

        // Write optimally aligned residue groups, one line per column
        for (Block b : multAln.getBlocks()) {
            for (int res = 0; res < b.length(); res++) {
                for (int str = 0; str < multAln.size(); str++) {
                    Integer residue = b.getAlignRes().get(str).get(res);
                    if (residue == null) {
                        residueGroup.append(GAP_COLUMNS);
                        continue;
                    }
                    Atom atom = multAln.getAtomArrays().get(str)[residue];

                    residueGroup.append(atom.getGroup().getResidueNumber()
                            .toString());
                    residueGroup.append('\t');
                    // ABradley - I'm assuming Auth Id's here 04/05/16
                    residueGroup.append(atom.getGroup().getChain().getName());
                    residueGroup.append('\t');
                    residueGroup.append(atom.getGroup().getPDBName());
                    residueGroup.append('\t');
                }
                residueGroup.append('\n');
            }
        }
        return residueGroup.toString();
    }

    /**
     * Converts the transformation Matrices of the alignment into a String
     * output. For every block set that has transformations, three equations
     * (X, Y and Z) are written per structure. Block sets without
     * transformations are skipped.
     *
     * @param alignment
     *            MultipleAlignment whose block set transformations are written
     * @return String transformation Matrices
     */
    public static String toTransformMatrices(MultipleAlignment alignment) {

        StringBuilder txt = new StringBuilder();

        for (int bs = 0; bs < alignment.getBlockSets().size(); bs++) {

            List<Matrix4d> btransforms = alignment.getBlockSet(bs)
                    .getTransformations();
            if (btransforms == null || btransforms.size() < 1) {
                continue;
            }

            // Only label the operations when there is more than one block set
            if (alignment.getBlockSets().size() > 1) {
                txt.append("Operations for block ");
                txt.append(bs + 1);
                txt.append("\n");
            }

            for (int str = 0; str < alignment.size(); str++) {
                Matrix4d transform = btransforms.get(str);
                int structNr = str + 1;

                appendTransformRow(txt, 'X', structNr, transform, 0);
                txt.append("\n");
                appendTransformRow(txt, 'Y', structNr, transform, 1);
                txt.append("\n");
                appendTransformRow(txt, 'Z', structNr, transform, 2);
                // Blank line separates the equations of each structure
                txt.append("\n\n");
            }
        }
        return txt.toString();
    }

    /**
     * Converts all the information of a multiple alignment ensemble into an XML
     * String format. Cached variables, like transformation matrices and scores,
     * are also converted.
     *
     * @param ensemble
     *            the MultipleAlignmentEnsemble to convert.
     * @return String XML representation of the ensemble
     * @throws IOException
     *             if the XML conversion fails
     * @see MultipleAlignmentXMLConverter Helper methods for XML conversion
     */
    public static String toXML(MultipleAlignmentEnsemble ensemble)
            throws IOException {

        StringWriter result = new StringWriter();

        // try-with-resources closes (and flushes) the writer even if the
        // conversion throws
        try (PrintWriter writer = new PrintWriter(result)) {
            PrettyXMLWriter xml = new PrettyXMLWriter(writer);
            MultipleAlignmentXMLConverter.printXMLensemble(xml, ensemble);
        }

        return result.toString();
    }

    /**
     * Outputs a pairwise alignment in I-TASSER's 3D Format for target-template
     * alignment. http://zhanglab.ccmb.med.umich.edu/I-TASSER/option4.html
     *
     * <p>
     * The format is closely related to a standard PDB file, but contains only
     * CA atoms and adds two columns for specifying the alignment:
     *
     * <pre>
     * ATOM   2001  CA  MET     1      41.116 -30.727   6.866  129 THR
     * ATOM   2002  CA  ALA     2      39.261 -27.408   6.496  130 ARG
     * ATOM   2003  CA  ALA     3      35.665 -27.370   7.726  131 THR
     * ATOM   2004  CA  ARG     4      32.662 -25.111   7.172  132 ARG
     * ATOM   2005  CA  GLY     5      29.121 -25.194   8.602  133 ARG
     *
     * Column  1-30: Atom and Residue records of query sequence.
     * Column 31-54: Coordinates of atoms in query copied from corresponding atoms in template.
     * Column 55-59: Corresponding residue number in template based on alignment
     * Column 60-64: Corresponding residue name in template
     * </pre>
     *
     * <p>
     * Note that the output is a pairwise alignment. Only the rows at
     * <code>queryIndex</code> and <code>templateIndex</code> of the
     * MultipleAlignment will be used, others ignored. Columns where either of
     * the two rows has a gap are skipped.
     *
     * <p>
     * This method supports topology-independent alignments. The output will
     * have sequence order matching the query, but include atoms from the
     * template.
     *
     * @param alignment
     *            A <em>full</em> multiple alignment between proteins
     * @param queryIndex
     *            index of the query within the multiple alignment
     * @param templateIndex
     *            index of the template within the multiple alignment
     * @return The file contents as a string
     * @throws StructureException If an error occurs parsing the alignment's structure names
     */
    public static String to3DFormat(MultipleAlignment alignment,
            int queryIndex, int templateIndex) throws StructureException {
        List<Atom[]> atomArrays = alignment.getEnsemble().getAtomArrays();
        Atom[] queryAtoms = atomArrays.get(queryIndex);
        Atom[] templateAtoms = atomArrays.get(templateIndex);

        // Order the blocks by the query so the output follows its sequence
        List<Block> blocks = alignment.getBlocks();
        MultipleAlignmentTools.sortBlocks(blocks, queryIndex);

        StringBuilder str = new StringBuilder();

        // Gather info about the template structure
        StructureIdentifier tName = alignment.getEnsemble().getStructureIdentifiers()
                .get(templateIndex);
        SubstructureIdentifier canon = tName.toCanonical();
        PdbId tPdbId = canon.getPdbId();

        // Take the chain of the first residue range, if there is any
        String tChain = null;
        for (ResidueRange range : canon.getResidueRanges()) {
            tChain = range.getChainName();
            break;
        }

        if (tChain == null && !blocks.isEmpty()) {
            // Use the chain of the first aligned template residue of the
            // first block. The emptiness check avoids an
            // IndexOutOfBoundsException for alignments without blocks.
            // NOTE(review): this reads getChainId() while the range above
            // provides getChainName() - confirm both denote the same id.
            for (Integer i : blocks.get(0).getAlignRes().get(templateIndex)) {
                if (i != null) {
                    tChain = templateAtoms[i].getGroup().getChainId();
                    break;
                }
            }
        }
        str.append(String
                .format("REMARK Template name:%s:%s\n", tPdbId, tChain));

        for (Block block : blocks) {
            List<Integer> qAlign = block.getAlignRes().get(queryIndex);
            List<Integer> tAlign = block.getAlignRes().get(templateIndex);
            for (int i = 0; i < block.length(); i++) {
                Integer qRes = qAlign.get(i);
                Integer tRes = tAlign.get(i);

                // skip gaps
                if (qRes == null || tRes == null) {
                    continue;
                }

                // Get PDB-format ATOM records
                String qPDB = queryAtoms[qRes].toPDB();
                String tPDB = templateAtoms[tRes].toPDB();

                // merge the two records into 3D format
                str.append(qPDB.substring(0, 30)); // up through coordinates
                str.append(tPDB.substring(30, 54)); // coordinates
                str.append(tPDB.substring(22, 27)); // residue number
                str.append(' ');
                str.append(tPDB.substring(17, 20)); // residue name
                str.append('\n');
            }
        }
        return str.toString();
    }

    /** Appends one formatted equation for the given row of a transformation matrix. */
    private static void appendTransformRow(StringBuilder txt, char axis,
            int structNr, Matrix4d transform, int row) {
        // Locale.US keeps '.' as the decimal separator on every platform
        txt.append(String.format(Locale.US, TRANSFORM_ROW_FORMAT,
                axis, structNr,
                transform.getElement(row, 0),
                transform.getElement(row, 1),
                transform.getElement(row, 2),
                transform.getElement(row, 3)));
    }

    /** Appends <code>count</code> space characters to the given builder. */
    private static void appendSpaces(StringBuilder out, int count) {
        for (int i = 0; i < count; i++) {
            out.append(' ');
        }
    }

}