001/* 002 * PDB web development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * 015 * Created on Jun 17, 2009 016 * Created by ap3 017 * 018 */ 019 020package org.biojava.nbio.structure.align.util; 021 022import org.biojava.nbio.structure.*; 023import org.biojava.nbio.structure.align.ce.GuiWrapper; 024import org.biojava.nbio.structure.align.model.AFPChain; 025import org.biojava.nbio.structure.geometry.Matrices; 026import org.biojava.nbio.structure.geometry.SuperPositions; 027import org.biojava.nbio.structure.jama.Matrix; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import java.lang.reflect.InvocationTargetException; 032import java.util.*; 033 034import javax.vecmath.Matrix4d; 035 036 037public class AFPAlignmentDisplay 038{ 039 private static final Logger logger = LoggerFactory.getLogger(AFPAlignmentDisplay.class); 040 041 private static final int[][] aaMatrix = new int[][] {{6,0,-2,-3,-2,0,-1,0,-2,-2,-2,-2,-2,-3,-4,-4,-3,-3,-3,-2,-4}, 042 {0,4,-1,0,0,1,-2,-2,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-3,-4}, 043 {-2,-1,7,-3,-1,-1,-1,-2,-1,-1,-1,-2,-2,-2,-3,-3,-2,-4,-3,-4,-4}, 044 {-3,0,-3,9,-1,-1,-3,-3,-4,-3,-3,-3,-3,-1,-1,-1,-1,-2,-2,-2,-4}, 045 {-2,0,-1,-1,5,1,-1,0,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-2,-4}, 046 {0,1,-1,-1,1,4,0,1,0,0,0,-1,-1,-2,-2,-2,-1,-2,-2,-3,-4}, 047 {-1,-2,-1,-3,-1,0,6,1,2,0,-1,-1,-2,-3,-3,-4,-3,-3,-3,-4,-4}, 048 {0,-2,-2,-3,0,1,1,6,0,0,0,1,0,-3,-3,-3,-2,-3,-2,-4,-4}, 049 {-2,-1,-1,-4,-1,0,2,0,5,2,1,0,0,-2,-3,-3,-2,-3,-2,-3,-4}, 050 {-2,-1,-1,-3,-1,0,0,0,2,5,1,0,1,-2,-3,-2,0,-3,-1,-2,-4}, 051 {-2,-1,-1,-3,-1,0,-1,0,1,1,5,-1,2,-2,-3,-2,-1,-3,-2,-3,-4}, 052 {-2,-2,-2,-3,-2,-1,-1,1,0,0,-1,8,0,-3,-3,-3,-2,-1,2,-2,-4}, 053 {-2,-1,-2,-3,-1,-1,-2,0,0,1,2,0,5,-3,-3,-2,-1,-3,-2,-3,-4}, 054 {-3,0,-2,-1,0,-2,-3,-3,-2,-2,-2,-3,-3,4,3,1,1,-1,-1,-3,-4}, 055 {-4,-1,-3,-1,-1,-2,-3,-3,-3,-3,-3,-3,-3,3,4,2,1,0,-1,-3,-4}, 056 {-4,-1,-3,-1,-1,-2,-4,-3,-3,-2,-2,-3,-2,1,2,4,2,0,-1,-2,-4}, 057 {-3,-1,-2,-1,-1,-1,-3,-2,-2,0,-1,-2,-1,1,1,2,5,0,-1,-1,-4}, 058 {-3,-2,-4,-2,-2,-2,-3,-3,-3,-3,-3,-1,-3,-1,0,0,0,6,3,1,-4}, 059 {-3,-2,-3,-2,-2,-2,-3,-2,-2,-1,-2,2,-2,-1,-1,-1,-1,3,7,2,-4}, 060 {-2,-3,-4,-2,-2,-3,-4,-4,-3,-2,-3,-2,-3,-3,-3,-2,-1,1,2,11,-4}, 061 {-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1}}; 062 063 private static Character[] aa1 = new Character[]{ 'G', 'A', 'P', 'C', 'T', 'S','D', 'N', 'E', 'Q', 'K', 'H', 'R', 'V', 'I', 'L', 'M', 'F', 'Y', 'W', '-'}; 064 065 private static final List<Character> aa1List = Arrays.asList(aa1); 066 067 public static Matrix getRotMax(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{ 068 069 Atom[] a1 = getAlignedAtoms1(afpChain,ca1); 070 Atom[] a2 = getAlignedAtoms2(afpChain,ca2); 071 072 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1), 073 Calc.atomsToPoints(a2)); 074 075 return Matrices.getRotationJAMA(trans); 076 077 } 078 079 public static Atom getTranslation(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{ 080 081 082 Atom[] a1 = getAlignedAtoms1(afpChain,ca1); 083 Atom[] a2 = getAlignedAtoms2(afpChain,ca2); 084 085 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1), 086 Calc.atomsToPoints(a2)); 087 088 return Calc.getTranslationVector(trans); 089 090 } 091 092 public static Atom[] getAlignedAtoms1(AFPChain afpChain,Atom[] ca1){ 093 List<Atom> atoms = new ArrayList<Atom>(); 094 095 int blockNum = afpChain.getBlockNum(); 096 097 int[] optLen = afpChain.getOptLen(); 098 int[][][] optAln = afpChain.getOptAln(); 099 100 101 for(int bk = 0; bk < blockNum; bk ++) { 102 103 104 for ( int i=0;i< optLen[bk];i++){ 105 int pos = optAln[bk][0][i]; 106 atoms.add(ca1[pos]); 107 } 108 109 } 110 return atoms.toArray(new Atom[atoms.size()]); 111 } 112 public static Atom[] getAlignedAtoms2(AFPChain afpChain,Atom[] ca2){ 113 114 List<Atom> atoms = new ArrayList<Atom>(); 115 116 int blockNum = afpChain.getBlockNum(); 117 118 int[] optLen = afpChain.getOptLen(); 119 int[][][] optAln = afpChain.getOptAln(); 120 121 122 for(int bk = 0; bk < blockNum; bk ++) { 123 124 125 for ( int i=0;i< optLen[bk];i++){ 126 int pos = optAln[bk][1][i]; 127 atoms.add(ca2[pos]); 128 } 129 130 } 131 return atoms.toArray(new Atom[atoms.size()]); 132 } 133 134 135 136 /** 137 * Extract the alignment output 138 * <p>eg<pre> 139 * STWNTWACTWHLKQP--WSTILILA 140 * 111111111111 22222222 141 * SQNNTYACSWKLKSWNNNSTILILG 142 * </pre> 143 * Those position pairs labeled by 1 and 2 are equivalent positions, belongs to 144 * two blocks 1 and 2. The residues between labeled residues are non-equivalent, 145 * with '-' filling in their resulting gaps. 146 * <p> 147 * The three lines can be accessed using 148 * {@link AFPChain#getAlnseq1()}, {@link AFPChain#getAlnsymb()}, 149 * and {@link AFPChain#getAlnseq2()}. 150 * 151 */ 152 public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2) { 153 getAlign(afpChain, ca1, ca2, false); 154 } 155 156 /** 157 * Sets the following properties: 158 * <ul> 159 * <li>The alignment strings {@link AFPChain#setAlnseq1(char[]) alnseq1}, 160 * {@link AFPChain#setAlnseq2(char[]) alnseq2}, 161 * and {@link AFPChain#setAlnsymb(char[]) alnsymb}</li> 162 * <li>{@link AFPChain#setAlnbeg1(int) alnbeg1} and 2</li> 163 * <li>{@link AFPChain#setAlnLength(int) alnLength} and 164 * {@link AFPChain#setGapLen(int) gapLen}</li> 165 * </ul> 166 * <p> 167 * Expects the following properties to be previously computed: 168 * <ul> 169 * <li>{@link AFPChain#getOptAln()} and lengths 170 * </ul> 171 * 172 * <section>Known Bugs</section> 173 * Expects the alignment to have linear topology. May give odd results 174 * for circular permutations and other complicated topologies. 175 * 176 * @param afpChain Alignment between ca1 and ca2 177 * @param ca1 CA atoms of the first protein 178 * @param ca2 CA atoms of the second protein 179 * @param showSeq Use symbols reflecting sequence similarity: '|' for identical, 180 * ':' for similar, '.' for dissimilar. Otherwise, use the block number 181 * to show aligned residues. 182 */ 183 public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2, boolean showSeq) { 184 185 char[] alnsymb = afpChain.getAlnsymb(); 186 char[] alnseq1 = afpChain.getAlnseq1(); 187 char[] alnseq2 = afpChain.getAlnseq2(); 188 189 int i, j, k, p1, p2, p1b, p2b, lmax; 190 191 int pro1Len = ca1.length; 192 int pro2Len = ca2.length; 193 194 p1b = p2b = 0; 195 196 197 198 if(alnsymb == null) { 199 alnseq1 = new char[pro1Len+pro2Len +1]; 200 alnseq2 = new char[pro1Len+pro2Len +1] ; 201 alnsymb = new char[pro1Len+pro2Len+1]; 202 203 afpChain.setAlnseq1(alnseq1); 204 afpChain.setAlnseq2(alnseq2); 205 afpChain.setAlnsymb(alnsymb); 206 } 207 208 209 int blockNum = afpChain.getBlockNum(); 210 211 int[] optLen = afpChain.getOptLen(); 212 int[][][] optAln = afpChain.getOptAln(); 213 214 int alnbeg1 = afpChain.getAlnbeg1(); // immediately overwritten 215 int alnbeg2 = afpChain.getAlnbeg2(); // immediately overwritten 216 int alnLength; // immediately overwritten 217 int optLength = afpChain.getOptLength(); 218 219 if ( optLen == null) { 220 optLen = new int[blockNum]; 221 for (int oi =0 ; oi < blockNum ; oi++) 222 optLen[oi] = 0; 223 } 224 int len = 0; 225 for(i = 0; i < blockNum; i ++) { 226 for(j = 0; j < optLen[i]; j ++) { 227 p1 = optAln[i][0][j]; 228 p2 = optAln[i][1][j]; 229 230 // weird, could not find a residue in the Atom array. Did something change in the underlying data? 231 if (( p1 == -1 ) || ( p2 == -1)) { 232 logger.warn("Could not get atom on position " + j ); 233 continue; 234 } 235 if(len > 0) { 236 lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1); 237 for(k = 0; k < lmax; k ++) { 238 if(k >= (p1 - p1b - 1)) alnseq1[len] = '-'; 239 else { 240 char oneletter = getOneLetter(ca1[p1b+1+k].getGroup()); 241 alnseq1[len] = oneletter; 242 } 243 if(k >= (p2 - p2b - 1)) alnseq2[len] = '-'; 244 else { 245 char oneletter = getOneLetter(ca2[p2b+1+k].getGroup()); 246 alnseq2[len] = oneletter; 247 } 248 alnsymb[len ++] = ' '; 249 } 250 } 251 else { 252 alnbeg1 = p1; //the first position of sequence in alignment 253 alnbeg2 = p2; 254 } 255 256 if ( p1 < ca1.length && p2<ca2.length) { 257 258 alnseq1[len] = getOneLetter(ca1[p1].getGroup()); 259 alnseq2[len] = getOneLetter(ca2[p2].getGroup()); 260 } else { 261 //TODO handle permutations 262 alnseq1[len]='?'; 263 alnseq2[len]='?'; 264 } 265 if ( showSeq) { 266 if ( alnseq1[len] == alnseq2[len]){ 267 alnsymb[len ++] = '|'; 268 } else { 269 double score = aaScore(alnseq1[len], alnseq2[len]); 270 271 if ( score > 1) 272 alnsymb[len ++] = ':'; 273 else 274 alnsymb[len ++] = '.'; 275 } 276 } else { 277 String tmpS = String.format( "%d", i + 1); 278 alnsymb[len ++] = tmpS.charAt(0); 279 } 280 p1b = p1; 281 p2b = p2; 282 } 283 } 284 alnLength = len; 285 286 afpChain.setOptAln(optAln); 287 afpChain.setOptLen(optLen); 288 afpChain.setAlnbeg1(alnbeg1); 289 afpChain.setAlnbeg2(alnbeg2); 290 afpChain.setAlnLength(alnLength); 291 afpChain.setGapLen(alnLength-optLength); 292 } 293 294 private static char getOneLetter(Group g){ 295 return StructureTools.get1LetterCode(g.getPDBName()); 296 } 297 298 public static int aaScore(char a, char b){ 299 if (a == 'x') 300 a= '-'; 301 if (b == 'x') 302 b='-'; 303 if ( a == 'X') 304 a = '-'; 305 if ( b == 'X') 306 b = '-'; 307 308 int pos1 = aa1List.indexOf(a); 309 int pos2 = aa1List.indexOf(b); 310 311 312 // SEC an PYL amino acids are not supported as of yet... 313 314 if ( pos1 < 0) { 315 logger.warn("unknown char " + a); 316 return 0; 317 } 318 if ( pos2 < 0) { 319 logger.warn("unknown char " + b); 320 return 0; 321 } 322 323 return aaMatrix[pos1][pos2]; 324 } 325 326 public static Map<String,Double> calcIdSimilarity(char[] seq1, char[] seq2, int alnLength){ 327 double identity = 0.0; 328 double similarity = 0.0; 329 330 if ( seq1 == null || seq2 == null){ 331 logger.warn("Can't calc %ID for an empty alignment! "); 332 Map<String, Double> m = new HashMap<String, Double>(); 333 m.put("similarity", similarity); 334 m.put("identity", identity); 335 return m; 336 } 337 338 int i; 339 int eqr = 0; 340 341 @SuppressWarnings("unused") 342 int count = 0; 343 for(i = 0; i < alnLength; i ++) { 344 345 //System.out.println(i + " " + count + " " + seq1[i] + " " + seq2[i]); 346 // ignore gaps 347 if(seq1[i] == '-' || seq1[i] == '*' || seq1[i] == '.' || 348 seq2[i] == '-' || seq2[i] == '*' || seq2[i] == '.' ) 349 continue ; 350 351 eqr++; 352 353 if(seq1[i] == seq2[i]) { 354 355 identity += 1.0; 356 similarity += 1.0; 357 count++; 358 359 } else if(aaScore(seq1[i], seq2[i]) > 0) { 360 361 similarity += 1.0; 362 count++; 363 364 } 365 } 366 367 368 if ( alnLength > 0){ 369 similarity = (similarity) / eqr; 370 identity = identity/eqr; 371 } 372 Map<String, Double> m = new HashMap<String, Double>(); 373 m.put("similarity", similarity); 374 m.put("identity", identity); 375 376 return m; 377 } 378 379 380 /** 381 * 382 * @param afpChain 383 * @param ca1 384 * @param ca2 385 * @return 386 * @throws ClassNotFoundException If an error occurs when invoking jmol 387 * @throws NoSuchMethodException If an error occurs when invoking jmol 388 * @throws InvocationTargetException If an error occurs when invoking jmol 389 * @throws IllegalAccessException If an error occurs when invoking jmol 390 * @throws StructureException 391 */ 392 public static Structure createArtificalStructure(AFPChain afpChain, Atom[] ca1, 393 Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, 394 InvocationTargetException, IllegalAccessException, StructureException 395 { 396 397 if ( afpChain.getNrEQR() < 1){ 398 return GuiWrapper.getAlignedStructure(ca1, ca2); 399 } 400 401 Group[] twistedGroups = AlignmentTools.prepareGroupsForDisplay(afpChain,ca1, ca2); 402 403 List<Atom> twistedAs = new ArrayList<Atom>(); 404 405 for ( Group g: twistedGroups){ 406 if ( g == null ) 407 continue; 408 if ( g.size() < 1) 409 continue; 410 Atom a = g.getAtom(0); 411 twistedAs.add(a); 412 } 413 Atom[] twistedAtoms = twistedAs.toArray(new Atom[twistedAs.size()]); 414 415 List<Group> hetatms = new ArrayList<Group>(); 416 List<Group> nucs1 = new ArrayList<Group>(); 417 Group g1 = ca1[0].getGroup(); 418 Chain c1 = null; 419 if ( g1 != null) { 420 c1 = g1.getChain(); 421 if ( c1 != null){ 422 hetatms = c1.getAtomGroups(GroupType.HETATM);; 423 nucs1 = c1.getAtomGroups(GroupType.NUCLEOTIDE); 424 } 425 } 426 List<Group> hetatms2 = new ArrayList<Group>(); 427 List<Group> nucs2 = new ArrayList<Group>(); 428 Group g2 = ca2[0].getGroup(); 429 Chain c2 = null; 430 if ( g2 != null){ 431 c2 = g2.getChain(); 432 if ( c2 != null){ 433 hetatms2 = c2.getAtomGroups(GroupType.HETATM); 434 nucs2 = c2.getAtomGroups(GroupType.NUCLEOTIDE); 435 } 436 } 437 Atom[] arr1 = GuiWrapper.getAtomArray(ca1, hetatms, nucs1); 438 Atom[] arr2 = GuiWrapper.getAtomArray(twistedAtoms, hetatms2, nucs2); 439 440 return GuiWrapper.getAlignedStructure(arr1,arr2); 441 } 442 443 /** get the block number for an aligned position 444 * 445 * @param afpChain 446 * @param aligPos 447 * @return 448 */ 449 public static int getBlockNrForAlignPos(AFPChain afpChain, int aligPos){ 450 // moved here from DisplayAFP; 451 452 int blockNum = afpChain.getBlockNum(); 453 454 int[] optLen = afpChain.getOptLen(); 455 int[][][] optAln = afpChain.getOptAln(); 456 457 int len = 0; 458 int p1b=0; 459 int p2b=0; 460 461 for(int i = 0; i < blockNum; i ++) { 462 463 for(int j = 0; j < optLen[i]; j ++) { 464 465 int p1 = optAln[i][0][j]; 466 int p2 = optAln[i][1][j]; 467 468 if (len != 0) { 469 // check for gapped region 470 int lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1); 471 for(int k = 0; k < lmax; k ++) { 472 len++; 473 } 474 } 475 476 477 p1b = p1; 478 p2b = p2; 479 if ( len >= aligPos) { 480 481 return i; 482 } 483 len++; 484 } 485 } 486 487 return blockNum; 488 489 } 490}