001/* 002 * PDB web development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * 015 * Created on Jun 17, 2009 016 * Created by ap3 017 * 018 */ 019 020package org.biojava.nbio.structure.align.util; 021 022import org.biojava.nbio.structure.*; 023import org.biojava.nbio.structure.align.ce.GuiWrapper; 024import org.biojava.nbio.structure.align.model.AFPChain; 025import org.biojava.nbio.structure.geometry.Matrices; 026import org.biojava.nbio.structure.geometry.SuperPositions; 027import org.biojava.nbio.structure.jama.Matrix; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import java.lang.reflect.InvocationTargetException; 032import java.util.*; 033 034import javax.vecmath.Matrix4d; 035 036 037public class AFPAlignmentDisplay 038{ 039 private static final Logger logger = LoggerFactory.getLogger(AFPAlignmentDisplay.class); 040 041 private static final int[][] aaMatrix = new int[][] {{6,0,-2,-3,-2,0,-1,0,-2,-2,-2,-2,-2,-3,-4,-4,-3,-3,-3,-2,-4}, 042 {0,4,-1,0,0,1,-2,-2,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-3,-4}, 043 {-2,-1,7,-3,-1,-1,-1,-2,-1,-1,-1,-2,-2,-2,-3,-3,-2,-4,-3,-4,-4}, 044 {-3,0,-3,9,-1,-1,-3,-3,-4,-3,-3,-3,-3,-1,-1,-1,-1,-2,-2,-2,-4}, 045 {-2,0,-1,-1,5,1,-1,0,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-2,-4}, 046 {0,1,-1,-1,1,4,0,1,0,0,0,-1,-1,-2,-2,-2,-1,-2,-2,-3,-4}, 047 {-1,-2,-1,-3,-1,0,6,1,2,0,-1,-1,-2,-3,-3,-4,-3,-3,-3,-4,-4}, 048 {0,-2,-2,-3,0,1,1,6,0,0,0,1,0,-3,-3,-3,-2,-3,-2,-4,-4}, 049 {-2,-1,-1,-4,-1,0,2,0,5,2,1,0,0,-2,-3,-3,-2,-3,-2,-3,-4}, 050 {-2,-1,-1,-3,-1,0,0,0,2,5,1,0,1,-2,-3,-2,0,-3,-1,-2,-4}, 051 {-2,-1,-1,-3,-1,0,-1,0,1,1,5,-1,2,-2,-3,-2,-1,-3,-2,-3,-4}, 052 {-2,-2,-2,-3,-2,-1,-1,1,0,0,-1,8,0,-3,-3,-3,-2,-1,2,-2,-4}, 053 {-2,-1,-2,-3,-1,-1,-2,0,0,1,2,0,5,-3,-3,-2,-1,-3,-2,-3,-4}, 054 {-3,0,-2,-1,0,-2,-3,-3,-2,-2,-2,-3,-3,4,3,1,1,-1,-1,-3,-4}, 055 {-4,-1,-3,-1,-1,-2,-3,-3,-3,-3,-3,-3,-3,3,4,2,1,0,-1,-3,-4}, 056 {-4,-1,-3,-1,-1,-2,-4,-3,-3,-2,-2,-3,-2,1,2,4,2,0,-1,-2,-4}, 057 {-3,-1,-2,-1,-1,-1,-3,-2,-2,0,-1,-2,-1,1,1,2,5,0,-1,-1,-4}, 058 {-3,-2,-4,-2,-2,-2,-3,-3,-3,-3,-3,-1,-3,-1,0,0,0,6,3,1,-4}, 059 {-3,-2,-3,-2,-2,-2,-3,-2,-2,-1,-2,2,-2,-1,-1,-1,-1,3,7,2,-4}, 060 {-2,-3,-4,-2,-2,-3,-4,-4,-3,-2,-3,-2,-3,-3,-3,-2,-1,1,2,11,-4}, 061 {-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1}}; 062 063 private static Character[] aa1 = new Character[]{ 'G', 'A', 'P', 'C', 'T', 'S','D', 'N', 'E', 'Q', 'K', 'H', 'R', 'V', 'I', 'L', 'M', 'F', 'Y', 'W', '-'}; 064 065 private static final List<Character> aa1List = Arrays.asList(aa1); 066 067 public static Matrix getRotMax(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{ 068 069 Atom[] a1 = getAlignedAtoms1(afpChain,ca1); 070 Atom[] a2 = getAlignedAtoms2(afpChain,ca2); 071 072 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1), 073 Calc.atomsToPoints(a2)); 074 075 return Matrices.getRotationJAMA(trans); 076 077 } 078 079 public static Atom getTranslation(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{ 080 081 082 Atom[] a1 = getAlignedAtoms1(afpChain,ca1); 083 Atom[] a2 = getAlignedAtoms2(afpChain,ca2); 084 085 Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1), 086 Calc.atomsToPoints(a2)); 087 088 return Calc.getTranslationVector(trans); 089 090 } 091 092 public static Atom[] getAlignedAtoms1(AFPChain afpChain,Atom[] ca1){ 093 List<Atom> atoms = new ArrayList<>(); 094 095 int blockNum = afpChain.getBlockNum(); 096 097 int[] optLen = afpChain.getOptLen(); 098 int[][][] optAln = afpChain.getOptAln(); 099 100 101 for(int bk = 0; bk < blockNum; bk ++) { 102 103 104 for ( int i=0;i< optLen[bk];i++){ 105 int pos = optAln[bk][0][i]; 106 atoms.add(ca1[pos]); 107 } 108 109 } 110 return atoms.toArray(new Atom[atoms.size()]); 111 } 112 public static Atom[] getAlignedAtoms2(AFPChain afpChain,Atom[] ca2){ 113 114 List<Atom> atoms = new ArrayList<>(); 115 116 int blockNum = afpChain.getBlockNum(); 117 118 int[] optLen = afpChain.getOptLen(); 119 int[][][] optAln = afpChain.getOptAln(); 120 121 122 for(int bk = 0; bk < blockNum; bk ++) { 123 124 125 for ( int i=0;i< optLen[bk];i++){ 126 int pos = optAln[bk][1][i]; 127 atoms.add(ca2[pos]); 128 } 129 130 } 131 return atoms.toArray(new Atom[atoms.size()]); 132 } 133 134 135 136 /** 137 * Extract the alignment output 138 * <p>eg<pre> 139 * STWNTWACTWHLKQP--WSTILILA 140 * 111111111111 22222222 141 * SQNNTYACSWKLKSWNNNSTILILG 142 * </pre> 143 * Those position pairs labeled by 1 and 2 are equivalent positions, belongs to 144 * two blocks 1 and 2. The residues between labeled residues are non-equivalent, 145 * with '-' filling in their resulting gaps. 146 * <p> 147 * The three lines can be accessed using 148 * {@link AFPChain#getAlnseq1()}, {@link AFPChain#getAlnsymb()}, 149 * and {@link AFPChain#getAlnseq2()}. 150 * 151 */ 152 public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2) { 153 getAlign(afpChain, ca1, ca2, false); 154 } 155 156 /** 157 * Sets the following properties: 158 * <ul> 159 * <li>The alignment strings {@link AFPChain#setAlnseq1(char[]) alnseq1}, 160 * {@link AFPChain#setAlnseq2(char[]) alnseq2}, 161 * and {@link AFPChain#setAlnsymb(char[]) alnsymb}</li> 162 * <li>{@link AFPChain#setAlnbeg1(int) alnbeg1} and 2</li> 163 * <li>{@link AFPChain#setAlnLength(int) alnLength} and 164 * {@link AFPChain#setGapLen(int) gapLen}</li> 165 * </ul> 166 * <p> 167 * Expects the following properties to be previously computed: 168 * <ul> 169 * <li>{@link AFPChain#getOptAln()} and lengths 170 * </ul> 171 * <p> 172 * Known Bugs: 173 * <p> 174 * Expects the alignment to have linear topology. May give odd results 175 * for circular permutations and other complicated topologies. 176 * 177 * @param afpChain Alignment between ca1 and ca2 178 * @param ca1 CA atoms of the first protein 179 * @param ca2 CA atoms of the second protein 180 * @param showSeq Use symbols reflecting sequence similarity: '|' for identical, 181 * ':' for similar, '.' for dissimilar. Otherwise, use the block number 182 * to show aligned residues. 183 */ 184 public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2, boolean showSeq) { 185 186 char[] alnsymb = afpChain.getAlnsymb(); 187 char[] alnseq1 = afpChain.getAlnseq1(); 188 char[] alnseq2 = afpChain.getAlnseq2(); 189 190 int i, j, k, p1, p2, p1b, p2b, lmax; 191 192 int pro1Len = ca1.length; 193 int pro2Len = ca2.length; 194 195 p1b = p2b = 0; 196 197 198 199 if(alnsymb == null) { 200 alnseq1 = new char[pro1Len+pro2Len +1]; 201 alnseq2 = new char[pro1Len+pro2Len +1] ; 202 alnsymb = new char[pro1Len+pro2Len+1]; 203 204 afpChain.setAlnseq1(alnseq1); 205 afpChain.setAlnseq2(alnseq2); 206 afpChain.setAlnsymb(alnsymb); 207 } 208 209 210 int blockNum = afpChain.getBlockNum(); 211 212 int[] optLen = afpChain.getOptLen(); 213 int[][][] optAln = afpChain.getOptAln(); 214 215 int alnbeg1 = afpChain.getAlnbeg1(); // immediately overwritten 216 int alnbeg2 = afpChain.getAlnbeg2(); // immediately overwritten 217 int alnLength; // immediately overwritten 218 int optLength = afpChain.getOptLength(); 219 220 if ( optLen == null) { 221 optLen = new int[blockNum]; 222 for (int oi =0 ; oi < blockNum ; oi++) 223 optLen[oi] = 0; 224 } 225 int len = 0; 226 for(i = 0; i < blockNum; i ++) { 227 for(j = 0; j < optLen[i]; j ++) { 228 p1 = optAln[i][0][j]; 229 p2 = optAln[i][1][j]; 230 231 // weird, could not find a residue in the Atom array. Did something change in the underlying data? 232 if (( p1 == -1 ) || ( p2 == -1)) { 233 logger.warn("Could not get atom on position " + j ); 234 continue; 235 } 236 if(len > 0) { 237 lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1); 238 for(k = 0; k < lmax; k ++) { 239 if(k >= (p1 - p1b - 1)) alnseq1[len] = '-'; 240 else { 241 char oneletter = getOneLetter(ca1[p1b+1+k].getGroup()); 242 alnseq1[len] = oneletter; 243 } 244 if(k >= (p2 - p2b - 1)) alnseq2[len] = '-'; 245 else { 246 char oneletter = getOneLetter(ca2[p2b+1+k].getGroup()); 247 alnseq2[len] = oneletter; 248 } 249 alnsymb[len ++] = ' '; 250 } 251 } 252 else { 253 alnbeg1 = p1; //the first position of sequence in alignment 254 alnbeg2 = p2; 255 } 256 257 if ( p1 < ca1.length && p2<ca2.length) { 258 259 alnseq1[len] = getOneLetter(ca1[p1].getGroup()); 260 alnseq2[len] = getOneLetter(ca2[p2].getGroup()); 261 } else { 262 //TODO handle permutations 263 alnseq1[len]='?'; 264 alnseq2[len]='?'; 265 } 266 if ( showSeq) { 267 if ( alnseq1[len] == alnseq2[len]){ 268 alnsymb[len ++] = '|'; 269 } else { 270 double score = aaScore(alnseq1[len], alnseq2[len]); 271 272 if ( score > 1) 273 alnsymb[len ++] = ':'; 274 else 275 alnsymb[len ++] = '.'; 276 } 277 } else { 278 String tmpS = String.format( "%d", i + 1); 279 alnsymb[len ++] = tmpS.charAt(0); 280 } 281 p1b = p1; 282 p2b = p2; 283 } 284 } 285 alnLength = len; 286 287 afpChain.setOptAln(optAln); 288 afpChain.setOptLen(optLen); 289 afpChain.setAlnbeg1(alnbeg1); 290 afpChain.setAlnbeg2(alnbeg2); 291 afpChain.setAlnLength(alnLength); 292 afpChain.setGapLen(alnLength-optLength); 293 } 294 295 private static char getOneLetter(Group g){ 296 return StructureTools.get1LetterCode(g.getPDBName()); 297 } 298 299 public static int aaScore(char a, char b){ 300 if (a == 'x') 301 a= '-'; 302 if (b == 'x') 303 b='-'; 304 if ( a == 'X') 305 a = '-'; 306 if ( b == 'X') 307 b = '-'; 308 309 int pos1 = aa1List.indexOf(a); 310 int pos2 = aa1List.indexOf(b); 311 312 313 // SEC an PYL amino acids are not supported as of yet... 314 315 if ( pos1 < 0) { 316 logger.warn("unknown char " + a); 317 return 0; 318 } 319 if ( pos2 < 0) { 320 logger.warn("unknown char " + b); 321 return 0; 322 } 323 324 return aaMatrix[pos1][pos2]; 325 } 326 327 public static Map<String,Double> calcIdSimilarity(char[] seq1, char[] seq2, int alnLength){ 328 double identity = 0.0; 329 double similarity = 0.0; 330 331 if ( seq1 == null || seq2 == null){ 332 logger.warn("Can't calc %ID for an empty alignment! "); 333 Map<String, Double> m = new HashMap<>(); 334 m.put("similarity", similarity); 335 m.put("identity", identity); 336 return m; 337 } 338 339 int i; 340 int eqr = 0; 341 342 @SuppressWarnings("unused") 343 int count = 0; 344 for(i = 0; i < alnLength; i ++) { 345 346 //System.out.println(i + " " + count + " " + seq1[i] + " " + seq2[i]); 347 // ignore gaps 348 if(seq1[i] == '-' || seq1[i] == '*' || seq1[i] == '.' || 349 seq2[i] == '-' || seq2[i] == '*' || seq2[i] == '.' ) 350 continue ; 351 352 eqr++; 353 354 if(seq1[i] == seq2[i]) { 355 356 identity += 1.0; 357 similarity += 1.0; 358 count++; 359 360 } else if(aaScore(seq1[i], seq2[i]) > 0) { 361 362 similarity += 1.0; 363 count++; 364 365 } 366 } 367 368 369 if ( alnLength > 0){ 370 similarity = (similarity) / eqr; 371 identity = identity/eqr; 372 } 373 Map<String, Double> m = new HashMap<>(); 374 m.put("similarity", similarity); 375 m.put("identity", identity); 376 377 return m; 378 } 379 380 381 /** 382 * 383 * @param afpChain 384 * @param ca1 385 * @param ca2 386 * @return 387 * @throws ClassNotFoundException If an error occurs when invoking jmol 388 * @throws NoSuchMethodException If an error occurs when invoking jmol 389 * @throws InvocationTargetException If an error occurs when invoking jmol 390 * @throws IllegalAccessException If an error occurs when invoking jmol 391 * @throws StructureException 392 */ 393 public static Structure createArtificalStructure(AFPChain afpChain, Atom[] ca1, 394 Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, 395 InvocationTargetException, IllegalAccessException, StructureException 396 { 397 398 if ( afpChain.getNrEQR() < 1){ 399 return GuiWrapper.getAlignedStructure(ca1, ca2); 400 } 401 402 Group[] twistedGroups = AlignmentTools.prepareGroupsForDisplay(afpChain,ca1, ca2); 403 404 List<Atom> twistedAs = new ArrayList<>(); 405 406 for ( Group g: twistedGroups){ 407 if ( g == null ) 408 continue; 409 if ( g.size() < 1) 410 continue; 411 Atom a = g.getAtom(0); 412 twistedAs.add(a); 413 } 414 Atom[] twistedAtoms = twistedAs.toArray(new Atom[twistedAs.size()]); 415 416 List<Group> hetatms = new ArrayList<>(); 417 List<Group> nucs1 = new ArrayList<>(); 418 Group g1 = ca1[0].getGroup(); 419 Chain c1 = null; 420 if ( g1 != null) { 421 c1 = g1.getChain(); 422 if ( c1 != null){ 423 hetatms = c1.getAtomGroups(GroupType.HETATM);; 424 nucs1 = c1.getAtomGroups(GroupType.NUCLEOTIDE); 425 } 426 } 427 List<Group> hetatms2 = new ArrayList<>(); 428 List<Group> nucs2 = new ArrayList<>(); 429 Group g2 = ca2[0].getGroup(); 430 Chain c2 = null; 431 if ( g2 != null){ 432 c2 = g2.getChain(); 433 if ( c2 != null){ 434 hetatms2 = c2.getAtomGroups(GroupType.HETATM); 435 nucs2 = c2.getAtomGroups(GroupType.NUCLEOTIDE); 436 } 437 } 438 Atom[] arr1 = GuiWrapper.getAtomArray(ca1, hetatms, nucs1); 439 Atom[] arr2 = GuiWrapper.getAtomArray(twistedAtoms, hetatms2, nucs2); 440 441 return GuiWrapper.getAlignedStructure(arr1,arr2); 442 } 443 444 /** get the block number for an aligned position 445 * 446 * @param afpChain 447 * @param aligPos 448 * @return 449 */ 450 public static int getBlockNrForAlignPos(AFPChain afpChain, int aligPos){ 451 // moved here from DisplayAFP; 452 453 int blockNum = afpChain.getBlockNum(); 454 455 int[] optLen = afpChain.getOptLen(); 456 int[][][] optAln = afpChain.getOptAln(); 457 458 int len = 0; 459 int p1b=0; 460 int p2b=0; 461 462 for(int i = 0; i < blockNum; i ++) { 463 464 for(int j = 0; j < optLen[i]; j ++) { 465 466 int p1 = optAln[i][0][j]; 467 int p2 = optAln[i][1][j]; 468 469 if (len != 0) { 470 // check for gapped region 471 int lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1); 472 for(int k = 0; k < lmax; k ++) { 473 len++; 474 } 475 } 476 477 478 p1b = p1; 479 p2b = p2; 480 if ( len >= aligPos) { 481 482 return i; 483 } 484 len++; 485 } 486 } 487 488 return blockNum; 489 490 } 491}