001/* 002 * PDB web development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * 015 * Created on Jun 17, 2009 016 * Created by ap3 017 * 018 */ 019 020package org.biojava.nbio.structure.align.util; 021 022import org.biojava.nbio.structure.*; 023import org.biojava.nbio.structure.align.ce.GuiWrapper; 024import org.biojava.nbio.structure.align.model.AFPChain; 025import org.biojava.nbio.structure.jama.Matrix; 026import org.slf4j.Logger; 027import org.slf4j.LoggerFactory; 028 029import java.lang.reflect.InvocationTargetException; 030import java.util.*; 031 032 033public class AFPAlignmentDisplay 034{ 035 private static final Logger logger = LoggerFactory.getLogger(AFPAlignmentDisplay.class); 036 037 private static final int[][] aaMatrix = new int[][] {{6,0,-2,-3,-2,0,-1,0,-2,-2,-2,-2,-2,-3,-4,-4,-3,-3,-3,-2,-4}, 038 {0,4,-1,0,0,1,-2,-2,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-3,-4}, 039 {-2,-1,7,-3,-1,-1,-1,-2,-1,-1,-1,-2,-2,-2,-3,-3,-2,-4,-3,-4,-4}, 040 {-3,0,-3,9,-1,-1,-3,-3,-4,-3,-3,-3,-3,-1,-1,-1,-1,-2,-2,-2,-4}, 041 {-2,0,-1,-1,5,1,-1,0,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-2,-4}, 042 {0,1,-1,-1,1,4,0,1,0,0,0,-1,-1,-2,-2,-2,-1,-2,-2,-3,-4}, 043 {-1,-2,-1,-3,-1,0,6,1,2,0,-1,-1,-2,-3,-3,-4,-3,-3,-3,-4,-4}, 044 {0,-2,-2,-3,0,1,1,6,0,0,0,1,0,-3,-3,-3,-2,-3,-2,-4,-4}, 045 {-2,-1,-1,-4,-1,0,2,0,5,2,1,0,0,-2,-3,-3,-2,-3,-2,-3,-4}, 046 {-2,-1,-1,-3,-1,0,0,0,2,5,1,0,1,-2,-3,-2,0,-3,-1,-2,-4}, 047 {-2,-1,-1,-3,-1,0,-1,0,1,1,5,-1,2,-2,-3,-2,-1,-3,-2,-3,-4}, 048 {-2,-2,-2,-3,-2,-1,-1,1,0,0,-1,8,0,-3,-3,-3,-2,-1,2,-2,-4}, 049 {-2,-1,-2,-3,-1,-1,-2,0,0,1,2,0,5,-3,-3,-2,-1,-3,-2,-3,-4}, 050 {-3,0,-2,-1,0,-2,-3,-3,-2,-2,-2,-3,-3,4,3,1,1,-1,-1,-3,-4}, 051 {-4,-1,-3,-1,-1,-2,-3,-3,-3,-3,-3,-3,-3,3,4,2,1,0,-1,-3,-4}, 052 {-4,-1,-3,-1,-1,-2,-4,-3,-3,-2,-2,-3,-2,1,2,4,2,0,-1,-2,-4}, 053 {-3,-1,-2,-1,-1,-1,-3,-2,-2,0,-1,-2,-1,1,1,2,5,0,-1,-1,-4}, 054 {-3,-2,-4,-2,-2,-2,-3,-3,-3,-3,-3,-1,-3,-1,0,0,0,6,3,1,-4}, 055 {-3,-2,-3,-2,-2,-2,-3,-2,-2,-1,-2,2,-2,-1,-1,-1,-1,3,7,2,-4}, 056 {-2,-3,-4,-2,-2,-3,-4,-4,-3,-2,-3,-2,-3,-3,-3,-2,-1,1,2,11,-4}, 057 {-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1}}; 058 059 private static Character[] aa1 = new Character[]{ 'G', 'A', 'P', 'C', 'T', 'S','D', 'N', 'E', 'Q', 'K', 'H', 'R', 'V', 'I', 'L', 'M', 'F', 'Y', 'W', '-'}; 060 061 private static final List<Character> aa1List = Arrays.asList(aa1); 062 063 public static Matrix getRotMax(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{ 064 065 Atom[] a1 = getAlignedAtoms1(afpChain,ca1); 066 Atom[] a2 = getAlignedAtoms2(afpChain,ca2); 067 068 SVDSuperimposer svd = new SVDSuperimposer(a1,a2); 069 070 return svd.getRotation(); 071 072 } 073 074 public static Atom getTranslation(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{ 075 076 077 Atom[] a1 = getAlignedAtoms1(afpChain,ca1); 078 Atom[] a2 = getAlignedAtoms2(afpChain,ca2); 079 080 SVDSuperimposer svd = new SVDSuperimposer(a1,a2); 081 082 return svd.getTranslation(); 083 084 } 085 086 public static Atom[] getAlignedAtoms1(AFPChain afpChain,Atom[] ca1){ 087 List<Atom> atoms = new ArrayList<Atom>(); 088 089 int blockNum = afpChain.getBlockNum(); 090 091 int[] optLen = afpChain.getOptLen(); 092 int[][][] optAln = afpChain.getOptAln(); 093 094 095 for(int bk = 0; bk < blockNum; bk ++) { 096 097 098 for ( int i=0;i< optLen[bk];i++){ 099 int pos = optAln[bk][0][i]; 100 atoms.add(ca1[pos]); 101 } 102 103 } 104 return atoms.toArray(new Atom[atoms.size()]); 105 } 106 public static Atom[] getAlignedAtoms2(AFPChain afpChain,Atom[] ca2){ 107 108 List<Atom> atoms = new ArrayList<Atom>(); 109 110 int blockNum = afpChain.getBlockNum(); 111 112 int[] optLen = afpChain.getOptLen(); 113 int[][][] optAln = afpChain.getOptAln(); 114 115 116 for(int bk = 0; bk < blockNum; bk ++) { 117 118 119 for ( int i=0;i< optLen[bk];i++){ 120 int pos = optAln[bk][1][i]; 121 atoms.add(ca2[pos]); 122 } 123 124 } 125 return atoms.toArray(new Atom[atoms.size()]); 126 } 127 128 129 130 /** 131 * Extract the alignment output 132 * <p>eg<pre> 133 * STWNTWACTWHLKQP--WSTILILA 134 * 111111111111 22222222 135 * SQNNTYACSWKLKSWNNNSTILILG 136 * </pre> 137 * Those position pairs labeled by 1 and 2 are equivalent positions, belongs to 138 * two blocks 1 and 2. The residues between labeled residues are non-equivalent, 139 * with '-' filling in their resulting gaps. 140 * <p> 141 * The three lines can be accessed using 142 * {@link AFPChain#getAlnseq1()}, {@link AFPChain#getAlnsymb()}, 143 * and {@link AFPChain#getAlnseq2()}. 144 * 145 */ 146 public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2) { 147 getAlign(afpChain, ca1, ca2, false); 148 } 149 150 /** 151 * Sets the following properties: 152 * <ul> 153 * <li>The alignment strings {@link AFPChain#setAlnseq1(char[]) alnseq1}, 154 * {@link AFPChain#setAlnseq2(char[]) alnseq2}, 155 * and {@link AFPChain#setAlnsymb(char[]) alnsymb}</li> 156 * <li>{@link AFPChain#setAlnbeg1(int) alnbeg1} and 2</li> 157 * <li>{@link AFPChain#setAlnLength(int) alnLength} and 158 * {@link AFPChain#setGapLen(int) gapLen}</li> 159 * </ul> 160 * <p> 161 * Expects the following properties to be previously computed: 162 * <ul> 163 * <li>{@link AFPChain#getOptAln()} and lengths 164 * </ul> 165 * 166 * <section>Known Bugs</section> 167 * Expects the alignment to have linear topology. May give odd results 168 * for circular permutations and other complicated topologies. 169 * 170 * @param afpChain Alignment between ca1 and ca2 171 * @param ca1 CA atoms of the first protein 172 * @param ca2 CA atoms of the second protein 173 * @param showSeq Use symbols reflecting sequence similarity: '|' for identical, 174 * ':' for similar, '.' for dissimilar. Otherwise, use the block number 175 * to show aligned residues. 176 */ 177 public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2, boolean showSeq) { 178 179 char[] alnsymb = afpChain.getAlnsymb(); 180 char[] alnseq1 = afpChain.getAlnseq1(); 181 char[] alnseq2 = afpChain.getAlnseq2(); 182 183 int i, j, k, p1, p2, p1b, p2b, lmax; 184 185 int pro1Len = ca1.length; 186 int pro2Len = ca2.length; 187 188 p1b = p2b = 0; 189 190 191 192 if(alnsymb == null) { 193 alnseq1 = new char[pro1Len+pro2Len +1]; 194 alnseq2 = new char[pro1Len+pro2Len +1] ; 195 alnsymb = new char[pro1Len+pro2Len+1]; 196 197 afpChain.setAlnseq1(alnseq1); 198 afpChain.setAlnseq2(alnseq2); 199 afpChain.setAlnsymb(alnsymb); 200 } 201 202 203 int blockNum = afpChain.getBlockNum(); 204 205 int[] optLen = afpChain.getOptLen(); 206 int[][][] optAln = afpChain.getOptAln(); 207 208 int alnbeg1 = afpChain.getAlnbeg1(); // immediately overwritten 209 int alnbeg2 = afpChain.getAlnbeg2(); // immediately overwritten 210 int alnLength; // immediately overwritten 211 int optLength = afpChain.getOptLength(); 212 213 if ( optLen == null) { 214 optLen = new int[blockNum]; 215 for (int oi =0 ; oi < blockNum ; oi++) 216 optLen[oi] = 0; 217 } 218 int len = 0; 219 for(i = 0; i < blockNum; i ++) { 220 for(j = 0; j < optLen[i]; j ++) { 221 p1 = optAln[i][0][j]; 222 p2 = optAln[i][1][j]; 223 224 // weird, could not find a residue in the Atom array. Did something change in the underlying data? 225 if (( p1 == -1 ) || ( p2 == -1)) { 226 logger.warn("Could not get atom on position " + j ); 227 continue; 228 } 229 if(len > 0) { 230 lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1); 231 for(k = 0; k < lmax; k ++) { 232 if(k >= (p1 - p1b - 1)) alnseq1[len] = '-'; 233 else { 234 char oneletter = getOneLetter(ca1[p1b+1+k].getGroup()); 235 alnseq1[len] = oneletter; 236 } 237 if(k >= (p2 - p2b - 1)) alnseq2[len] = '-'; 238 else { 239 char oneletter = getOneLetter(ca2[p2b+1+k].getGroup()); 240 alnseq2[len] = oneletter; 241 } 242 alnsymb[len ++] = ' '; 243 } 244 } 245 else { 246 alnbeg1 = p1; //the first position of sequence in alignment 247 alnbeg2 = p2; 248 } 249 250 if ( p1 < ca1.length && p2<ca2.length) { 251 252 alnseq1[len] = getOneLetter(ca1[p1].getGroup()); 253 alnseq2[len] = getOneLetter(ca2[p2].getGroup()); 254 } else { 255 //TODO handle permutations 256 alnseq1[len]='?'; 257 alnseq2[len]='?'; 258 } 259 if ( showSeq) { 260 if ( alnseq1[len] == alnseq2[len]){ 261 alnsymb[len ++] = '|'; 262 } else { 263 double score = aaScore(alnseq1[len], alnseq2[len]); 264 265 if ( score > 1) 266 alnsymb[len ++] = ':'; 267 else 268 alnsymb[len ++] = '.'; 269 } 270 } else { 271 String tmpS = String.format( "%d", i + 1); 272 alnsymb[len ++] = tmpS.charAt(0); 273 } 274 p1b = p1; 275 p2b = p2; 276 } 277 } 278 alnLength = len; 279 280 afpChain.setOptAln(optAln); 281 afpChain.setOptLen(optLen); 282 afpChain.setAlnbeg1(alnbeg1); 283 afpChain.setAlnbeg2(alnbeg2); 284 afpChain.setAlnLength(alnLength); 285 afpChain.setGapLen(alnLength-optLength); 286 } 287 288 private static char getOneLetter(Group g){ 289 return StructureTools.get1LetterCode(g.getPDBName()); 290 } 291 292 public static int aaScore(char a, char b){ 293 if (a == 'x') 294 a= '-'; 295 if (b == 'x') 296 b='-'; 297 if ( a == 'X') 298 a = '-'; 299 if ( b == 'X') 300 b = '-'; 301 302 int pos1 = aa1List.indexOf(a); 303 int pos2 = aa1List.indexOf(b); 304 305 306 // SEC an PYL amino acids are not supported as of yet... 307 308 if ( pos1 < 0) { 309 logger.warn("unknown char " + a); 310 return 0; 311 } 312 if ( pos2 < 0) { 313 logger.warn("unknown char " + b); 314 return 0; 315 } 316 317 return aaMatrix[pos1][pos2]; 318 } 319 320 public static Map<String,Double> calcIdSimilarity(char[] seq1, char[] seq2, int alnLength){ 321 double identity = 0.0; 322 double similarity = 0.0; 323 324 if ( seq1 == null || seq2 == null){ 325 logger.warn("Can't calc %ID for an empty alignment! "); 326 Map<String, Double> m = new HashMap<String, Double>(); 327 m.put("similarity", similarity); 328 m.put("identity", identity); 329 return m; 330 } 331 332 int i; 333 334 for(i = 0; i < alnLength; i ++) { 335 336 if(seq1[i] == seq2[i]) { 337 338 identity += 1.0; 339 } else if(seq1[i] == '-' || seq1[i] == '*' || seq1[i] == '.' || 340 seq2[i] == '-' || seq2[i] == '*' || seq2[i] == '.' ) { 341 } else if(aaScore(seq1[i], seq2[i]) > 0) similarity += 1.0; 342 } 343 344 345 if ( alnLength > 0){ 346 similarity = (identity + similarity) / alnLength; 347 identity = identity/alnLength; 348 } 349 Map<String, Double> m = new HashMap<String, Double>(); 350 m.put("similarity", similarity); 351 m.put("identity", identity); 352 353 return m; 354 } 355 356 357 /** 358 * 359 * @param afpChain 360 * @param ca1 361 * @param ca2 362 * @return 363 * @throws ClassNotFoundException If an error occurs when invoking jmol 364 * @throws NoSuchMethodException If an error occurs when invoking jmol 365 * @throws InvocationTargetException If an error occurs when invoking jmol 366 * @throws IllegalAccessException If an error occurs when invoking jmol 367 */ 368 public static Structure createArtificalStructure(AFPChain afpChain, Atom[] ca1, 369 Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, 370 InvocationTargetException, IllegalAccessException 371 { 372 373 if ( afpChain.getNrEQR() < 1){ 374 return GuiWrapper.getAlignedStructure(ca1, ca2); 375 } 376 377 Group[] twistedGroups = GuiWrapper.prepareGroupsForDisplay(afpChain,ca1, ca2); 378 379 List<Atom> twistedAs = new ArrayList<Atom>(); 380 381 for ( Group g: twistedGroups){ 382 if ( g == null ) 383 continue; 384 if ( g.size() < 1) 385 continue; 386 Atom a = g.getAtom(0); 387 twistedAs.add(a); 388 } 389 Atom[] twistedAtoms = twistedAs.toArray(new Atom[twistedAs.size()]); 390 391 List<Group> hetatms = new ArrayList<Group>(); 392 List<Group> nucs1 = new ArrayList<Group>(); 393 Group g1 = ca1[0].getGroup(); 394 Chain c1 = null; 395 if ( g1 != null) { 396 c1 = g1.getChain(); 397 if ( c1 != null){ 398 hetatms = c1.getAtomGroups(GroupType.HETATM);; 399 nucs1 = c1.getAtomGroups(GroupType.NUCLEOTIDE); 400 } 401 } 402 List<Group> hetatms2 = new ArrayList<Group>(); 403 List<Group> nucs2 = new ArrayList<Group>(); 404 Group g2 = ca2[0].getGroup(); 405 Chain c2 = null; 406 if ( g2 != null){ 407 c2 = g2.getChain(); 408 if ( c2 != null){ 409 hetatms2 = c2.getAtomGroups(GroupType.HETATM); 410 nucs2 = c2.getAtomGroups(GroupType.NUCLEOTIDE); 411 } 412 } 413 Atom[] arr1 = GuiWrapper.getAtomArray(ca1, hetatms, nucs1); 414 Atom[] arr2 = GuiWrapper.getAtomArray(twistedAtoms, hetatms2, nucs2); 415 416 return GuiWrapper.getAlignedStructure(arr1,arr2); 417 } 418 419 /** get the block number for an aligned position 420 * 421 * @param afpChain 422 * @param aligPos 423 * @return 424 */ 425 public static int getBlockNrForAlignPos(AFPChain afpChain, int aligPos){ 426 // moved here from DisplayAFP; 427 428 int blockNum = afpChain.getBlockNum(); 429 430 int[] optLen = afpChain.getOptLen(); 431 int[][][] optAln = afpChain.getOptAln(); 432 433 int len = 0; 434 int p1b=0; 435 int p2b=0; 436 437 for(int i = 0; i < blockNum; i ++) { 438 439 for(int j = 0; j < optLen[i]; j ++) { 440 441 int p1 = optAln[i][0][j]; 442 int p2 = optAln[i][1][j]; 443 444 if (len != 0) { 445 // check for gapped region 446 int lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1); 447 for(int k = 0; k < lmax; k ++) { 448 len++; 449 } 450 } 451 452 453 p1b = p1; 454 p2b = p2; 455 if ( len >= aligPos) { 456 457 return i; 458 } 459 len++; 460 } 461 } 462 463 return blockNum; 464 465 } 466}