001/*
002 *                    PDB web development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 *
015 * Created on Jun 17, 2009
016 * Created by ap3
017 *
018 */
019
020package org.biojava.nbio.structure.align.util;
021
022import org.biojava.nbio.structure.*;
023import org.biojava.nbio.structure.align.ce.GuiWrapper;
024import org.biojava.nbio.structure.align.model.AFPChain;
025import org.biojava.nbio.structure.geometry.Matrices;
026import org.biojava.nbio.structure.geometry.SuperPositions;
027import org.biojava.nbio.structure.jama.Matrix;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import java.lang.reflect.InvocationTargetException;
032import java.util.*;
033
034import javax.vecmath.Matrix4d;
035
036
037public class AFPAlignmentDisplay
038{
039        private static final Logger logger = LoggerFactory.getLogger(AFPAlignmentDisplay.class);
040
041        private static final int[][] aaMatrix = new int[][] {{6,0,-2,-3,-2,0,-1,0,-2,-2,-2,-2,-2,-3,-4,-4,-3,-3,-3,-2,-4},
042                        {0,4,-1,0,0,1,-2,-2,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-3,-4},
043                        {-2,-1,7,-3,-1,-1,-1,-2,-1,-1,-1,-2,-2,-2,-3,-3,-2,-4,-3,-4,-4},
044                        {-3,0,-3,9,-1,-1,-3,-3,-4,-3,-3,-3,-3,-1,-1,-1,-1,-2,-2,-2,-4},
045                        {-2,0,-1,-1,5,1,-1,0,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-2,-4},
046                        {0,1,-1,-1,1,4,0,1,0,0,0,-1,-1,-2,-2,-2,-1,-2,-2,-3,-4},
047                        {-1,-2,-1,-3,-1,0,6,1,2,0,-1,-1,-2,-3,-3,-4,-3,-3,-3,-4,-4},
048                        {0,-2,-2,-3,0,1,1,6,0,0,0,1,0,-3,-3,-3,-2,-3,-2,-4,-4},
049                        {-2,-1,-1,-4,-1,0,2,0,5,2,1,0,0,-2,-3,-3,-2,-3,-2,-3,-4},
050                        {-2,-1,-1,-3,-1,0,0,0,2,5,1,0,1,-2,-3,-2,0,-3,-1,-2,-4},
051                        {-2,-1,-1,-3,-1,0,-1,0,1,1,5,-1,2,-2,-3,-2,-1,-3,-2,-3,-4},
052                        {-2,-2,-2,-3,-2,-1,-1,1,0,0,-1,8,0,-3,-3,-3,-2,-1,2,-2,-4},
053                        {-2,-1,-2,-3,-1,-1,-2,0,0,1,2,0,5,-3,-3,-2,-1,-3,-2,-3,-4},
054                        {-3,0,-2,-1,0,-2,-3,-3,-2,-2,-2,-3,-3,4,3,1,1,-1,-1,-3,-4},
055                        {-4,-1,-3,-1,-1,-2,-3,-3,-3,-3,-3,-3,-3,3,4,2,1,0,-1,-3,-4},
056                        {-4,-1,-3,-1,-1,-2,-4,-3,-3,-2,-2,-3,-2,1,2,4,2,0,-1,-2,-4},
057                        {-3,-1,-2,-1,-1,-1,-3,-2,-2,0,-1,-2,-1,1,1,2,5,0,-1,-1,-4},
058                        {-3,-2,-4,-2,-2,-2,-3,-3,-3,-3,-3,-1,-3,-1,0,0,0,6,3,1,-4},
059                        {-3,-2,-3,-2,-2,-2,-3,-2,-2,-1,-2,2,-2,-1,-1,-1,-1,3,7,2,-4},
060                        {-2,-3,-4,-2,-2,-3,-4,-4,-3,-2,-3,-2,-3,-3,-3,-2,-1,1,2,11,-4},
061                        {-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1}};
062
063        private static Character[] aa1 = new Character[]{  'G',   'A',   'P',   'C',   'T',   'S','D',   'N',   'E',   'Q',   'K',   'H',   'R', 'V',   'I',   'L',   'M',   'F',   'Y',   'W', '-'};
064
065        private static final List<Character> aa1List = Arrays.asList(aa1);
066
067        public static Matrix getRotMax(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{
068
069                Atom[] a1 = getAlignedAtoms1(afpChain,ca1);
070                Atom[] a2 = getAlignedAtoms2(afpChain,ca2);
071
072                Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1), 
073                                Calc.atomsToPoints(a2));
074                
075                return Matrices.getRotationJAMA(trans);
076
077        }
078
079        public static Atom getTranslation(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{
080
081
082                Atom[] a1 = getAlignedAtoms1(afpChain,ca1);
083                Atom[] a2 = getAlignedAtoms2(afpChain,ca2);
084
085                Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1), 
086                                Calc.atomsToPoints(a2));
087                
088                return Calc.getTranslationVector(trans);
089
090        }
091
092        public static Atom[] getAlignedAtoms1(AFPChain afpChain,Atom[] ca1){
093                List<Atom> atoms = new ArrayList<Atom>();
094
095                int blockNum = afpChain.getBlockNum();
096
097                int[] optLen = afpChain.getOptLen();
098                int[][][] optAln = afpChain.getOptAln();
099
100
101                for(int bk = 0; bk < blockNum; bk ++)       {
102
103
104                        for ( int i=0;i< optLen[bk];i++){
105                                int pos = optAln[bk][0][i];
106                                atoms.add(ca1[pos]);
107                        }
108
109                }
110                return atoms.toArray(new Atom[atoms.size()]);
111        }
112        public static Atom[] getAlignedAtoms2(AFPChain afpChain,Atom[] ca2){
113
114                List<Atom> atoms = new ArrayList<Atom>();
115
116                int blockNum = afpChain.getBlockNum();
117
118                int[] optLen = afpChain.getOptLen();
119                int[][][] optAln = afpChain.getOptAln();
120
121
122                for(int bk = 0; bk < blockNum; bk ++)       {
123
124
125                        for ( int i=0;i< optLen[bk];i++){
126                                int pos = optAln[bk][1][i];
127                                atoms.add(ca2[pos]);
128                        }
129
130                }
131                return atoms.toArray(new Atom[atoms.size()]);
132        }
133
134
135
136        /**
137         * Extract the alignment output
138         * <p>eg<pre>
139         * STWNTWACTWHLKQP--WSTILILA
140         * 111111111111     22222222
141         * SQNNTYACSWKLKSWNNNSTILILG
142         * </pre>
143         * Those position pairs labeled by 1 and 2 are equivalent positions, belongs to
144         * two blocks 1 and 2. The residues between labeled residues are non-equivalent,
145         * with '-' filling in their resulting gaps.
146         * <p>
147         * The three lines can be accessed using
148         * {@link AFPChain#getAlnseq1()}, {@link AFPChain#getAlnsymb()},
149         * and {@link AFPChain#getAlnseq2()}.
150         *
151         */
152        public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2) {
153                getAlign(afpChain, ca1, ca2, false);
154        }
155
156        /**
157         * Sets the following properties:
158         * <ul>
159         * <li>The alignment strings {@link AFPChain#setAlnseq1(char[]) alnseq1},
160         *  {@link AFPChain#setAlnseq2(char[]) alnseq2},
161         *  and {@link AFPChain#setAlnsymb(char[]) alnsymb}</li>
162         * <li>{@link AFPChain#setAlnbeg1(int) alnbeg1} and 2</li>
163         * <li>{@link AFPChain#setAlnLength(int) alnLength} and
164         *  {@link AFPChain#setGapLen(int) gapLen}</li>
165         * </ul>
166         * <p>
167         * Expects the following properties to be previously computed:
168         * <ul>
169         * <li>{@link AFPChain#getOptAln()} and lengths
170         * </ul>
171         *
172         * <section>Known Bugs</section>
173         * Expects the alignment to have linear topology. May give odd results
174         * for circular permutations and other complicated topologies.
175         *
176         * @param afpChain Alignment between ca1 and ca2
177         * @param ca1 CA atoms of the first protein
178         * @param ca2 CA atoms of the second protein
179         * @param showSeq Use symbols reflecting sequence similarity: '|' for identical,
180         *  ':' for similar, '.' for dissimilar. Otherwise, use the block number
181         *  to show aligned residues.
182         */
183        public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2, boolean showSeq) {
184
185                char[] alnsymb = afpChain.getAlnsymb();
186                char[] alnseq1 = afpChain.getAlnseq1();
187                char[] alnseq2 = afpChain.getAlnseq2();
188
189                int     i, j, k, p1, p2, p1b, p2b, lmax;
190
191                int pro1Len = ca1.length;
192                int pro2Len = ca2.length;
193
194                p1b = p2b = 0;
195
196
197
198                if(alnsymb == null)     {
199                        alnseq1 = new char[pro1Len+pro2Len +1];
200                        alnseq2 = new char[pro1Len+pro2Len +1] ;
201                        alnsymb = new char[pro1Len+pro2Len+1];
202
203                        afpChain.setAlnseq1(alnseq1);
204                        afpChain.setAlnseq2(alnseq2);
205                        afpChain.setAlnsymb(alnsymb);
206                }
207
208
209                int blockNum = afpChain.getBlockNum();
210
211                int[] optLen = afpChain.getOptLen();
212                int[][][] optAln = afpChain.getOptAln();
213
214                int alnbeg1 = afpChain.getAlnbeg1(); // immediately overwritten
215                int alnbeg2 = afpChain.getAlnbeg2(); // immediately overwritten
216                int alnLength; // immediately overwritten
217                int optLength = afpChain.getOptLength();
218
219                if ( optLen == null) {
220                        optLen = new int[blockNum];
221                        for (int oi =0 ; oi < blockNum ; oi++)
222                                optLen[oi] = 0;
223                }
224                int     len = 0;
225                for(i = 0; i < blockNum; i ++)  {
226                        for(j = 0; j < optLen[i]; j ++) {
227                                p1 = optAln[i][0][j];
228                                p2 = optAln[i][1][j];
229
230                                // weird, could not find a residue in the Atom array. Did something change in the underlying data?
231                                if (( p1 == -1 ) || ( p2 == -1)) {
232                                        logger.warn("Could not get atom on position " + j );
233                                        continue;
234                                }
235                                if(len > 0)     {
236                                        lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1);
237                                        for(k = 0; k < lmax; k ++)      {
238                                                if(k >= (p1 - p1b - 1)) alnseq1[len] = '-';
239                                                else {
240                                                        char oneletter = getOneLetter(ca1[p1b+1+k].getGroup());
241                                                        alnseq1[len] = oneletter;
242                                                }
243                                                if(k >= (p2 - p2b - 1)) alnseq2[len] = '-';
244                                                else  {
245                                                        char oneletter = getOneLetter(ca2[p2b+1+k].getGroup());
246                                                        alnseq2[len] = oneletter;
247                                                }
248                                                alnsymb[len ++] = ' ';
249                                        }
250                                }
251                                else    {
252                                        alnbeg1 = p1; //the first position of sequence in alignment
253                                        alnbeg2 = p2;
254                                }
255
256                                if ( p1 < ca1.length && p2<ca2.length) {
257
258                                        alnseq1[len] = getOneLetter(ca1[p1].getGroup());
259                                        alnseq2[len] = getOneLetter(ca2[p2].getGroup());
260                                } else {
261                                        //TODO handle permutations
262                                        alnseq1[len]='?';
263                                        alnseq2[len]='?';
264                                }
265                                if ( showSeq) {
266                                        if ( alnseq1[len] == alnseq2[len]){
267                                                alnsymb[len ++] = '|';
268                                        } else {
269                                                double score = aaScore(alnseq1[len], alnseq2[len]);
270
271                                                if ( score > 1)
272                                                        alnsymb[len ++] = ':';
273                                                else
274                                                        alnsymb[len ++] = '.';
275                                        }
276                                } else {
277                                        String tmpS = String.format( "%d", i + 1);
278                                        alnsymb[len ++] = tmpS.charAt(0);
279                                }
280                                p1b = p1;
281                                p2b = p2;
282                        }
283                }
284                alnLength = len;
285
286                afpChain.setOptAln(optAln);
287                afpChain.setOptLen(optLen);
288                afpChain.setAlnbeg1(alnbeg1);
289                afpChain.setAlnbeg2(alnbeg2);
290                afpChain.setAlnLength(alnLength);
291                afpChain.setGapLen(alnLength-optLength);
292        }
293
294        private static char getOneLetter(Group g){
295                return StructureTools.get1LetterCode(g.getPDBName());
296        }
297
298        public static int aaScore(char a, char b){
299                if (a == 'x')
300                        a= '-';
301                if (b == 'x')
302                        b='-';
303                if ( a == 'X')
304                        a = '-';
305                if ( b == 'X')
306                        b = '-';
307
308                int pos1 = aa1List.indexOf(a);
309                int pos2 = aa1List.indexOf(b);
310
311
312                // SEC an PYL amino acids are not supported as of yet...
313
314                if ( pos1 < 0) {
315                        logger.warn("unknown char " + a);
316                        return 0;
317                }
318                if ( pos2 < 0) {
319                        logger.warn("unknown char " + b);
320                        return 0;
321                }
322
323                return aaMatrix[pos1][pos2];
324        }
325
326        public static Map<String,Double> calcIdSimilarity(char[] seq1, char[] seq2, int alnLength){
327                double identity = 0.0;
328                double similarity = 0.0;
329
330                if ( seq1 == null || seq2 == null){
331                        logger.warn("Can't calc %ID for an empty alignment! ");
332                        Map<String, Double> m = new HashMap<String, Double>();
333                        m.put("similarity", similarity);
334                        m.put("identity", identity);
335                        return m;
336                }
337
338                int     i;
339                int eqr = 0;
340
341                @SuppressWarnings("unused")
342                int count = 0;
343                for(i = 0; i < alnLength; i ++) {
344
345                        //System.out.println(i + " " + count + " " + seq1[i] + " " + seq2[i]);
346                        // ignore gaps
347                        if(seq1[i] == '-' || seq1[i] == '*' || seq1[i] == '.' ||
348                                        seq2[i] == '-' || seq2[i] == '*' || seq2[i] == '.' )
349                                continue ;
350
351                        eqr++;
352
353                        if(seq1[i] == seq2[i])  {
354
355                                identity   += 1.0;
356                                similarity += 1.0;
357                                count++;
358
359                        } else if(aaScore(seq1[i], seq2[i]) > 0) {
360
361                                similarity += 1.0;
362                                count++;
363
364                        }
365                }
366
367
368                if ( alnLength > 0){
369                        similarity = (similarity) / eqr;
370                        identity = identity/eqr;
371                }
372                Map<String, Double> m = new HashMap<String, Double>();
373                m.put("similarity", similarity);
374                m.put("identity", identity);
375
376                return m;
377        }
378
379
380        /**
381         *
382         * @param afpChain
383         * @param ca1
384         * @param ca2
385         * @return
386         * @throws ClassNotFoundException If an error occurs when invoking jmol
387         * @throws NoSuchMethodException If an error occurs when invoking jmol
388         * @throws InvocationTargetException If an error occurs when invoking jmol
389         * @throws IllegalAccessException If an error occurs when invoking jmol
390         * @throws StructureException 
391         */
392        public static Structure createArtificalStructure(AFPChain afpChain, Atom[] ca1,
393                                                                                                         Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException,
394                        InvocationTargetException, IllegalAccessException, StructureException
395        {
396
397                if ( afpChain.getNrEQR() < 1){
398                        return GuiWrapper.getAlignedStructure(ca1, ca2);
399                }
400
401                Group[] twistedGroups = AlignmentTools.prepareGroupsForDisplay(afpChain,ca1, ca2);
402
403                List<Atom> twistedAs = new ArrayList<Atom>();
404
405                for ( Group g: twistedGroups){
406                        if ( g == null )
407                                continue;
408                        if ( g.size() < 1)
409                                continue;
410                        Atom a = g.getAtom(0);
411                        twistedAs.add(a);
412                }
413                Atom[] twistedAtoms = twistedAs.toArray(new Atom[twistedAs.size()]);
414
415                List<Group> hetatms  = new ArrayList<Group>();
416                List<Group> nucs1    = new ArrayList<Group>();
417                Group g1 = ca1[0].getGroup();
418                Chain c1 = null;
419                if ( g1 != null) {
420                        c1 = g1.getChain();
421                        if ( c1 != null){
422                                hetatms = c1.getAtomGroups(GroupType.HETATM);;
423                                nucs1  = c1.getAtomGroups(GroupType.NUCLEOTIDE);
424                        }
425                }
426                List<Group> hetatms2 = new ArrayList<Group>();
427                List<Group> nucs2    = new ArrayList<Group>();
428                Group g2 = ca2[0].getGroup();
429                Chain c2 = null;
430                if ( g2 != null){
431                        c2 = g2.getChain();
432                        if ( c2 != null){
433                                hetatms2 = c2.getAtomGroups(GroupType.HETATM);
434                                nucs2 = c2.getAtomGroups(GroupType.NUCLEOTIDE);
435                        }
436                }
437                Atom[] arr1 = GuiWrapper.getAtomArray(ca1, hetatms, nucs1);
438                Atom[] arr2 = GuiWrapper.getAtomArray(twistedAtoms, hetatms2, nucs2);
439
440                return GuiWrapper.getAlignedStructure(arr1,arr2);
441        }
442
443        /** get the block number for an aligned position
444         *
445         * @param afpChain
446         * @param aligPos
447         * @return
448         */
449        public static int getBlockNrForAlignPos(AFPChain afpChain, int aligPos){
450                // moved here from DisplayAFP;
451
452                int blockNum = afpChain.getBlockNum();
453
454                int[] optLen = afpChain.getOptLen();
455                int[][][] optAln = afpChain.getOptAln();
456
457                int len = 0;
458                int p1b=0;
459                int p2b=0;
460
461                for(int i = 0; i < blockNum; i ++)  {
462
463                        for(int j = 0; j < optLen[i]; j ++) {
464
465                                int p1 = optAln[i][0][j];
466                                int p2 = optAln[i][1][j];
467
468                                if (len != 0) {
469                                        // check for gapped region
470                                        int lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1);
471                                        for(int k = 0; k < lmax; k ++)      {
472                                                len++;
473                                        }
474                                }
475
476
477                                p1b = p1;
478                                p2b = p2;
479                                if ( len >= aligPos) {
480
481                                        return i;
482                                }
483                                len++;
484                        }
485                }
486
487                return blockNum;
488
489        }
490}