001/*
002 *                    PDB web development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 *
015 * Created on Jun 17, 2009
016 * Created by ap3
017 *
018 */
019
020package org.biojava.nbio.structure.align.util;
021
022import org.biojava.nbio.structure.*;
023import org.biojava.nbio.structure.align.ce.GuiWrapper;
024import org.biojava.nbio.structure.align.model.AFPChain;
025import org.biojava.nbio.structure.geometry.Matrices;
026import org.biojava.nbio.structure.geometry.SuperPositions;
027import org.biojava.nbio.structure.jama.Matrix;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import java.lang.reflect.InvocationTargetException;
032import java.util.*;
033
034import javax.vecmath.Matrix4d;
035
036
037public class AFPAlignmentDisplay
038{
039        private static final Logger logger = LoggerFactory.getLogger(AFPAlignmentDisplay.class);
040
041        private static final int[][] aaMatrix = new int[][] {{6,0,-2,-3,-2,0,-1,0,-2,-2,-2,-2,-2,-3,-4,-4,-3,-3,-3,-2,-4},
042                        {0,4,-1,0,0,1,-2,-2,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-3,-4},
043                        {-2,-1,7,-3,-1,-1,-1,-2,-1,-1,-1,-2,-2,-2,-3,-3,-2,-4,-3,-4,-4},
044                        {-3,0,-3,9,-1,-1,-3,-3,-4,-3,-3,-3,-3,-1,-1,-1,-1,-2,-2,-2,-4},
045                        {-2,0,-1,-1,5,1,-1,0,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-2,-4},
046                        {0,1,-1,-1,1,4,0,1,0,0,0,-1,-1,-2,-2,-2,-1,-2,-2,-3,-4},
047                        {-1,-2,-1,-3,-1,0,6,1,2,0,-1,-1,-2,-3,-3,-4,-3,-3,-3,-4,-4},
048                        {0,-2,-2,-3,0,1,1,6,0,0,0,1,0,-3,-3,-3,-2,-3,-2,-4,-4},
049                        {-2,-1,-1,-4,-1,0,2,0,5,2,1,0,0,-2,-3,-3,-2,-3,-2,-3,-4},
050                        {-2,-1,-1,-3,-1,0,0,0,2,5,1,0,1,-2,-3,-2,0,-3,-1,-2,-4},
051                        {-2,-1,-1,-3,-1,0,-1,0,1,1,5,-1,2,-2,-3,-2,-1,-3,-2,-3,-4},
052                        {-2,-2,-2,-3,-2,-1,-1,1,0,0,-1,8,0,-3,-3,-3,-2,-1,2,-2,-4},
053                        {-2,-1,-2,-3,-1,-1,-2,0,0,1,2,0,5,-3,-3,-2,-1,-3,-2,-3,-4},
054                        {-3,0,-2,-1,0,-2,-3,-3,-2,-2,-2,-3,-3,4,3,1,1,-1,-1,-3,-4},
055                        {-4,-1,-3,-1,-1,-2,-3,-3,-3,-3,-3,-3,-3,3,4,2,1,0,-1,-3,-4},
056                        {-4,-1,-3,-1,-1,-2,-4,-3,-3,-2,-2,-3,-2,1,2,4,2,0,-1,-2,-4},
057                        {-3,-1,-2,-1,-1,-1,-3,-2,-2,0,-1,-2,-1,1,1,2,5,0,-1,-1,-4},
058                        {-3,-2,-4,-2,-2,-2,-3,-3,-3,-3,-3,-1,-3,-1,0,0,0,6,3,1,-4},
059                        {-3,-2,-3,-2,-2,-2,-3,-2,-2,-1,-2,2,-2,-1,-1,-1,-1,3,7,2,-4},
060                        {-2,-3,-4,-2,-2,-3,-4,-4,-3,-2,-3,-2,-3,-3,-3,-2,-1,1,2,11,-4},
061                        {-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1}};
062
063        private static Character[] aa1 = new Character[]{  'G',   'A',   'P',   'C',   'T',   'S','D',   'N',   'E',   'Q',   'K',   'H',   'R', 'V',   'I',   'L',   'M',   'F',   'Y',   'W', '-'};
064
065        private static final List<Character> aa1List = Arrays.asList(aa1);
066
067        public static Matrix getRotMax(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{
068
069                Atom[] a1 = getAlignedAtoms1(afpChain,ca1);
070                Atom[] a2 = getAlignedAtoms2(afpChain,ca2);
071
072                Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1),
073                                Calc.atomsToPoints(a2));
074
075                return Matrices.getRotationJAMA(trans);
076
077        }
078
079        public static Atom getTranslation(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{
080
081
082                Atom[] a1 = getAlignedAtoms1(afpChain,ca1);
083                Atom[] a2 = getAlignedAtoms2(afpChain,ca2);
084
085                Matrix4d trans = SuperPositions.superpose(Calc.atomsToPoints(a1),
086                                Calc.atomsToPoints(a2));
087
088                return Calc.getTranslationVector(trans);
089
090        }
091
092        public static Atom[] getAlignedAtoms1(AFPChain afpChain,Atom[] ca1){
093                List<Atom> atoms = new ArrayList<>();
094
095                int blockNum = afpChain.getBlockNum();
096
097                int[] optLen = afpChain.getOptLen();
098                int[][][] optAln = afpChain.getOptAln();
099
100
101                for(int bk = 0; bk < blockNum; bk ++)       {
102
103
104                        for ( int i=0;i< optLen[bk];i++){
105                                int pos = optAln[bk][0][i];
106                                atoms.add(ca1[pos]);
107                        }
108
109                }
110                return atoms.toArray(new Atom[atoms.size()]);
111        }
112        public static Atom[] getAlignedAtoms2(AFPChain afpChain,Atom[] ca2){
113
114                List<Atom> atoms = new ArrayList<>();
115
116                int blockNum = afpChain.getBlockNum();
117
118                int[] optLen = afpChain.getOptLen();
119                int[][][] optAln = afpChain.getOptAln();
120
121
122                for(int bk = 0; bk < blockNum; bk ++)       {
123
124
125                        for ( int i=0;i< optLen[bk];i++){
126                                int pos = optAln[bk][1][i];
127                                atoms.add(ca2[pos]);
128                        }
129
130                }
131                return atoms.toArray(new Atom[atoms.size()]);
132        }
133
134
135
136        /**
137         * Extract the alignment output
138         * <p>eg<pre>
139         * STWNTWACTWHLKQP--WSTILILA
140         * 111111111111     22222222
141         * SQNNTYACSWKLKSWNNNSTILILG
142         * </pre>
143         * Those position pairs labeled by 1 and 2 are equivalent positions, belongs to
144         * two blocks 1 and 2. The residues between labeled residues are non-equivalent,
145         * with '-' filling in their resulting gaps.
146         * <p>
147         * The three lines can be accessed using
148         * {@link AFPChain#getAlnseq1()}, {@link AFPChain#getAlnsymb()},
149         * and {@link AFPChain#getAlnseq2()}.
150         *
151         */
152        public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2) {
153                getAlign(afpChain, ca1, ca2, false);
154        }
155
156        /**
157         * Sets the following properties:
158         * <ul>
159         * <li>The alignment strings {@link AFPChain#setAlnseq1(char[]) alnseq1},
160         *  {@link AFPChain#setAlnseq2(char[]) alnseq2},
161         *  and {@link AFPChain#setAlnsymb(char[]) alnsymb}</li>
162         * <li>{@link AFPChain#setAlnbeg1(int) alnbeg1} and 2</li>
163         * <li>{@link AFPChain#setAlnLength(int) alnLength} and
164         *  {@link AFPChain#setGapLen(int) gapLen}</li>
165         * </ul>
166         * <p>
167         * Expects the following properties to be previously computed:
168         * <ul>
169         * <li>{@link AFPChain#getOptAln()} and lengths
170         * </ul>
171         * <p>
172         * Known Bugs:
173         * <p>
174         * Expects the alignment to have linear topology. May give odd results
175         * for circular permutations and other complicated topologies.
176         *
177         * @param afpChain Alignment between ca1 and ca2
178         * @param ca1 CA atoms of the first protein
179         * @param ca2 CA atoms of the second protein
180         * @param showSeq Use symbols reflecting sequence similarity: '|' for identical,
181         *  ':' for similar, '.' for dissimilar. Otherwise, use the block number
182         *  to show aligned residues.
183         */
184        public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2, boolean showSeq) {
185
186                char[] alnsymb = afpChain.getAlnsymb();
187                char[] alnseq1 = afpChain.getAlnseq1();
188                char[] alnseq2 = afpChain.getAlnseq2();
189
190                int     i, j, k, p1, p2, p1b, p2b, lmax;
191
192                int pro1Len = ca1.length;
193                int pro2Len = ca2.length;
194
195                p1b = p2b = 0;
196
197
198
199                if(alnsymb == null)     {
200                        alnseq1 = new char[pro1Len+pro2Len +1];
201                        alnseq2 = new char[pro1Len+pro2Len +1] ;
202                        alnsymb = new char[pro1Len+pro2Len+1];
203
204                        afpChain.setAlnseq1(alnseq1);
205                        afpChain.setAlnseq2(alnseq2);
206                        afpChain.setAlnsymb(alnsymb);
207                }
208
209
210                int blockNum = afpChain.getBlockNum();
211
212                int[] optLen = afpChain.getOptLen();
213                int[][][] optAln = afpChain.getOptAln();
214
215                int alnbeg1 = afpChain.getAlnbeg1(); // immediately overwritten
216                int alnbeg2 = afpChain.getAlnbeg2(); // immediately overwritten
217                int alnLength; // immediately overwritten
218                int optLength = afpChain.getOptLength();
219
220                if ( optLen == null) {
221                        optLen = new int[blockNum];
222                        for (int oi =0 ; oi < blockNum ; oi++)
223                                optLen[oi] = 0;
224                }
225                int     len = 0;
226                for(i = 0; i < blockNum; i ++)  {
227                        for(j = 0; j < optLen[i]; j ++) {
228                                p1 = optAln[i][0][j];
229                                p2 = optAln[i][1][j];
230
231                                // weird, could not find a residue in the Atom array. Did something change in the underlying data?
232                                if (( p1 == -1 ) || ( p2 == -1)) {
233                                        logger.warn("Could not get atom on position " + j );
234                                        continue;
235                                }
236                                if(len > 0)     {
237                                        lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1);
238                                        for(k = 0; k < lmax; k ++)      {
239                                                if(k >= (p1 - p1b - 1)) alnseq1[len] = '-';
240                                                else {
241                                                        char oneletter = getOneLetter(ca1[p1b+1+k].getGroup());
242                                                        alnseq1[len] = oneletter;
243                                                }
244                                                if(k >= (p2 - p2b - 1)) alnseq2[len] = '-';
245                                                else  {
246                                                        char oneletter = getOneLetter(ca2[p2b+1+k].getGroup());
247                                                        alnseq2[len] = oneletter;
248                                                }
249                                                alnsymb[len ++] = ' ';
250                                        }
251                                }
252                                else    {
253                                        alnbeg1 = p1; //the first position of sequence in alignment
254                                        alnbeg2 = p2;
255                                }
256
257                                if ( p1 < ca1.length && p2<ca2.length) {
258
259                                        alnseq1[len] = getOneLetter(ca1[p1].getGroup());
260                                        alnseq2[len] = getOneLetter(ca2[p2].getGroup());
261                                } else {
262                                        //TODO handle permutations
263                                        alnseq1[len]='?';
264                                        alnseq2[len]='?';
265                                }
266                                if ( showSeq) {
267                                        if ( alnseq1[len] == alnseq2[len]){
268                                                alnsymb[len ++] = '|';
269                                        } else {
270                                                double score = aaScore(alnseq1[len], alnseq2[len]);
271
272                                                if ( score > 1)
273                                                        alnsymb[len ++] = ':';
274                                                else
275                                                        alnsymb[len ++] = '.';
276                                        }
277                                } else {
278                                        String tmpS = String.format( "%d", i + 1);
279                                        alnsymb[len ++] = tmpS.charAt(0);
280                                }
281                                p1b = p1;
282                                p2b = p2;
283                        }
284                }
285                alnLength = len;
286
287                afpChain.setOptAln(optAln);
288                afpChain.setOptLen(optLen);
289                afpChain.setAlnbeg1(alnbeg1);
290                afpChain.setAlnbeg2(alnbeg2);
291                afpChain.setAlnLength(alnLength);
292                afpChain.setGapLen(alnLength-optLength);
293        }
294
295        private static char getOneLetter(Group g){
296                return StructureTools.get1LetterCode(g.getPDBName());
297        }
298
299        public static int aaScore(char a, char b){
300                if (a == 'x')
301                        a= '-';
302                if (b == 'x')
303                        b='-';
304                if ( a == 'X')
305                        a = '-';
306                if ( b == 'X')
307                        b = '-';
308
309                int pos1 = aa1List.indexOf(a);
310                int pos2 = aa1List.indexOf(b);
311
312
313                // SEC an PYL amino acids are not supported as of yet...
314
315                if ( pos1 < 0) {
316                        logger.warn("unknown char " + a);
317                        return 0;
318                }
319                if ( pos2 < 0) {
320                        logger.warn("unknown char " + b);
321                        return 0;
322                }
323
324                return aaMatrix[pos1][pos2];
325        }
326
327        public static Map<String,Double> calcIdSimilarity(char[] seq1, char[] seq2, int alnLength){
328                double identity = 0.0;
329                double similarity = 0.0;
330
331                if ( seq1 == null || seq2 == null){
332                        logger.warn("Can't calc %ID for an empty alignment! ");
333                        Map<String, Double> m = new HashMap<>();
334                        m.put("similarity", similarity);
335                        m.put("identity", identity);
336                        return m;
337                }
338
339                int     i;
340                int eqr = 0;
341
342                @SuppressWarnings("unused")
343                int count = 0;
344                for(i = 0; i < alnLength; i ++) {
345
346                        //System.out.println(i + " " + count + " " + seq1[i] + " " + seq2[i]);
347                        // ignore gaps
348                        if(seq1[i] == '-' || seq1[i] == '*' || seq1[i] == '.' ||
349                                        seq2[i] == '-' || seq2[i] == '*' || seq2[i] == '.' )
350                                continue ;
351
352                        eqr++;
353
354                        if(seq1[i] == seq2[i])  {
355
356                                identity   += 1.0;
357                                similarity += 1.0;
358                                count++;
359
360                        } else if(aaScore(seq1[i], seq2[i]) > 0) {
361
362                                similarity += 1.0;
363                                count++;
364
365                        }
366                }
367
368
369                if ( alnLength > 0){
370                        similarity = (similarity) / eqr;
371                        identity = identity/eqr;
372                }
373                Map<String, Double> m = new HashMap<>();
374                m.put("similarity", similarity);
375                m.put("identity", identity);
376
377                return m;
378        }
379
380
381        /**
382         *
383         * @param afpChain
384         * @param ca1
385         * @param ca2
386         * @return
387         * @throws ClassNotFoundException If an error occurs when invoking jmol
388         * @throws NoSuchMethodException If an error occurs when invoking jmol
389         * @throws InvocationTargetException If an error occurs when invoking jmol
390         * @throws IllegalAccessException If an error occurs when invoking jmol
391         * @throws StructureException
392         */
393        public static Structure createArtificalStructure(AFPChain afpChain, Atom[] ca1,
394                                                                                                         Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException,
395                        InvocationTargetException, IllegalAccessException, StructureException
396        {
397
398                if ( afpChain.getNrEQR() < 1){
399                        return GuiWrapper.getAlignedStructure(ca1, ca2);
400                }
401
402                Group[] twistedGroups = AlignmentTools.prepareGroupsForDisplay(afpChain,ca1, ca2);
403
404                List<Atom> twistedAs = new ArrayList<>();
405
406                for ( Group g: twistedGroups){
407                        if ( g == null )
408                                continue;
409                        if ( g.size() < 1)
410                                continue;
411                        Atom a = g.getAtom(0);
412                        twistedAs.add(a);
413                }
414                Atom[] twistedAtoms = twistedAs.toArray(new Atom[twistedAs.size()]);
415
416                List<Group> hetatms  = new ArrayList<>();
417                List<Group> nucs1    = new ArrayList<>();
418                Group g1 = ca1[0].getGroup();
419                Chain c1 = null;
420                if ( g1 != null) {
421                        c1 = g1.getChain();
422                        if ( c1 != null){
423                                hetatms = c1.getAtomGroups(GroupType.HETATM);;
424                                nucs1  = c1.getAtomGroups(GroupType.NUCLEOTIDE);
425                        }
426                }
427                List<Group> hetatms2 = new ArrayList<>();
428                List<Group> nucs2    = new ArrayList<>();
429                Group g2 = ca2[0].getGroup();
430                Chain c2 = null;
431                if ( g2 != null){
432                        c2 = g2.getChain();
433                        if ( c2 != null){
434                                hetatms2 = c2.getAtomGroups(GroupType.HETATM);
435                                nucs2 = c2.getAtomGroups(GroupType.NUCLEOTIDE);
436                        }
437                }
438                Atom[] arr1 = GuiWrapper.getAtomArray(ca1, hetatms, nucs1);
439                Atom[] arr2 = GuiWrapper.getAtomArray(twistedAtoms, hetatms2, nucs2);
440
441                return GuiWrapper.getAlignedStructure(arr1,arr2);
442        }
443
444        /** get the block number for an aligned position
445         *
446         * @param afpChain
447         * @param aligPos
448         * @return
449         */
450        public static int getBlockNrForAlignPos(AFPChain afpChain, int aligPos){
451                // moved here from DisplayAFP;
452
453                int blockNum = afpChain.getBlockNum();
454
455                int[] optLen = afpChain.getOptLen();
456                int[][][] optAln = afpChain.getOptAln();
457
458                int len = 0;
459                int p1b=0;
460                int p2b=0;
461
462                for(int i = 0; i < blockNum; i ++)  {
463
464                        for(int j = 0; j < optLen[i]; j ++) {
465
466                                int p1 = optAln[i][0][j];
467                                int p2 = optAln[i][1][j];
468
469                                if (len != 0) {
470                                        // check for gapped region
471                                        int lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1);
472                                        for(int k = 0; k < lmax; k ++)      {
473                                                len++;
474                                        }
475                                }
476
477
478                                p1b = p1;
479                                p2b = p2;
480                                if ( len >= aligPos) {
481
482                                        return i;
483                                }
484                                len++;
485                        }
486                }
487
488                return blockNum;
489
490        }
491}