001/*
002 *                    PDB web development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 *
015 * Created on Jun 17, 2009
016 * Created by ap3
017 *
018 */
019
020package org.biojava.nbio.structure.align.util;
021
022import org.biojava.nbio.structure.*;
023import org.biojava.nbio.structure.align.ce.GuiWrapper;
024import org.biojava.nbio.structure.align.model.AFPChain;
025import org.biojava.nbio.structure.jama.Matrix;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029import java.lang.reflect.InvocationTargetException;
030import java.util.*;
031
032
033public class AFPAlignmentDisplay
034{
035        private static final Logger logger = LoggerFactory.getLogger(AFPAlignmentDisplay.class);
036
037        private static final int[][] aaMatrix = new int[][] {{6,0,-2,-3,-2,0,-1,0,-2,-2,-2,-2,-2,-3,-4,-4,-3,-3,-3,-2,-4},
038                {0,4,-1,0,0,1,-2,-2,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-3,-4},
039                {-2,-1,7,-3,-1,-1,-1,-2,-1,-1,-1,-2,-2,-2,-3,-3,-2,-4,-3,-4,-4},
040                {-3,0,-3,9,-1,-1,-3,-3,-4,-3,-3,-3,-3,-1,-1,-1,-1,-2,-2,-2,-4},
041                {-2,0,-1,-1,5,1,-1,0,-1,-1,-1,-2,-1,0,-1,-1,-1,-2,-2,-2,-4},
042                {0,1,-1,-1,1,4,0,1,0,0,0,-1,-1,-2,-2,-2,-1,-2,-2,-3,-4},
043                {-1,-2,-1,-3,-1,0,6,1,2,0,-1,-1,-2,-3,-3,-4,-3,-3,-3,-4,-4},
044                {0,-2,-2,-3,0,1,1,6,0,0,0,1,0,-3,-3,-3,-2,-3,-2,-4,-4},
045                {-2,-1,-1,-4,-1,0,2,0,5,2,1,0,0,-2,-3,-3,-2,-3,-2,-3,-4},
046                {-2,-1,-1,-3,-1,0,0,0,2,5,1,0,1,-2,-3,-2,0,-3,-1,-2,-4},
047                {-2,-1,-1,-3,-1,0,-1,0,1,1,5,-1,2,-2,-3,-2,-1,-3,-2,-3,-4},
048                {-2,-2,-2,-3,-2,-1,-1,1,0,0,-1,8,0,-3,-3,-3,-2,-1,2,-2,-4},
049                {-2,-1,-2,-3,-1,-1,-2,0,0,1,2,0,5,-3,-3,-2,-1,-3,-2,-3,-4},
050                {-3,0,-2,-1,0,-2,-3,-3,-2,-2,-2,-3,-3,4,3,1,1,-1,-1,-3,-4},
051                {-4,-1,-3,-1,-1,-2,-3,-3,-3,-3,-3,-3,-3,3,4,2,1,0,-1,-3,-4},
052                {-4,-1,-3,-1,-1,-2,-4,-3,-3,-2,-2,-3,-2,1,2,4,2,0,-1,-2,-4},
053                {-3,-1,-2,-1,-1,-1,-3,-2,-2,0,-1,-2,-1,1,1,2,5,0,-1,-1,-4},
054                {-3,-2,-4,-2,-2,-2,-3,-3,-3,-3,-3,-1,-3,-1,0,0,0,6,3,1,-4},
055                {-3,-2,-3,-2,-2,-2,-3,-2,-2,-1,-2,2,-2,-1,-1,-1,-1,3,7,2,-4},
056                {-2,-3,-4,-2,-2,-3,-4,-4,-3,-2,-3,-2,-3,-3,-3,-2,-1,1,2,11,-4},
057                {-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,1}};
058
059        private static Character[] aa1 = new Character[]{  'G',   'A',   'P',   'C',   'T',   'S','D',   'N',   'E',   'Q',   'K',   'H',   'R', 'V',   'I',   'L',   'M',   'F',   'Y',   'W', '-'};
060
061        private static final List<Character> aa1List = Arrays.asList(aa1);
062
063        public static Matrix getRotMax(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{
064
065                Atom[] a1 = getAlignedAtoms1(afpChain,ca1);
066                Atom[] a2 = getAlignedAtoms2(afpChain,ca2);
067
068                SVDSuperimposer svd = new SVDSuperimposer(a1,a2);
069
070                return svd.getRotation();
071
072        }
073
074        public static Atom getTranslation(AFPChain afpChain,Atom[] ca1,Atom[] ca2) throws StructureException{
075
076
077                Atom[] a1 = getAlignedAtoms1(afpChain,ca1);
078                Atom[] a2 = getAlignedAtoms2(afpChain,ca2);
079
080                SVDSuperimposer svd = new SVDSuperimposer(a1,a2);
081
082                return svd.getTranslation();
083
084        }
085
086        public static Atom[] getAlignedAtoms1(AFPChain afpChain,Atom[] ca1){
087                List<Atom> atoms = new ArrayList<Atom>();
088
089                int blockNum = afpChain.getBlockNum();
090
091                int[] optLen = afpChain.getOptLen();
092                int[][][] optAln = afpChain.getOptAln();
093
094
095                for(int bk = 0; bk < blockNum; bk ++)       {
096
097
098                        for ( int i=0;i< optLen[bk];i++){
099                                int pos = optAln[bk][0][i];
100                                atoms.add(ca1[pos]);
101                        }
102
103                }
104                return atoms.toArray(new Atom[atoms.size()]);
105        }
106        public static Atom[] getAlignedAtoms2(AFPChain afpChain,Atom[] ca2){
107
108                List<Atom> atoms = new ArrayList<Atom>();
109
110                int blockNum = afpChain.getBlockNum();
111
112                int[] optLen = afpChain.getOptLen();
113                int[][][] optAln = afpChain.getOptAln();
114
115
116                for(int bk = 0; bk < blockNum; bk ++)       {
117
118
119                        for ( int i=0;i< optLen[bk];i++){
120                                int pos = optAln[bk][1][i];
121                                atoms.add(ca2[pos]);
122                        }
123
124                }
125                return atoms.toArray(new Atom[atoms.size()]);
126        }
127
128
129
130        /**
131         * Extract the alignment output
132         * <p>eg<pre>
133         * STWNTWACTWHLKQP--WSTILILA
134         * 111111111111     22222222
135         * SQNNTYACSWKLKSWNNNSTILILG
136         * </pre>
137         * Those position pairs labeled by 1 and 2 are equivalent positions, belongs to
138         * two blocks 1 and 2. The residues between labeled residues are non-equivalent,
139         * with '-' filling in their resulting gaps.
140         * <p>
141         * The three lines can be accessed using
142         * {@link AFPChain#getAlnseq1()}, {@link AFPChain#getAlnsymb()},
143         * and {@link AFPChain#getAlnseq2()}.
144         *
145         */
146        public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2) {
147                getAlign(afpChain, ca1, ca2, false);
148        }
149
150        /**
151         * Sets the following properties:
152         * <ul>
153         * <li>The alignment strings {@link AFPChain#setAlnseq1(char[]) alnseq1},
154         *  {@link AFPChain#setAlnseq2(char[]) alnseq2},
155         *  and {@link AFPChain#setAlnsymb(char[]) alnsymb}</li>
156         * <li>{@link AFPChain#setAlnbeg1(int) alnbeg1} and 2</li>
157         * <li>{@link AFPChain#setAlnLength(int) alnLength} and
158         *  {@link AFPChain#setGapLen(int) gapLen}</li>
159         * </ul>
160         * <p>
161         * Expects the following properties to be previously computed:
162         * <ul>
163         * <li>{@link AFPChain#getOptAln()} and lengths
164         * </ul>
165         *
166         * <section>Known Bugs</section>
167         * Expects the alignment to have linear topology. May give odd results
168         * for circular permutations and other complicated topologies.
169         *
170         * @param afpChain Alignment between ca1 and ca2
171         * @param ca1 CA atoms of the first protein
172         * @param ca2 CA atoms of the second protein
173         * @param showSeq Use symbols reflecting sequence similarity: '|' for identical,
174         *  ':' for similar, '.' for dissimilar. Otherwise, use the block number
175         *  to show aligned residues.
176         */
177        public static void getAlign(AFPChain afpChain,Atom[] ca1,Atom[] ca2, boolean showSeq) {
178
179                char[] alnsymb = afpChain.getAlnsymb();
180                char[] alnseq1 = afpChain.getAlnseq1();
181                char[] alnseq2 = afpChain.getAlnseq2();
182
183                int     i, j, k, p1, p2, p1b, p2b, lmax;
184
185                int pro1Len = ca1.length;
186                int pro2Len = ca2.length;
187
188                p1b = p2b = 0;
189
190
191
192                if(alnsymb == null)     {
193                        alnseq1 = new char[pro1Len+pro2Len +1];
194                        alnseq2 = new char[pro1Len+pro2Len +1] ;
195                        alnsymb = new char[pro1Len+pro2Len+1];
196
197                        afpChain.setAlnseq1(alnseq1);
198                        afpChain.setAlnseq2(alnseq2);
199                        afpChain.setAlnsymb(alnsymb);
200                }
201
202
203                int blockNum = afpChain.getBlockNum();
204
205                int[] optLen = afpChain.getOptLen();
206                int[][][] optAln = afpChain.getOptAln();
207
208                int alnbeg1 = afpChain.getAlnbeg1(); // immediately overwritten
209                int alnbeg2 = afpChain.getAlnbeg2(); // immediately overwritten
210                int alnLength; // immediately overwritten
211                int optLength = afpChain.getOptLength();
212
213                if ( optLen == null) {
214                        optLen = new int[blockNum];
215                        for (int oi =0 ; oi < blockNum ; oi++)
216                                optLen[oi] = 0;
217                }
218                int     len = 0;
219                for(i = 0; i < blockNum; i ++)  {
220                        for(j = 0; j < optLen[i]; j ++) {
221                                p1 = optAln[i][0][j];
222                                p2 = optAln[i][1][j];
223
224                                // weird, could not find a residue in the Atom array. Did something change in the underlying data?
225                                if (( p1 == -1 ) || ( p2 == -1)) {
226                                        logger.warn("Could not get atom on position " + j );
227                                        continue;
228                                }
229                                if(len > 0)     {
230                                        lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1);
231                                        for(k = 0; k < lmax; k ++)      {
232                                                if(k >= (p1 - p1b - 1)) alnseq1[len] = '-';
233                                                else {
234                                                        char oneletter = getOneLetter(ca1[p1b+1+k].getGroup());
235                                                        alnseq1[len] = oneletter;
236                                                }
237                                                if(k >= (p2 - p2b - 1)) alnseq2[len] = '-';
238                                                else  {
239                                                        char oneletter = getOneLetter(ca2[p2b+1+k].getGroup());
240                                                        alnseq2[len] = oneletter;
241                                                }
242                                                alnsymb[len ++] = ' ';
243                                        }
244                                }
245                                else    {
246                                        alnbeg1 = p1; //the first position of sequence in alignment
247                                        alnbeg2 = p2;
248                                }
249
250                                if ( p1 < ca1.length && p2<ca2.length) {
251
252                                        alnseq1[len] = getOneLetter(ca1[p1].getGroup());
253                                        alnseq2[len] = getOneLetter(ca2[p2].getGroup());
254                                } else {
255                                        //TODO handle permutations
256                                        alnseq1[len]='?';
257                                        alnseq2[len]='?';
258                                }
259                                if ( showSeq) {
260                                        if ( alnseq1[len] == alnseq2[len]){
261                                                alnsymb[len ++] = '|';
262                                        } else {
263                                                double score = aaScore(alnseq1[len], alnseq2[len]);
264
265                                                if ( score > 1)
266                                                        alnsymb[len ++] = ':';
267                                                else
268                                                        alnsymb[len ++] = '.';
269                                        }
270                                } else {
271                                        String tmpS = String.format( "%d", i + 1);
272                                        alnsymb[len ++] = tmpS.charAt(0);
273                                }
274                                p1b = p1;
275                                p2b = p2;
276                        }
277                }
278                alnLength = len;
279
280                afpChain.setOptAln(optAln);
281                afpChain.setOptLen(optLen);
282                afpChain.setAlnbeg1(alnbeg1);
283                afpChain.setAlnbeg2(alnbeg2);
284                afpChain.setAlnLength(alnLength);
285                afpChain.setGapLen(alnLength-optLength);
286        }
287
288        private static char getOneLetter(Group g){
289                return StructureTools.get1LetterCode(g.getPDBName());
290        }
291
292        public static int aaScore(char a, char b){
293                if (a == 'x')
294                        a= '-';
295                if (b == 'x')
296                        b='-';
297                if ( a == 'X')
298                        a = '-';
299                if ( b == 'X')
300                        b = '-';
301
302                int pos1 = aa1List.indexOf(a);
303                int pos2 = aa1List.indexOf(b);
304
305
306                // SEC an PYL amino acids are not supported as of yet...
307
308                if ( pos1 < 0) {
309                        logger.warn("unknown char " + a);
310                        return 0;
311                }
312                if ( pos2 < 0) {
313                        logger.warn("unknown char " + b);
314                        return 0;
315                }
316
317                return aaMatrix[pos1][pos2];
318        }
319
320        public static Map<String,Double> calcIdSimilarity(char[] seq1, char[] seq2, int alnLength){
321                double identity = 0.0;
322                double similarity = 0.0;
323
324                if ( seq1 == null || seq2 == null){
325                        logger.warn("Can't calc %ID for an empty alignment! ");
326                        Map<String, Double> m = new HashMap<String, Double>();
327                        m.put("similarity", similarity);
328                        m.put("identity", identity);
329                        return m;
330                }
331
332                int     i;
333
334                for(i = 0; i < alnLength; i ++) {
335
336                                if(seq1[i] == seq2[i])  {
337
338                                        identity += 1.0;
339                                } else if(seq1[i] == '-' || seq1[i] == '*' || seq1[i] == '.' ||
340                                                seq2[i] == '-' || seq2[i] == '*' || seq2[i] == '.' ) {
341                                } else if(aaScore(seq1[i], seq2[i]) > 0)  similarity += 1.0;
342                }
343
344
345                if ( alnLength > 0){
346                        similarity = (identity + similarity) / alnLength;
347                        identity = identity/alnLength;
348                }
349                Map<String, Double> m = new HashMap<String, Double>();
350                m.put("similarity", similarity);
351                m.put("identity", identity);
352
353                return m;
354        }
355
356
357        /**
358         *
359         * @param afpChain
360         * @param ca1
361         * @param ca2
362         * @return
363         * @throws ClassNotFoundException If an error occurs when invoking jmol
364         * @throws NoSuchMethodException If an error occurs when invoking jmol
365         * @throws InvocationTargetException If an error occurs when invoking jmol
366         * @throws IllegalAccessException If an error occurs when invoking jmol
367         */
368        public static Structure createArtificalStructure(AFPChain afpChain, Atom[] ca1,
369                        Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException,
370                        InvocationTargetException, IllegalAccessException
371        {
372
373                if ( afpChain.getNrEQR() < 1){
374                        return GuiWrapper.getAlignedStructure(ca1, ca2);
375                }
376
377                Group[] twistedGroups = GuiWrapper.prepareGroupsForDisplay(afpChain,ca1, ca2);
378
379                List<Atom> twistedAs = new ArrayList<Atom>();
380
381                for ( Group g: twistedGroups){
382                        if ( g == null )
383                                continue;
384                        if ( g.size() < 1)
385                                continue;
386                        Atom a = g.getAtom(0);
387                        twistedAs.add(a);
388                }
389                Atom[] twistedAtoms = twistedAs.toArray(new Atom[twistedAs.size()]);
390
391                List<Group> hetatms  = new ArrayList<Group>();
392                List<Group> nucs1    = new ArrayList<Group>();
393                Group g1 = ca1[0].getGroup();
394                Chain c1 = null;
395                if ( g1 != null) {
396                        c1 = g1.getChain();
397                        if ( c1 != null){
398                                hetatms = c1.getAtomGroups(GroupType.HETATM);;
399                                nucs1  = c1.getAtomGroups(GroupType.NUCLEOTIDE);
400                        }
401                }
402                List<Group> hetatms2 = new ArrayList<Group>();
403                List<Group> nucs2    = new ArrayList<Group>();
404                Group g2 = ca2[0].getGroup();
405                Chain c2 = null;
406                if ( g2 != null){
407                        c2 = g2.getChain();
408                        if ( c2 != null){
409                                hetatms2 = c2.getAtomGroups(GroupType.HETATM);
410                                nucs2 = c2.getAtomGroups(GroupType.NUCLEOTIDE);
411                        }
412                }
413                Atom[] arr1 = GuiWrapper.getAtomArray(ca1, hetatms, nucs1);
414                Atom[] arr2 = GuiWrapper.getAtomArray(twistedAtoms, hetatms2, nucs2);
415
416                return GuiWrapper.getAlignedStructure(arr1,arr2);
417        }
418
419        /** get the block number for an aligned position
420         *
421         * @param afpChain
422         * @param aligPos
423         * @return
424         */
425        public static int getBlockNrForAlignPos(AFPChain afpChain, int aligPos){
426                // moved here from DisplayAFP;
427
428                int blockNum = afpChain.getBlockNum();
429
430                int[] optLen = afpChain.getOptLen();
431                int[][][] optAln = afpChain.getOptAln();
432
433                int len = 0;
434                int p1b=0;
435                int p2b=0;
436
437                for(int i = 0; i < blockNum; i ++)  {
438
439                        for(int j = 0; j < optLen[i]; j ++) {
440
441                                int p1 = optAln[i][0][j];
442                                int p2 = optAln[i][1][j];
443
444                                if (len != 0) {
445                                        // check for gapped region
446                                        int lmax = (p1 - p1b - 1)>(p2 - p2b - 1)?(p1 - p1b - 1):(p2 - p2b - 1);
447                                        for(int k = 0; k < lmax; k ++)      {
448                                                len++;
449                                        }
450                                }
451
452
453                                p1b = p1;
454                                p2b = p2;
455                                if ( len >= aligPos) {
456
457                                        return i;
458                                }
459                                len++;
460                        }
461                }
462
463                return blockNum;
464
465        }
466}