001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.seq;
022
023import org.biojava.nbio.alignment.Alignments;
024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
025import org.biojava.nbio.alignment.SimpleGapPenalty;
026import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
027import org.biojava.nbio.alignment.template.GapPenalty;
028import org.biojava.nbio.alignment.template.PairwiseSequenceAligner;
029import org.biojava.nbio.core.alignment.template.SequencePair;
030import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
031import org.biojava.nbio.structure.Atom;
032import org.biojava.nbio.structure.Group;
033import org.biojava.nbio.structure.StructureException;
034import org.biojava.nbio.structure.StructureTools;
035import org.biojava.nbio.structure.align.AbstractStructureAlignment;
036import org.biojava.nbio.structure.align.StructureAlignment;
037import org.biojava.nbio.structure.align.ce.CECalculator;
038import org.biojava.nbio.structure.align.ce.CeParameters;
039import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams;
040import org.biojava.nbio.structure.align.ce.UserArgumentProcessor;
041import org.biojava.nbio.structure.align.model.AFPChain;
042import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
043import org.biojava.nbio.structure.align.util.AlignmentTools;
044import org.biojava.nbio.structure.align.util.ConfigurationException;
045import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
046import org.biojava.nbio.core.sequence.ProteinSequence;
047import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
048import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
049import org.biojava.nbio.core.sequence.template.Compound;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053/**
054 * Provides a 3D superimposition of two structures based on their sequence
055 * alignment.
056 * <p>
057 * This algorithm includes a final step to iteratively drop columns of the
058 * alignment until a maximum RMSD threshold of the superimposition, or the
059 * minimum alignment length theshold, are fulfilled, similar to what pymol align
060 * algorithm does.
061 * 
062 * @author Andreas Prlic
063 * @author Aleix Lafita
064 *
065 */
066public class SmithWaterman3Daligner extends AbstractStructureAlignment implements StructureAlignment {
067
068        public static final String algorithmName = "Smith-Waterman superposition";
069
070        private static final Logger logger = LoggerFactory.getLogger(SmithWaterman3Daligner.class);
071
072        /**
073         *  version history:
074         *  1.1 - Added a maxRMSD and minLen parameters
075         *  1.0 - Initial version
076         */
077        private static final String version = "1.1";
078        private SmithWaterman3DParameters params;
079
080        public static void main(String[] args) throws ConfigurationException {
081                //args = new String[]{"-pdb1","1cdg.A","-pdb2","1tim.A","-pdbFilePath","/tmp/","-show3d","-printFatCat"};
082                UserArgumentProcessor processor = new SmithWatermanUserArgumentProcessor();
083                processor.process(args);
084        }
085
086        public SmithWaterman3Daligner(){
087                params = new SmithWaterman3DParameters();
088        }
089
090        @Override
091        public AFPChain align(Atom[] ca1, Atom[] ca2) throws StructureException {
092                if ( params == null)
093                        params = new SmithWaterman3DParameters();
094                return align(ca1,ca2,params);
095        }
096
097        @Override
098        public AFPChain align(Atom[] ca1, Atom[] ca2, Object parameters)
099        throws StructureException {
100
101                if ( parameters == null) {
102                        throw new IllegalArgumentException("Got null instead of SmithWaterman3DParameters!");
103                }
104                if ( ! (parameters instanceof SmithWaterman3DParameters))
105                        throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters, but " + parameters.getClass().getName());
106
107                params = (SmithWaterman3DParameters) parameters;
108                AFPChain afpChain = new AFPChain(algorithmName);
109
110
111                // covert input to sequences
112                String seq1 = StructureTools.convertAtomsToSeq(ca1);
113                String seq2 = StructureTools.convertAtomsToSeq(ca2);
114
115                ProteinSequence s1 = null;
116                ProteinSequence s2 = null;
117
118                try {
119                        s1 = new ProteinSequence(seq1);
120                        s2 = new ProteinSequence(seq2);
121                } catch (CompoundNotFoundException e){
122
123                        throw new StructureException(e.getMessage(),e);
124                }
125
126                // default blosum62
127                SubstitutionMatrix<AminoAcidCompound> matrix = SubstitutionMatrixHelper.getBlosum65();
128
129                GapPenalty penalty = new SimpleGapPenalty();
130
131                penalty.setOpenPenalty(params.getGapOpen());
132                penalty.setExtensionPenalty(params.getGapExtend());
133
134                PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman =
135                        Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix);
136
137                SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair();
138
139                if (pair.getTarget().toString().isEmpty() || pair.getQuery().toString().isEmpty()) {
140                        throw new StructureException("Empty alignment for sequences "+s1+" and "+s2);
141                }
142
143                logger.debug("Smith-Waterman alignment is: "+pair.toString(100));
144
145                // convert to a 3D alignment...
146                afpChain = convert(ca1,ca2,pair, smithWaterman);
147                
148                // Perform an iterative dropping of the columns
149                while (afpChain.getOptLength() > params.getMinLen()
150                                && afpChain.getTotalRmsdOpt() > params.getMaxRmsd()) {
151                        afpChain = AlignmentTools.deleteHighestDistanceColumn(afpChain, ca1, ca2);
152                }
153
154                return afpChain;
155        }
156
157        /**
158         * Converts a sequence alignment into a structural alignment
159         * @param smithWaterman The sequence aligner
160         * @param ca1 CA atoms from the query sequence
161         * @param ca2 CA atoms from the target sequence
162         * @param smithWaterman pairwise Sequence aligner
163         * @param pair The sequence alignment calculated by aligner
164         * @return an AFPChain encapsulating the alignment in aligPair
165         * @throws StructureException
166         */
167        private AFPChain convert(Atom[] ca1, Atom[] ca2,  SequencePair<ProteinSequence,
168                        AminoAcidCompound> pair, PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman) throws StructureException {
169                AFPChain afpChain = new AFPChain(algorithmName);
170                int ca1Length = ca1.length;
171                int ca2Length = ca2.length;
172
173                afpChain.setAlignScore(smithWaterman.getScore());
174                afpChain.setCa1Length(ca1Length);
175                afpChain.setCa2Length(ca2Length);
176
177                int nrCols = pair.getLength();
178                int nAtom=0;
179                int nGaps=0;
180
181                Atom[] strBuf1 = new Atom[nrCols];
182                Atom[] strBuf2 = new Atom[nrCols];
183                char[] alnseq1 = new char[ca1Length+ca2Length+1];
184                char[] alnseq2 = new char[ca1Length+ca2Length+1] ;
185                char[] alnsymb = new char[ca1Length+ca2Length+1];
186
187                Compound gapSymbol =  AminoAcidCompoundSet.getAminoAcidCompoundSet().getCompoundForString("-");
188
189                int pos = 0 ; // aligned positions
190
191                int nrIdent = 0;
192                int nrSim   = 0;
193
194                int[] align_se1 = new int[nrCols+1];
195                int[] align_se2 = new int[nrCols+1];
196
197                for ( int i = 1 ; i <= nrCols; i ++){
198
199                        int myI = i-1;
200
201                        Compound s1 =  pair.getCompoundAt(1, i);
202                        Compound s2 =  pair.getCompoundAt(2,i);
203
204                        // alignment is using internal index start at 1...
205                        int pos1 = pair.getIndexInQueryAt(i)  -1;
206                        int pos2 = pair.getIndexInTargetAt(i) -1;
207
208                        if ( ( ! s1.equals(gapSymbol) )&&  (! s2.equals(gapSymbol))){
209                                strBuf1[nAtom] = ca1[pos1];
210                                strBuf2[nAtom] = ca2[pos2];
211                                //
212                                char l1 = getOneLetter(ca1[pos1].getGroup());
213                                char l2 = getOneLetter(ca2[pos2].getGroup());
214                                //
215                                alnseq1[myI] = Character.toUpperCase(l1);
216                                alnseq2[myI] = Character.toUpperCase(l2);
217                                alnsymb[myI] = ' ';
218                                //
219                                if ( l1 == l2) {
220                                        nrIdent++;
221                                        nrSim++;
222                                        alnsymb[myI] = '|';
223
224                                } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){
225
226                                        nrSim++;
227                                        alnsymb[myI] = ':';
228                                }
229                                //
230                                align_se1[myI] = pos1;
231                                align_se2[myI] = pos2;
232                                //
233                                pos++;
234                                nAtom++;
235
236                        } else {
237                                // there is a gap at this position
238                                nGaps++;
239
240                                alnsymb[myI] = ' ';
241                                align_se1[myI] = -1;
242                                align_se2[myI] = -1;
243
244                                if ( s1.equals(gapSymbol)){
245                                        alnseq1[myI] = '-';
246
247                                } else {
248                                        char l1 = getOneLetter(ca1[pos1].getGroup());
249                                        alnseq1[myI] = Character.toUpperCase(l1);
250                                        align_se1[myI] = pos1;
251                                }
252                                if ( s2.equals(gapSymbol)){
253                                        alnseq2[myI] = '-';
254
255                                } else {
256                                        char l2 = getOneLetter(ca2[pos2].getGroup());
257                                        alnseq2[myI] = Character.toUpperCase(l2);
258                                        align_se2[myI] = pos2;
259
260                                }
261                        }
262
263                }
264
265                afpChain.setAlnbeg1(pair.getIndexInQueryAt(1)-1);
266                afpChain.setAlnbeg2(pair.getIndexInTargetAt(1)-1);
267
268                afpChain.setGapLen(nGaps);
269                afpChain.setAlnseq1(alnseq1);
270
271                afpChain.setAlnseq2(alnseq2);
272                afpChain.setAlnsymb(alnsymb);
273
274                // CE uses the aligned pairs as reference not the whole alignment including gaps...
275                afpChain.setIdentity(nrIdent*1.0/pos);
276                afpChain.setSimilarity(nrSim*1.0/pos);
277
278                afpChain.setAlnLength(nrCols);
279                afpChain.setOptLength(nAtom);
280                int[] optLen = new int[]{nAtom};
281                afpChain.setOptLen(optLen);
282
283                if(nAtom<4)
284                        return afpChain;
285
286                CeParameters params = new CeParameters();
287                CECalculator cecalc = new CECalculator(params);
288
289                // here we don't store the rotation matrix for the user!
290
291                double rmsd= cecalc.calc_rmsd(strBuf1, strBuf2, nAtom,true);
292
293                afpChain.setBlockRmsd(new double[]{rmsd});
294                afpChain.setOptRmsd(new double[]{rmsd});
295                afpChain.setTotalRmsdOpt(rmsd);
296                afpChain.setChainRmsd(rmsd);
297
298                // let's hijack the CE implementation
299                // and use some utilities from there to
300                // build up the afpChain object
301
302                cecalc.setnAtom(nAtom);
303
304                cecalc.setAlign_se1(align_se1);
305                cecalc.setAlign_se2(align_se2);
306
307                cecalc.setLcmp(nrCols  );
308
309                cecalc.convertAfpChain(afpChain, ca1, ca2);
310
311                afpChain.setAlgorithmName(algorithmName);
312                afpChain.setVersion(version);
313
314                return afpChain;
315        }
316
317        private static char getOneLetter(Group g){
318
319                if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL;
320
321                return StructureTools.get1LetterCode(g.getPDBName());
322
323        }
324
325        @Override
326        public String getAlgorithmName() {
327                return algorithmName;
328        }
329
330        @Override
331        public ConfigStrucAligParams getParameters() {
332                return params;
333        }
334
335        @Override
336        public String getVersion() {
337                return version;
338        }
339
340        @Override
341        public void setParameters(ConfigStrucAligParams parameters) {
342                if ( ! (parameters instanceof SmithWaterman3DParameters))
343                        throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters");
344                params = (SmithWaterman3DParameters)parameters;
345        }
346
347}