001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.seq;
022
023
024import org.biojava.nbio.alignment.Alignments;
025import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
026import org.biojava.nbio.alignment.SimpleGapPenalty;
027import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
028import org.biojava.nbio.alignment.template.GapPenalty;
029import org.biojava.nbio.alignment.template.PairwiseSequenceAligner;
030import org.biojava.nbio.core.alignment.template.SequencePair;
031import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
032import org.biojava.nbio.structure.Atom;
033import org.biojava.nbio.structure.Group;
034import org.biojava.nbio.structure.StructureException;
035import org.biojava.nbio.structure.StructureTools;
036import org.biojava.nbio.structure.align.AbstractStructureAlignment;
037import org.biojava.nbio.structure.align.StructureAlignment;
038import org.biojava.nbio.structure.align.ce.CECalculator;
039import org.biojava.nbio.structure.align.ce.CeParameters;
040import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams;
041import org.biojava.nbio.structure.align.ce.UserArgumentProcessor;
042import org.biojava.nbio.structure.align.model.AFPChain;
043import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
044import org.biojava.nbio.structure.align.util.ConfigurationException;
045import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
046import org.biojava.nbio.core.sequence.ProteinSequence;
047import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
048import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
049import org.biojava.nbio.core.sequence.template.Compound;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053
054/** provides a 3D superimposition based on the sequence alignment
055 *
056 * @author Andreas Prlic
057 *
058 */
059public class SmithWaterman3Daligner extends AbstractStructureAlignment implements StructureAlignment {
060
061        public static final String algorithmName = "Smith-Waterman superposition";
062
063        private static final Logger logger = LoggerFactory.getLogger(SmithWaterman3Daligner.class);
064
065        /**
066         *  version history:
067         *  1.1 - Added more parameters to the command line, including -maxOptRMSD
068         *  1.0 - Initial version
069         */
070        private static final String version = "1.1";
071        SmithWaterman3DParameters params;
072
073        public static void main(String[] args) throws ConfigurationException {
074                //args = new String[]{"-pdb1","1cdg.A","-pdb2","1tim.A","-pdbFilePath","/tmp/","-show3d","-printFatCat"};
075                UserArgumentProcessor processor = new SmithWatermanUserArgumentProcessor();
076                processor.process(args);
077        }
078
079        public SmithWaterman3Daligner(){
080                params = new SmithWaterman3DParameters();
081        }
082
083        @Override
084        public AFPChain align(Atom[] ca1, Atom[] ca2) throws StructureException {
085                if ( params == null)
086                        params = new SmithWaterman3DParameters();
087                return align(ca1,ca2,params);
088        }
089
090        @Override
091        public AFPChain align(Atom[] ca1, Atom[] ca2, Object parameters)
092        throws StructureException {
093
094                if ( parameters == null) {
095                        throw new IllegalArgumentException("Got null instead of SmithWaterman3DParameters!");
096                }
097                if ( ! (parameters instanceof SmithWaterman3DParameters))
098                        throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters, but " + parameters.getClass().getName());
099
100                params = (SmithWaterman3DParameters) parameters;
101                AFPChain afpChain = new AFPChain(algorithmName);
102
103
104                        // covert input to sequences
105                        String seq1 = StructureTools.convertAtomsToSeq(ca1);
106                        String seq2 = StructureTools.convertAtomsToSeq(ca2);
107
108                        ProteinSequence s1 = null;
109                        ProteinSequence s2 = null;
110
111                        try {
112                                s1 = new ProteinSequence(seq1);
113                                s2 = new ProteinSequence(seq2);
114                        } catch (CompoundNotFoundException e){
115
116                                throw new StructureException(e.getMessage(),e);
117                        }
118
119                        // default blosum62
120                        SubstitutionMatrix<AminoAcidCompound> matrix = SubstitutionMatrixHelper.getBlosum65();
121
122                        GapPenalty penalty = new SimpleGapPenalty();
123
124                        penalty.setOpenPenalty(params.getGapOpen());
125                        penalty.setExtensionPenalty(params.getGapExtend());
126
127                        PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman =
128                                Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix);
129
130                        SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair();
131
132                        if (pair.getTarget().toString().isEmpty() || pair.getQuery().toString().isEmpty()) {
133                                throw new StructureException("Empty alignment for sequences "+s1+" and "+s2);
134                        }
135
136                        logger.debug("Smith-Waterman alignment is: "+pair.toString(100));
137
138                        // convert to a 3D alignment...
139                        afpChain = convert(ca1,ca2,pair, smithWaterman);
140
141
142
143                return afpChain;
144        }
145
146        /**
147         * Converts a sequence alignment into a structural alignment
148         * @param smithWaterman The sequence aligner
149         * @param ca1 CA atoms from the query sequence
150         * @param ca2 CA atoms from the target sequence
151         * @param smithWaterman pairwise Sequence aligner
152         * @param pair The sequence alignment calculated by aligner
153         * @return an AFPChain encapsulating the alignment in aligPair
154         * @throws StructureException
155         */
156        private AFPChain convert(Atom[] ca1, Atom[] ca2,  SequencePair<ProteinSequence,
157                        AminoAcidCompound> pair, PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman) throws StructureException {
158                AFPChain afpChain = new AFPChain(algorithmName);
159                int ca1Length = ca1.length;
160                int ca2Length = ca2.length;
161
162                afpChain.setAlignScore(smithWaterman.getScore());
163                afpChain.setCa1Length(ca1Length);
164                afpChain.setCa2Length(ca2Length);
165
166                int nrCols = pair.getLength();
167                int nAtom=0;
168                int nGaps=0;
169
170                Atom[] strBuf1 = new Atom[nrCols];
171                Atom[] strBuf2 = new Atom[nrCols];
172                char[] alnseq1 = new char[ca1Length+ca2Length+1];
173                char[] alnseq2 = new char[ca1Length+ca2Length+1] ;
174                char[] alnsymb = new char[ca1Length+ca2Length+1];
175
176                Compound gapSymbol =  AminoAcidCompoundSet.getAminoAcidCompoundSet().getCompoundForString("-");
177
178                int pos = 0 ; // aligned positions
179
180                int nrIdent = 0;
181                int nrSim   = 0;
182
183                int[] align_se1 = new int[nrCols+1];
184                int[] align_se2 = new int[nrCols+1];
185
186                for ( int i = 1 ; i <= nrCols; i ++){
187
188                        int myI = i-1;
189
190                        Compound s1 =  pair.getCompoundAt(1, i);
191                        Compound s2 =  pair.getCompoundAt(2,i);
192
193                        // alignment is using internal index start at 1...
194                        int pos1 = pair.getIndexInQueryAt(i)  -1;
195                        int pos2 = pair.getIndexInTargetAt(i) -1;
196
197                        if ( ( ! s1.equals(gapSymbol) )&&  (! s2.equals(gapSymbol))){
198                                strBuf1[nAtom] = ca1[pos1];
199                                strBuf2[nAtom] = ca2[pos2];
200                                //
201                                char l1 = getOneLetter(ca1[pos1].getGroup());
202                                char l2 = getOneLetter(ca2[pos2].getGroup());
203                                //
204                                alnseq1[myI] = Character.toUpperCase(l1);
205                                alnseq2[myI] = Character.toUpperCase(l2);
206                                alnsymb[myI] = ' ';
207                                //
208                                if ( l1 == l2) {
209                                        nrIdent++;
210                                        nrSim++;
211                                        alnsymb[myI] = '|';
212
213                                } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){
214
215                                        nrSim++;
216                                        alnsymb[myI] = ':';
217                                }
218                                //
219                                align_se1[myI] = pos1;
220                                align_se2[myI] = pos2;
221                                //
222                                pos++;
223                                nAtom++;
224
225                        } else {
226                                // there is a gap at this position
227                                nGaps++;
228
229                                alnsymb[myI] = ' ';
230                                align_se1[myI] = -1;
231                                align_se2[myI] = -1;
232
233                                if ( s1.equals(gapSymbol)){
234                                        alnseq1[myI] = '-';
235
236                                } else {
237                                        char l1 = getOneLetter(ca1[pos1].getGroup());
238                                        alnseq1[myI] = Character.toUpperCase(l1);
239                                        align_se1[myI] = pos1;
240                                }
241                                if ( s2.equals(gapSymbol)){
242                                        alnseq2[myI] = '-';
243
244                                } else {
245                                        char l2 = getOneLetter(ca2[pos2].getGroup());
246                                        alnseq2[myI] = Character.toUpperCase(l2);
247                                        align_se2[myI] = pos2;
248
249                                }
250                        }
251
252                }
253
254                afpChain.setAlnbeg1(pair.getIndexInQueryAt(1)-1);
255                afpChain.setAlnbeg2(pair.getIndexInTargetAt(1)-1);
256
257                afpChain.setGapLen(nGaps);
258                afpChain.setAlnseq1(alnseq1);
259
260                afpChain.setAlnseq2(alnseq2);
261                afpChain.setAlnsymb(alnsymb);
262
263                // CE uses the aligned pairs as reference not the whole alignment including gaps...
264                afpChain.setIdentity(nrIdent*1.0/pos);
265                afpChain.setSimilarity(nrSim*1.0/pos);
266
267                afpChain.setAlnLength(nrCols);
268                afpChain.setOptLength(nAtom);
269                int[] optLen = new int[]{nAtom};
270                afpChain.setOptLen(optLen);
271
272                if(nAtom<4)
273                        return afpChain;
274
275                CeParameters params = new CeParameters();
276                CECalculator cecalc = new CECalculator(params);
277
278                // here we don't store the rotation matrix for the user!
279
280                double rmsd= cecalc.calc_rmsd(strBuf1, strBuf2, nAtom,true);
281
282                afpChain.setBlockRmsd(new double[]{rmsd});
283                afpChain.setOptRmsd(new double[]{rmsd});
284                afpChain.setTotalRmsdOpt(rmsd);
285                afpChain.setChainRmsd(rmsd);
286
287                // let's hijack the CE implementation
288                // and use some utilities from there to
289                // build up the afpChain object
290
291                cecalc.setnAtom(nAtom);
292
293                cecalc.setAlign_se1(align_se1);
294                cecalc.setAlign_se2(align_se2);
295
296                cecalc.setLcmp(nrCols  );
297
298                cecalc.convertAfpChain(afpChain, ca1, ca2);
299
300                afpChain.setAlgorithmName(algorithmName);
301                afpChain.setVersion(version);
302
303                return afpChain;
304        }
305
306        private static char getOneLetter(Group g){
307
308                if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL;
309
310                return StructureTools.get1LetterCode(g.getPDBName());
311
312        }
313
314        @Override
315        public String getAlgorithmName() {
316                return algorithmName;
317        }
318
319        @Override
320        public ConfigStrucAligParams getParameters() {
321                return params;
322        }
323
324        @Override
325        public String getVersion() {
326                return version;
327        }
328
329        @Override
330        public void setParameters(ConfigStrucAligParams parameters) {
331                if ( ! (parameters instanceof SmithWaterman3DParameters))
332                        throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters");
333                params = (SmithWaterman3DParameters)parameters;
334        }
335
336
337
338}