001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jan 29, 2006
021 *
022 */
023package org.biojava.nbio.structure.align.pairwise;
024
025
026import org.biojava.nbio.structure.align.helper.AligMatEl;
027import org.biojava.nbio.structure.align.helper.GapArray;
028import org.biojava.nbio.structure.align.helper.IndexPair;
029
030import java.util.ArrayList;
031import java.util.List;
032
033/** a class to perform Gotoh algorithm
034 *
035 * @author Andreas Prlic (Java), Peter Lackner (original C code)
036 * @since 10:56:53 AM
037 * @version %I% %G%
038 */
039public class Gotoh {
040        public static int ALIGFACTOR = 1000; // constant to shift floats to ints
041
042        Alignable a;
043
044        int k,openPen,elgPen,rowDim,colDim,openVal,elgVal;
045
046        AligMatEl currentCell;
047        GapArray currentGap;
048
049
050
051        public Gotoh(Alignable alignable) {
052                super();
053                a = alignable;
054
055                align();
056
057        }
058
059
060
061        private void align() {
062
063                rowDim = a.getRows()+1;
064                colDim = a.getCols()+1;
065
066                openPen = Math.round(ALIGFACTOR * a.getGapOpenCol());
067                elgPen  = Math.round(ALIGFACTOR * a.getGapExtCol());
068
069                GapArray[] gapCol = new GapArray[colDim];
070                GapArray[] gapRow = new GapArray[rowDim];
071
072                // init the arrays
073                for ( int i = 0 ; i< colDim;i++){
074                        gapCol[i] = new GapArray();
075                }
076                for ( int j = 0 ; j< rowDim;j++){
077                        gapRow[j] = new GapArray();
078                }
079                currentGap = new GapArray();
080
081                AligMatEl[][]aligmat = a.getAligMat();
082                int lastValue = aligmat[rowDim-1][colDim-1].getValue();
083
084//      first cell
085                aligmat[0][0].setValue(0);
086                gapCol[0].setValue(0);
087                gapCol[0].setIndex(0);
088                gapRow[0].setValue(0);
089                gapRow[0].setIndex(0);
090
091                // set row 0 margin
092                for(int j=1;j<colDim;j++) {
093                        aligmat[0][j].setValue(0);
094                        gapCol[j].setValue(-(openPen+elgPen));
095                        gapCol[j].setIndex(0);
096                }
097
098
099                for(int rowCounter=1;rowCounter<rowDim;rowCounter++){
100
101                        // set column 0 margin
102                        aligmat[rowCounter][0].setValue(0);
103                        gapRow[rowCounter].setValue(-(openPen+elgPen));
104                        gapRow[rowCounter].setIndex(0);
105
106                        for(int colCounter=1;colCounter<colDim;colCounter++) {
107
108                                currentCell = aligmat[rowCounter][colCounter];
109
110                                // no gap
111                                currentCell.setValue( aligmat[rowCounter-1][colCounter-1].getValue() + currentCell.getValue());
112                                currentCell.setRow((short)(rowCounter-1));
113                                currentCell.setCol((short)(colCounter-1));
114
115                                // scan column j for gap, gap in seqB
116                                openVal = aligmat[rowCounter-1][colCounter].getValue() - (openPen+elgPen);
117                                elgVal  = gapCol[colCounter].getValue()-elgPen;
118
119                                currentGap = new GapArray();
120
121                                if ( openVal >= elgVal){
122                                        currentGap.setValue(openVal);
123                                        currentGap.setIndex(rowCounter-1);
124                                } else {
125                                        currentGap.setValue(elgVal);
126                                        currentGap.setIndex(gapCol[colCounter].index);
127                                }
128                                gapCol[colCounter] = currentGap;
129
130                                if (currentGap.getValue() > currentCell.getValue()){
131                                        if ( currentGap.getIndex() >= rowDim)
132                                                System.err.println("col gap at" + rowCounter + " " + colCounter + " to " + currentGap.getIndex());
133                                        currentCell.setValue( currentGap.getValue());
134                                        currentCell.setRow((short)currentGap.getIndex());
135                                        currentCell.setCol((short)colCounter);
136                                }
137
138//              scan row i for gap, gap in row
139                                openVal = aligmat[rowCounter][colCounter-1].getValue()-(openPen+elgPen);
140                                elgVal  = gapRow[rowCounter].getValue() - elgPen;
141
142                                currentGap = new GapArray();
143
144                                if (openVal >= elgVal){
145                                        currentGap.setValue(openVal);
146                                        currentGap.setIndex(colCounter-1);
147                                } else {
148                                        currentGap.setValue(elgVal);
149                                        currentGap.setIndex(gapRow[rowCounter].getIndex());
150                                }
151                                gapRow[rowCounter] = currentGap;
152
153
154                                if ( currentGap.getValue() > currentCell.getValue() ) {
155                                        if ( currentGap.getIndex() >= colDim)
156                                                System.err.println("row gap at" + rowCounter + " " + colCounter + " to " + currentGap.getIndex());
157                                        currentCell.setValue(currentGap.getValue());
158                                        currentCell.setRow((short)rowCounter);
159                                        currentCell.setCol((short)currentGap.getIndex());
160                                }
161
162                                aligmat[rowCounter][colCounter]=currentCell;
163                        }
164
165                }
166
167
168                // last cell in alignment matrix
169                // do not penalize end gaps
170                int rowCount = rowDim -1;
171                int colCount = colDim -1;
172                currentCell = aligmat[rowCount][colCount];
173
174                // no gap
175                currentCell.setValue(aligmat[rowCount-1][colCount-1].getValue() + lastValue);
176                currentCell.setRow((short)(rowCount-1));
177                currentCell.setCol((short)(colCount-1));
178
179                // scan last column j for gap, gap in seqB
180                // do not penalyze gaps
181                for (int k=1;k<=rowCount;k++) {
182                        int val = aligmat[rowCount-k][colCount].getValue();
183                        if ( val>currentCell.getValue()){
184                                currentCell.setValue(val);
185                                //System.out.println("setting row to " + (rowCount ) );
186                                currentCell.setRow((short)(rowCount-k ));
187                                currentCell.setCol((short)(colCount  ));
188                        }
189                }
190
191                // scan row i for gap, gap in SeqA
192                // do not penalyze gaps
193                for (int k=1;k<=colCount;k++) {
194                        int val = aligmat[rowCount][colCount-k].getValue();
195                        if ( val > currentCell.getValue()){
196                                currentCell.setValue(val);
197                                currentCell.setRow((short) (rowCount ) );
198                                currentCell.setCol((short)(colCount-k  ));
199                        }
200                }
201                a.setScore(aligmat[rowDim-1][colDim-1].getValue() / (float)ALIGFACTOR);
202                setPath();
203
204        }
205
206        private void setPath(){
207
208                // dmyersturnbull: TODO fix exception handling and logging
209
210                int  n;
211                IndexPair[] backId = new IndexPair[a.getRows()+1+a.getCols()+1];
212                List<IndexPair> path = new ArrayList<>();
213
214                backId[0] = new IndexPair((short)(a.getRows()),(short)(a.getCols()));
215
216                // backtrace, get backId indices, the indices in diagonal store in path
217
218
219                int pathsize = 0;
220
221                AligMatEl[][] aligmat = a.getAligMat();
222
223
224                n=1;
225                while ( (backId[n-1].getRow()>=1) &&
226                                (backId[n-1].getCol()>=1)
227                                )
228                {
229                        // get backtrace index
230                        int x = backId[n-1].getRow();
231                        int y = backId[n-1].getCol();
232
233                        try {
234
235                                AligMatEl el = null ;
236                                try {
237                                        el =aligmat[x][y];
238                                } catch(Exception e){
239
240
241                                        e.printStackTrace();
242                                        for (int f=0; f< n;f++){
243                                                System.out.println(backId[f]);
244                                        }
245
246                                }
247
248                                if ( el == null)
249                                        System.out.println("el = null! x:"+ x + " y " + y);
250                                backId[n] = el;
251                        } catch (Exception e){
252                                e.printStackTrace();
253                                System.out.println("x " + x);
254                                System.out.println("y " + y);
255                                System.out.println(backId[n-2]);
256                                System.exit(0);
257                        }
258                        // get diagonal indeces into path
259                        if (((backId[n-1].getRow() - backId[n].getRow()) == 1)
260                                        && (( backId[n-1].getCol() - backId[n].getCol()) == 1)) {
261                                path.add(backId[n-1]);
262                                pathsize++;
263
264                        }
265                        n++;
266                }
267
268                // path is reverse order..
269                // switch order
270                IndexPair[] newpath = new IndexPair[pathsize];
271                for (int i = 0 ; i < pathsize; i++){
272                        IndexPair o = path.get(pathsize-1-i);
273                        IndexPair np = new IndexPair((short)(o.getRow()-1),(short)(o.getCol()-1));
274                        newpath[i] = np;
275                }
276
277                a.setPath(newpath);
278                a.setPathSize(pathsize);
279
280
281        }
282}