001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jan 29, 2006 021 * 022 */ 023package org.biojava.nbio.structure.align.pairwise; 024 025 026import org.biojava.nbio.structure.align.helper.AligMatEl; 027import org.biojava.nbio.structure.align.helper.GapArray; 028import org.biojava.nbio.structure.align.helper.IndexPair; 029 030import java.util.ArrayList; 031import java.util.List; 032 033/** a class to perform Gotoh algorithm 034 * 035 * @author Andreas Prlic (Java), Peter Lackner (original C code) 036 * @since 10:56:53 AM 037 * @version %I% %G% 038 */ 039public class Gotoh { 040 public static int ALIGFACTOR = 1000; // constant to shift floats to ints 041 042 Alignable a; 043 044 int k,openPen,elgPen,rowDim,colDim,openVal,elgVal; 045 046 AligMatEl currentCell; 047 GapArray currentGap; 048 049 050 051 public Gotoh(Alignable alignable) { 052 super(); 053 a = alignable; 054 055 align(); 056 057 } 058 059 060 061 private void align() { 062 063 rowDim = a.getRows()+1; 064 colDim = a.getCols()+1; 065 066 openPen = Math.round(ALIGFACTOR * a.getGapOpenCol()); 067 elgPen = Math.round(ALIGFACTOR * a.getGapExtCol()); 068 069 GapArray[] gapCol = new GapArray[colDim]; 070 GapArray[] gapRow = new GapArray[rowDim]; 071 072 // init the arrays 073 for ( int i = 0 ; i< colDim;i++){ 074 gapCol[i] = new GapArray(); 075 } 076 for ( int j = 0 ; j< rowDim;j++){ 077 gapRow[j] = new GapArray(); 078 } 079 currentGap = new GapArray(); 080 081 AligMatEl[][]aligmat = a.getAligMat(); 082 int lastValue = aligmat[rowDim-1][colDim-1].getValue(); 083 084// first cell 085 aligmat[0][0].setValue(0); 086 gapCol[0].setValue(0); 087 gapCol[0].setIndex(0); 088 gapRow[0].setValue(0); 089 gapRow[0].setIndex(0); 090 091 // set row 0 margin 092 for(int j=1;j<colDim;j++) { 093 aligmat[0][j].setValue(0); 094 gapCol[j].setValue(-(openPen+elgPen)); 095 gapCol[j].setIndex(0); 096 } 097 098 099 for(int rowCounter=1;rowCounter<rowDim;rowCounter++){ 100 101 // set column 0 margin 102 aligmat[rowCounter][0].setValue(0); 103 gapRow[rowCounter].setValue(-(openPen+elgPen)); 104 gapRow[rowCounter].setIndex(0); 105 106 for(int colCounter=1;colCounter<colDim;colCounter++) { 107 108 currentCell = aligmat[rowCounter][colCounter]; 109 110 // no gap 111 currentCell.setValue( aligmat[rowCounter-1][colCounter-1].getValue() + currentCell.getValue()); 112 currentCell.setRow((short)(rowCounter-1)); 113 currentCell.setCol((short)(colCounter-1)); 114 115 // scan column j for gap, gap in seqB 116 openVal = aligmat[rowCounter-1][colCounter].getValue() - (openPen+elgPen); 117 elgVal = gapCol[colCounter].getValue()-elgPen; 118 119 currentGap = new GapArray(); 120 121 if ( openVal >= elgVal){ 122 currentGap.setValue(openVal); 123 currentGap.setIndex(rowCounter-1); 124 } else { 125 currentGap.setValue(elgVal); 126 currentGap.setIndex(gapCol[colCounter].index); 127 } 128 gapCol[colCounter] = currentGap; 129 130 if (currentGap.getValue() > currentCell.getValue()){ 131 if ( currentGap.getIndex() >= rowDim) 132 System.err.println("col gap at" + rowCounter + " " + colCounter + " to " + currentGap.getIndex()); 133 currentCell.setValue( currentGap.getValue()); 134 currentCell.setRow((short)currentGap.getIndex()); 135 currentCell.setCol((short)colCounter); 136 } 137 138// scan row i for gap, gap in row 139 openVal = aligmat[rowCounter][colCounter-1].getValue()-(openPen+elgPen); 140 elgVal = gapRow[rowCounter].getValue() - elgPen; 141 142 currentGap = new GapArray(); 143 144 if (openVal >= elgVal){ 145 currentGap.setValue(openVal); 146 currentGap.setIndex(colCounter-1); 147 } else { 148 currentGap.setValue(elgVal); 149 currentGap.setIndex(gapRow[rowCounter].getIndex()); 150 } 151 gapRow[rowCounter] = currentGap; 152 153 154 if ( currentGap.getValue() > currentCell.getValue() ) { 155 if ( currentGap.getIndex() >= colDim) 156 System.err.println("row gap at" + rowCounter + " " + colCounter + " to " + currentGap.getIndex()); 157 currentCell.setValue(currentGap.getValue()); 158 currentCell.setRow((short)rowCounter); 159 currentCell.setCol((short)currentGap.getIndex()); 160 } 161 162 aligmat[rowCounter][colCounter]=currentCell; 163 } 164 165 } 166 167 168 // last cell in alignment matrix 169 // do not penalize end gaps 170 int rowCount = rowDim -1; 171 int colCount = colDim -1; 172 currentCell = aligmat[rowCount][colCount]; 173 174 // no gap 175 currentCell.setValue(aligmat[rowCount-1][colCount-1].getValue() + lastValue); 176 currentCell.setRow((short)(rowCount-1)); 177 currentCell.setCol((short)(colCount-1)); 178 179 // scan last column j for gap, gap in seqB 180 // do not penalyze gaps 181 for (int k=1;k<=rowCount;k++) { 182 int val = aligmat[rowCount-k][colCount].getValue(); 183 if ( val>currentCell.getValue()){ 184 currentCell.setValue(val); 185 //System.out.println("setting row to " + (rowCount ) ); 186 currentCell.setRow((short)(rowCount-k )); 187 currentCell.setCol((short)(colCount )); 188 } 189 } 190 191 // scan row i for gap, gap in SeqA 192 // do not penalyze gaps 193 for (int k=1;k<=colCount;k++) { 194 int val = aligmat[rowCount][colCount-k].getValue(); 195 if ( val > currentCell.getValue()){ 196 currentCell.setValue(val); 197 currentCell.setRow((short) (rowCount ) ); 198 currentCell.setCol((short)(colCount-k )); 199 } 200 } 201 a.setScore(aligmat[rowDim-1][colDim-1].getValue() / (float)ALIGFACTOR); 202 setPath(); 203 204 } 205 206 private void setPath(){ 207 208 // dmyersturnbull: TODO fix exception handling and logging 209 210 int n; 211 IndexPair[] backId = new IndexPair[a.getRows()+1+a.getCols()+1]; 212 List<IndexPair> path = new ArrayList<>(); 213 214 backId[0] = new IndexPair((short)(a.getRows()),(short)(a.getCols())); 215 216 // backtrace, get backId indices, the indices in diagonal store in path 217 218 219 int pathsize = 0; 220 221 AligMatEl[][] aligmat = a.getAligMat(); 222 223 224 n=1; 225 while ( (backId[n-1].getRow()>=1) && 226 (backId[n-1].getCol()>=1) 227 ) 228 { 229 // get backtrace index 230 int x = backId[n-1].getRow(); 231 int y = backId[n-1].getCol(); 232 233 try { 234 235 AligMatEl el = null ; 236 try { 237 el =aligmat[x][y]; 238 } catch(Exception e){ 239 240 241 e.printStackTrace(); 242 for (int f=0; f< n;f++){ 243 System.out.println(backId[f]); 244 } 245 246 } 247 248 if ( el == null) 249 System.out.println("el = null! x:"+ x + " y " + y); 250 backId[n] = el; 251 } catch (Exception e){ 252 e.printStackTrace(); 253 System.out.println("x " + x); 254 System.out.println("y " + y); 255 System.out.println(backId[n-2]); 256 System.exit(0); 257 } 258 // get diagonal indeces into path 259 if (((backId[n-1].getRow() - backId[n].getRow()) == 1) 260 && (( backId[n-1].getCol() - backId[n].getCol()) == 1)) { 261 path.add(backId[n-1]); 262 pathsize++; 263 264 } 265 n++; 266 } 267 268 // path is reverse order.. 269 // switch order 270 IndexPair[] newpath = new IndexPair[pathsize]; 271 for (int i = 0 ; i < pathsize; i++){ 272 IndexPair o = path.get(pathsize-1-i); 273 IndexPair np = new IndexPair((short)(o.getRow()-1),(short)(o.getCol()-1)); 274 newpath[i] = np; 275 } 276 277 a.setPath(newpath); 278 a.setPathSize(pathsize); 279 280 281 } 282}