001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.seq; 022 023 024import org.biojava.nbio.alignment.Alignments; 025import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType; 026import org.biojava.nbio.alignment.SimpleGapPenalty; 027import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 028import org.biojava.nbio.alignment.template.GapPenalty; 029import org.biojava.nbio.alignment.template.PairwiseSequenceAligner; 030import org.biojava.nbio.core.alignment.template.SequencePair; 031import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 032import org.biojava.nbio.structure.Atom; 033import org.biojava.nbio.structure.Group; 034import org.biojava.nbio.structure.StructureException; 035import org.biojava.nbio.structure.StructureTools; 036import org.biojava.nbio.structure.align.AbstractStructureAlignment; 037import org.biojava.nbio.structure.align.StructureAlignment; 038import org.biojava.nbio.structure.align.ce.CECalculator; 039import org.biojava.nbio.structure.align.ce.CeParameters; 040import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams; 041import org.biojava.nbio.structure.align.ce.UserArgumentProcessor; 042import org.biojava.nbio.structure.align.model.AFPChain; 043import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 044import org.biojava.nbio.structure.align.util.ConfigurationException; 045import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 046import org.biojava.nbio.core.sequence.ProteinSequence; 047import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 048import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 049import org.biojava.nbio.core.sequence.template.Compound; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053 054/** provides a 3D superimposition based on the sequence alignment 055 * 056 * @author Andreas Prlic 057 * 058 */ 059public class SmithWaterman3Daligner extends AbstractStructureAlignment implements StructureAlignment { 060 061 public static final String algorithmName = "Smith-Waterman superposition"; 062 063 private static final Logger logger = LoggerFactory.getLogger(SmithWaterman3Daligner.class); 064 065 /** 066 * version history: 067 * 1.1 - Added more parameters to the command line, including -maxOptRMSD 068 * 1.0 - Initial version 069 */ 070 private static final String version = "1.1"; 071 SmithWaterman3DParameters params; 072 073 public static void main(String[] args) throws ConfigurationException { 074 //args = new String[]{"-pdb1","1cdg.A","-pdb2","1tim.A","-pdbFilePath","/tmp/","-show3d","-printFatCat"}; 075 UserArgumentProcessor processor = new SmithWatermanUserArgumentProcessor(); 076 processor.process(args); 077 } 078 079 public SmithWaterman3Daligner(){ 080 params = new SmithWaterman3DParameters(); 081 } 082 083 @Override 084 public AFPChain align(Atom[] ca1, Atom[] ca2) throws StructureException { 085 if ( params == null) 086 params = new SmithWaterman3DParameters(); 087 return align(ca1,ca2,params); 088 } 089 090 @Override 091 public AFPChain align(Atom[] ca1, Atom[] ca2, Object parameters) 092 throws StructureException { 093 094 if ( parameters == null) { 095 throw new IllegalArgumentException("Got null instead of SmithWaterman3DParameters!"); 096 } 097 if ( ! (parameters instanceof SmithWaterman3DParameters)) 098 throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters, but " + parameters.getClass().getName()); 099 100 params = (SmithWaterman3DParameters) parameters; 101 AFPChain afpChain = new AFPChain(algorithmName); 102 103 104 // covert input to sequences 105 String seq1 = StructureTools.convertAtomsToSeq(ca1); 106 String seq2 = StructureTools.convertAtomsToSeq(ca2); 107 108 ProteinSequence s1 = null; 109 ProteinSequence s2 = null; 110 111 try { 112 s1 = new ProteinSequence(seq1); 113 s2 = new ProteinSequence(seq2); 114 } catch (CompoundNotFoundException e){ 115 116 throw new StructureException(e.getMessage(),e); 117 } 118 119 // default blosum62 120 SubstitutionMatrix<AminoAcidCompound> matrix = SubstitutionMatrixHelper.getBlosum65(); 121 122 GapPenalty penalty = new SimpleGapPenalty(); 123 124 penalty.setOpenPenalty(params.getGapOpen()); 125 penalty.setExtensionPenalty(params.getGapExtend()); 126 127 PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman = 128 Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix); 129 130 SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair(); 131 132 if (pair.getTarget().toString().isEmpty() || pair.getQuery().toString().isEmpty()) { 133 throw new StructureException("Empty alignment for sequences "+s1+" and "+s2); 134 } 135 136 logger.debug("Smith-Waterman alignment is: "+pair.toString(100)); 137 138 // convert to a 3D alignment... 139 afpChain = convert(ca1,ca2,pair, smithWaterman); 140 141 142 143 return afpChain; 144 } 145 146 /** 147 * Converts a sequence alignment into a structural alignment 148 * @param smithWaterman The sequence aligner 149 * @param ca1 CA atoms from the query sequence 150 * @param ca2 CA atoms from the target sequence 151 * @param smithWaterman pairwise Sequence aligner 152 * @param pair The sequence alignment calculated by aligner 153 * @return an AFPChain encapsulating the alignment in aligPair 154 * @throws StructureException 155 */ 156 private AFPChain convert(Atom[] ca1, Atom[] ca2, SequencePair<ProteinSequence, 157 AminoAcidCompound> pair, PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman) throws StructureException { 158 AFPChain afpChain = new AFPChain(algorithmName); 159 int ca1Length = ca1.length; 160 int ca2Length = ca2.length; 161 162 afpChain.setAlignScore(smithWaterman.getScore()); 163 afpChain.setCa1Length(ca1Length); 164 afpChain.setCa2Length(ca2Length); 165 166 int nrCols = pair.getLength(); 167 int nAtom=0; 168 int nGaps=0; 169 170 Atom[] strBuf1 = new Atom[nrCols]; 171 Atom[] strBuf2 = new Atom[nrCols]; 172 char[] alnseq1 = new char[ca1Length+ca2Length+1]; 173 char[] alnseq2 = new char[ca1Length+ca2Length+1] ; 174 char[] alnsymb = new char[ca1Length+ca2Length+1]; 175 176 Compound gapSymbol = AminoAcidCompoundSet.getAminoAcidCompoundSet().getCompoundForString("-"); 177 178 int pos = 0 ; // aligned positions 179 180 int nrIdent = 0; 181 int nrSim = 0; 182 183 int[] align_se1 = new int[nrCols+1]; 184 int[] align_se2 = new int[nrCols+1]; 185 186 for ( int i = 1 ; i <= nrCols; i ++){ 187 188 int myI = i-1; 189 190 Compound s1 = pair.getCompoundAt(1, i); 191 Compound s2 = pair.getCompoundAt(2,i); 192 193 // alignment is using internal index start at 1... 194 int pos1 = pair.getIndexInQueryAt(i) -1; 195 int pos2 = pair.getIndexInTargetAt(i) -1; 196 197 if ( ( ! s1.equals(gapSymbol) )&& (! s2.equals(gapSymbol))){ 198 strBuf1[nAtom] = ca1[pos1]; 199 strBuf2[nAtom] = ca2[pos2]; 200 // 201 char l1 = getOneLetter(ca1[pos1].getGroup()); 202 char l2 = getOneLetter(ca2[pos2].getGroup()); 203 // 204 alnseq1[myI] = Character.toUpperCase(l1); 205 alnseq2[myI] = Character.toUpperCase(l2); 206 alnsymb[myI] = ' '; 207 // 208 if ( l1 == l2) { 209 nrIdent++; 210 nrSim++; 211 alnsymb[myI] = '|'; 212 213 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 214 215 nrSim++; 216 alnsymb[myI] = ':'; 217 } 218 // 219 align_se1[myI] = pos1; 220 align_se2[myI] = pos2; 221 // 222 pos++; 223 nAtom++; 224 225 } else { 226 // there is a gap at this position 227 nGaps++; 228 229 alnsymb[myI] = ' '; 230 align_se1[myI] = -1; 231 align_se2[myI] = -1; 232 233 if ( s1.equals(gapSymbol)){ 234 alnseq1[myI] = '-'; 235 236 } else { 237 char l1 = getOneLetter(ca1[pos1].getGroup()); 238 alnseq1[myI] = Character.toUpperCase(l1); 239 align_se1[myI] = pos1; 240 } 241 if ( s2.equals(gapSymbol)){ 242 alnseq2[myI] = '-'; 243 244 } else { 245 char l2 = getOneLetter(ca2[pos2].getGroup()); 246 alnseq2[myI] = Character.toUpperCase(l2); 247 align_se2[myI] = pos2; 248 249 } 250 } 251 252 } 253 254 afpChain.setAlnbeg1(pair.getIndexInQueryAt(1)-1); 255 afpChain.setAlnbeg2(pair.getIndexInTargetAt(1)-1); 256 257 afpChain.setGapLen(nGaps); 258 afpChain.setAlnseq1(alnseq1); 259 260 afpChain.setAlnseq2(alnseq2); 261 afpChain.setAlnsymb(alnsymb); 262 263 // CE uses the aligned pairs as reference not the whole alignment including gaps... 264 afpChain.setIdentity(nrIdent*1.0/pos); 265 afpChain.setSimilarity(nrSim*1.0/pos); 266 267 afpChain.setAlnLength(nrCols); 268 afpChain.setOptLength(nAtom); 269 int[] optLen = new int[]{nAtom}; 270 afpChain.setOptLen(optLen); 271 272 if(nAtom<4) 273 return afpChain; 274 275 CeParameters params = new CeParameters(); 276 CECalculator cecalc = new CECalculator(params); 277 278 // here we don't store the rotation matrix for the user! 279 280 double rmsd= cecalc.calc_rmsd(strBuf1, strBuf2, nAtom,true); 281 282 afpChain.setBlockRmsd(new double[]{rmsd}); 283 afpChain.setOptRmsd(new double[]{rmsd}); 284 afpChain.setTotalRmsdOpt(rmsd); 285 afpChain.setChainRmsd(rmsd); 286 287 // let's hijack the CE implementation 288 // and use some utilities from there to 289 // build up the afpChain object 290 291 cecalc.setnAtom(nAtom); 292 293 cecalc.setAlign_se1(align_se1); 294 cecalc.setAlign_se2(align_se2); 295 296 cecalc.setLcmp(nrCols ); 297 298 cecalc.convertAfpChain(afpChain, ca1, ca2); 299 300 afpChain.setAlgorithmName(algorithmName); 301 afpChain.setVersion(version); 302 303 return afpChain; 304 } 305 306 private static char getOneLetter(Group g){ 307 308 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 309 310 return StructureTools.get1LetterCode(g.getPDBName()); 311 312 } 313 314 @Override 315 public String getAlgorithmName() { 316 return algorithmName; 317 } 318 319 @Override 320 public ConfigStrucAligParams getParameters() { 321 return params; 322 } 323 324 @Override 325 public String getVersion() { 326 return version; 327 } 328 329 @Override 330 public void setParameters(ConfigStrucAligParams parameters) { 331 if ( ! (parameters instanceof SmithWaterman3DParameters)) 332 throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters"); 333 params = (SmithWaterman3DParameters)parameters; 334 } 335 336 337 338}