001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.seq; 022 023import org.biojava.nbio.alignment.Alignments; 024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType; 025import org.biojava.nbio.alignment.SimpleGapPenalty; 026import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 027import org.biojava.nbio.alignment.template.GapPenalty; 028import org.biojava.nbio.alignment.template.PairwiseSequenceAligner; 029import org.biojava.nbio.core.alignment.template.SequencePair; 030import org.biojava.nbio.core.alignment.template.SubstitutionMatrix; 031import org.biojava.nbio.structure.Atom; 032import org.biojava.nbio.structure.Group; 033import org.biojava.nbio.structure.StructureException; 034import org.biojava.nbio.structure.StructureTools; 035import org.biojava.nbio.structure.align.AbstractStructureAlignment; 036import org.biojava.nbio.structure.align.StructureAlignment; 037import org.biojava.nbio.structure.align.ce.CECalculator; 038import org.biojava.nbio.structure.align.ce.CeParameters; 039import org.biojava.nbio.structure.align.ce.ConfigStrucAligParams; 040import org.biojava.nbio.structure.align.ce.UserArgumentProcessor; 041import org.biojava.nbio.structure.align.model.AFPChain; 042import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 043import org.biojava.nbio.structure.align.util.AlignmentTools; 044import org.biojava.nbio.structure.align.util.ConfigurationException; 045import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 046import org.biojava.nbio.core.sequence.ProteinSequence; 047import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 048import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 049import org.biojava.nbio.core.sequence.template.Compound; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053/** 054 * Provides a 3D superimposition of two structures based on their sequence 055 * alignment. 056 * <p> 057 * This algorithm includes a final step to iteratively drop columns of the 058 * alignment until a maximum RMSD threshold of the superimposition, or the 059 * minimum alignment length theshold, are fulfilled, similar to what pymol align 060 * algorithm does. 061 * 062 * @author Andreas Prlic 063 * @author Aleix Lafita 064 * 065 */ 066public class SmithWaterman3Daligner extends AbstractStructureAlignment implements StructureAlignment { 067 068 public static final String algorithmName = "Smith-Waterman superposition"; 069 070 private static final Logger logger = LoggerFactory.getLogger(SmithWaterman3Daligner.class); 071 072 /** 073 * version history: 074 * 1.1 - Added a maxRMSD and minLen parameters 075 * 1.0 - Initial version 076 */ 077 private static final String version = "1.1"; 078 private SmithWaterman3DParameters params; 079 080 public static void main(String[] args) throws ConfigurationException { 081 //args = new String[]{"-pdb1","1cdg.A","-pdb2","1tim.A","-pdbFilePath","/tmp/","-show3d","-printFatCat"}; 082 UserArgumentProcessor processor = new SmithWatermanUserArgumentProcessor(); 083 processor.process(args); 084 } 085 086 public SmithWaterman3Daligner(){ 087 params = new SmithWaterman3DParameters(); 088 } 089 090 @Override 091 public AFPChain align(Atom[] ca1, Atom[] ca2) throws StructureException { 092 if ( params == null) 093 params = new SmithWaterman3DParameters(); 094 return align(ca1,ca2,params); 095 } 096 097 @Override 098 public AFPChain align(Atom[] ca1, Atom[] ca2, Object parameters) 099 throws StructureException { 100 101 if ( parameters == null) { 102 throw new IllegalArgumentException("Got null instead of SmithWaterman3DParameters!"); 103 } 104 if ( ! (parameters instanceof SmithWaterman3DParameters)) 105 throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters, but " + parameters.getClass().getName()); 106 107 params = (SmithWaterman3DParameters) parameters; 108 AFPChain afpChain = new AFPChain(algorithmName); 109 110 111 // covert input to sequences 112 String seq1 = StructureTools.convertAtomsToSeq(ca1); 113 String seq2 = StructureTools.convertAtomsToSeq(ca2); 114 115 ProteinSequence s1 = null; 116 ProteinSequence s2 = null; 117 118 try { 119 s1 = new ProteinSequence(seq1); 120 s2 = new ProteinSequence(seq2); 121 } catch (CompoundNotFoundException e){ 122 123 throw new StructureException(e.getMessage(),e); 124 } 125 126 // default blosum62 127 SubstitutionMatrix<AminoAcidCompound> matrix = SubstitutionMatrixHelper.getBlosum65(); 128 129 GapPenalty penalty = new SimpleGapPenalty(); 130 131 penalty.setOpenPenalty(params.getGapOpen()); 132 penalty.setExtensionPenalty(params.getGapExtend()); 133 134 PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman = 135 Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix); 136 137 SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair(); 138 139 if (pair.getTarget().toString().isEmpty() || pair.getQuery().toString().isEmpty()) { 140 throw new StructureException("Empty alignment for sequences "+s1+" and "+s2); 141 } 142 143 logger.debug("Smith-Waterman alignment is: "+pair.toString(100)); 144 145 // convert to a 3D alignment... 146 afpChain = convert(ca1,ca2,pair, smithWaterman); 147 148 // Perform an iterative dropping of the columns 149 while (afpChain.getOptLength() > params.getMinLen() 150 && afpChain.getTotalRmsdOpt() > params.getMaxRmsd()) { 151 afpChain = AlignmentTools.deleteHighestDistanceColumn(afpChain, ca1, ca2); 152 } 153 154 return afpChain; 155 } 156 157 /** 158 * Converts a sequence alignment into a structural alignment 159 * @param smithWaterman The sequence aligner 160 * @param ca1 CA atoms from the query sequence 161 * @param ca2 CA atoms from the target sequence 162 * @param smithWaterman pairwise Sequence aligner 163 * @param pair The sequence alignment calculated by aligner 164 * @return an AFPChain encapsulating the alignment in aligPair 165 * @throws StructureException 166 */ 167 private AFPChain convert(Atom[] ca1, Atom[] ca2, SequencePair<ProteinSequence, 168 AminoAcidCompound> pair, PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman) throws StructureException { 169 AFPChain afpChain = new AFPChain(algorithmName); 170 int ca1Length = ca1.length; 171 int ca2Length = ca2.length; 172 173 afpChain.setAlignScore(smithWaterman.getScore()); 174 afpChain.setCa1Length(ca1Length); 175 afpChain.setCa2Length(ca2Length); 176 177 int nrCols = pair.getLength(); 178 int nAtom=0; 179 int nGaps=0; 180 181 Atom[] strBuf1 = new Atom[nrCols]; 182 Atom[] strBuf2 = new Atom[nrCols]; 183 char[] alnseq1 = new char[ca1Length+ca2Length+1]; 184 char[] alnseq2 = new char[ca1Length+ca2Length+1] ; 185 char[] alnsymb = new char[ca1Length+ca2Length+1]; 186 187 Compound gapSymbol = AminoAcidCompoundSet.getAminoAcidCompoundSet().getCompoundForString("-"); 188 189 int pos = 0 ; // aligned positions 190 191 int nrIdent = 0; 192 int nrSim = 0; 193 194 int[] align_se1 = new int[nrCols+1]; 195 int[] align_se2 = new int[nrCols+1]; 196 197 for ( int i = 1 ; i <= nrCols; i ++){ 198 199 int myI = i-1; 200 201 Compound s1 = pair.getCompoundAt(1, i); 202 Compound s2 = pair.getCompoundAt(2,i); 203 204 // alignment is using internal index start at 1... 205 int pos1 = pair.getIndexInQueryAt(i) -1; 206 int pos2 = pair.getIndexInTargetAt(i) -1; 207 208 if ( ( ! s1.equals(gapSymbol) )&& (! s2.equals(gapSymbol))){ 209 strBuf1[nAtom] = ca1[pos1]; 210 strBuf2[nAtom] = ca2[pos2]; 211 // 212 char l1 = getOneLetter(ca1[pos1].getGroup()); 213 char l2 = getOneLetter(ca2[pos2].getGroup()); 214 // 215 alnseq1[myI] = Character.toUpperCase(l1); 216 alnseq2[myI] = Character.toUpperCase(l2); 217 alnsymb[myI] = ' '; 218 // 219 if ( l1 == l2) { 220 nrIdent++; 221 nrSim++; 222 alnsymb[myI] = '|'; 223 224 } else if ( AFPAlignmentDisplay.aaScore(l1, l2) > 0){ 225 226 nrSim++; 227 alnsymb[myI] = ':'; 228 } 229 // 230 align_se1[myI] = pos1; 231 align_se2[myI] = pos2; 232 // 233 pos++; 234 nAtom++; 235 236 } else { 237 // there is a gap at this position 238 nGaps++; 239 240 alnsymb[myI] = ' '; 241 align_se1[myI] = -1; 242 align_se2[myI] = -1; 243 244 if ( s1.equals(gapSymbol)){ 245 alnseq1[myI] = '-'; 246 247 } else { 248 char l1 = getOneLetter(ca1[pos1].getGroup()); 249 alnseq1[myI] = Character.toUpperCase(l1); 250 align_se1[myI] = pos1; 251 } 252 if ( s2.equals(gapSymbol)){ 253 alnseq2[myI] = '-'; 254 255 } else { 256 char l2 = getOneLetter(ca2[pos2].getGroup()); 257 alnseq2[myI] = Character.toUpperCase(l2); 258 align_se2[myI] = pos2; 259 260 } 261 } 262 263 } 264 265 afpChain.setAlnbeg1(pair.getIndexInQueryAt(1)-1); 266 afpChain.setAlnbeg2(pair.getIndexInTargetAt(1)-1); 267 268 afpChain.setGapLen(nGaps); 269 afpChain.setAlnseq1(alnseq1); 270 271 afpChain.setAlnseq2(alnseq2); 272 afpChain.setAlnsymb(alnsymb); 273 274 // CE uses the aligned pairs as reference not the whole alignment including gaps... 275 afpChain.setIdentity(nrIdent*1.0/pos); 276 afpChain.setSimilarity(nrSim*1.0/pos); 277 278 afpChain.setAlnLength(nrCols); 279 afpChain.setOptLength(nAtom); 280 int[] optLen = new int[]{nAtom}; 281 afpChain.setOptLen(optLen); 282 283 if(nAtom<4) 284 return afpChain; 285 286 CeParameters params = new CeParameters(); 287 CECalculator cecalc = new CECalculator(params); 288 289 // here we don't store the rotation matrix for the user! 290 291 double rmsd= cecalc.calc_rmsd(strBuf1, strBuf2, nAtom,true); 292 293 afpChain.setBlockRmsd(new double[]{rmsd}); 294 afpChain.setOptRmsd(new double[]{rmsd}); 295 afpChain.setTotalRmsdOpt(rmsd); 296 afpChain.setChainRmsd(rmsd); 297 298 // let's hijack the CE implementation 299 // and use some utilities from there to 300 // build up the afpChain object 301 302 cecalc.setnAtom(nAtom); 303 304 cecalc.setAlign_se1(align_se1); 305 cecalc.setAlign_se2(align_se2); 306 307 cecalc.setLcmp(nrCols ); 308 309 cecalc.convertAfpChain(afpChain, ca1, ca2); 310 311 afpChain.setAlgorithmName(algorithmName); 312 afpChain.setVersion(version); 313 314 return afpChain; 315 } 316 317 private static char getOneLetter(Group g){ 318 319 if (g==null) return StructureTools.UNKNOWN_GROUP_LABEL; 320 321 return StructureTools.get1LetterCode(g.getPDBName()); 322 323 } 324 325 @Override 326 public String getAlgorithmName() { 327 return algorithmName; 328 } 329 330 @Override 331 public ConfigStrucAligParams getParameters() { 332 return params; 333 } 334 335 @Override 336 public String getVersion() { 337 return version; 338 } 339 340 @Override 341 public void setParameters(ConfigStrucAligParams parameters) { 342 if ( ! (parameters instanceof SmithWaterman3DParameters)) 343 throw new IllegalArgumentException("provided parameter object is not of type SmithWaterman3DParameters"); 344 params = (SmithWaterman3DParameters)parameters; 345 } 346 347}