001/* 002 003 * BioJava development code 004 005 * 006 007 * This code may be freely distributed and modified under the 008 009 * terms of the GNU Lesser General Public Licence. This should 010 011 * be distributed with the code. If you do not have a copy, 012 013 * see: 014 015 * 016 017 * http://www.gnu.org/copyleft/lesser.html 018 019 * 020 021 * Copyright for this code is held jointly by the individual 022 023 * authors. These should be listed in @author doc comments. 024 025 * 026 027 * For more information on the BioJava project and its aims, 028 029 * or to join the biojava-l mailing list, visit the home page 030 031 * at: 032 033 * 034 035 * http://www.biojava.org/ 036 037 * 038 039 */ 040 041 042 043package org.biojava.bio.proteomics; 044 045 046 047import java.util.LinkedList; 048import java.util.List; 049import java.util.ListIterator; 050 051import org.biojava.bio.Annotation; 052import org.biojava.bio.BioException; 053import org.biojava.bio.SmallAnnotation; 054import org.biojava.bio.seq.Feature; 055import org.biojava.bio.seq.Sequence; 056import org.biojava.bio.symbol.Location; 057import org.biojava.bio.symbol.RangeLocation; 058import org.biojava.bio.symbol.Symbol; 059import org.biojava.utils.ChangeVetoException; 060 061 062 063 064 065/** 066 * This class contains methods for calculating the results of proteolytic digestion 067 * of a protein sequence 068 * 069 * <b> this class is not designed to be thread safe </b> 070 * 071 * @author Michael Jones 072 * @author Mark Schreiber (refactoring, some documentation) 073 */ 074 075public class Digest { 076 077 078 079 private Protease protease; 080 081 082 083 private Sequence sequence; 084 085 086 087 private int maxMissedCleavages = 0; 088 089 090 091 public static String PEPTIDE_FEATURE_TYPE = "Peptide"; 092 093 094 095 private LinkedList peptideQue; 096 097 /** Creates a new Digest Bean*/ 098 099 public Digest() { 100 101 /*try{ 102 103 protease = new Protease(); 104 105 }catch (Exception e){ 106 107 //Should never happen 108 109 e.printStackTrace(); 110 111 }*/ 112 113 } 114 115 116 117 public void setProtease(Protease protease) { 118 119 this.protease = protease; 120 121 } 122 123 124 125 public void setSequence(Sequence sequence) { 126 127 this.sequence = sequence; 128 129 } 130 131 132 133 public Sequence getSequence() { 134 135 return sequence; 136 137 } 138 139 140 /** 141 * Sets the maximum number of partial digest products to be annotated. 142 * @param maxMissedCleavages the max number of partial digest products 143 */ 144 public void setMaxMissedCleavages(int maxMissedCleavages) { 145 146 this.maxMissedCleavages = maxMissedCleavages; 147 148 } 149 150 151 152 /** Adds peptides as features to the Sequence in this class. The feature will 153 * contain a small annotation specifying the protease with the key "protease". 154 155 * For Example: 156 157 * <PRE> 158 159 * 160 161 * Sequence sequence = ... 162 163 * Digest bioJavaDigest = new Digest(); 164 165 * 166 167 * bioJavaDigest.setMaxMissedCleavages(2); 168 169 * bioJavaDigest.setProtease(ProteaseManager.getProteaseByName(Protease.ASP_N)); 170 171 * bioJavaDigest.setSequence(sequence); 172 173 * bioJavaDigest.addDigestFeatures(); 174 175 * </PRE> 176 177 * @throws BioException if the Protease or Sequence are null. 178 179 */ 180 181 public void addDigestFeatures() throws BioException, ChangeVetoException { 182 183 peptideQue = new LinkedList(); 184 185 if(protease == null){ 186 throw new BioException("Protease is null, use Digest.setProtease()"); 187 } 188 if(sequence == null){ 189 throw new BioException("Sequence is null, use Digest.setSequence()"); 190 } 191 192 193 List cleaveSites = protease.getCleaveageResidues().toList(); 194 boolean endoProtease = protease.isEndoProtease(); 195 196 List notCleave = protease.getNotCleaveResidues().toList(); 197 //Returns null if the list is empty 198 199 if(notCleave == null){ 200 notCleave = new LinkedList(); 201 } 202 203 int nTerm = 1; 204 205 if(cleaveSites == null || notCleave == null){ 206 throw new BioException("Protease contains null parameter"); 207 } 208 209 210 for (int j = 1; j <= sequence.length(); j++) { 211 Symbol aa = sequence.symbolAt(j); 212 213 if(cleaveSites.contains(aa)){ 214 if (endoProtease) { 215 boolean cleave = true; 216 if (j < sequence.length()) { 217 Symbol nextAA = sequence.symbolAt(j+1); 218 if(notCleave.contains(nextAA)){ 219 cleave = false; 220 } 221 } 222 223 if (cleave) { 224 Location loc = new RangeLocation(nTerm, j); 225 peptideQue.add(loc); 226 nTerm = j + 1; 227 } 228 229 } else { 230 if (j > 1) { 231 Location loc = new RangeLocation(nTerm, j-1); 232 peptideQue.add(loc); 233 //System.out.println(peptideQue); 234 nTerm = j; 235 } 236 } 237 } 238 } 239 240 if (nTerm <= sequence.length()) { 241 Location loc = new RangeLocation(nTerm, sequence.length()); 242 peptideQue.add(loc); 243 } 244 245 addMissedCleavages(); 246 247 //Now add the locations as Peptide freatures to the Sequence 248 for(ListIterator li = peptideQue.listIterator(); li.hasNext(); ){ 249 createPeptideFeature((Location)li.next()); 250 } 251 } 252 253 254 255 private void addMissedCleavages() throws BioException { 256 LinkedList missedList = new LinkedList(); 257 258 if(maxMissedCleavages>0){ 259 for(ListIterator li = peptideQue.listIterator(); li.hasNext(); ){ 260 Location loc = (Location)li.next(); 261 Location loc2 = null; 262 int min = loc.getMin(); 263 int max = 0; 264 265 //Get the numMissedCleavages location ahead of the current location 266 int numAdvanced = 0; 267 for(int i=0; i<maxMissedCleavages; i++){ 268 if(li.hasNext()) { 269 numAdvanced++; 270 loc2 = ((Location)li.next()); 271 max = loc2.getMax(); 272 missedList.add(new RangeLocation(min, max)); 273 } 274 } 275 //Revert back to the original location 276 for(int i=0; i<numAdvanced; i++){ 277 loc = ((Location)li.previous()); 278 } 279 } 280 281 //Add all the missed peptides to the overall list 282 peptideQue.addAll(missedList); 283 } 284 } 285 286 287 288 private void createPeptideFeature(Location loc) 289 290 throws BioException, ChangeVetoException { 291 Annotation anno = new SmallAnnotation(); 292 anno.setProperty("Protease",this.protease.getName()); 293 294 Feature.Template template = new Feature.Template(); 295 296 template.type = PEPTIDE_FEATURE_TYPE; 297 298 template.source = this.getClass().getName(); 299 300 template.location = loc; 301 302 template.annotation = anno; 303 304 sequence.createFeature(template); 305 306 } 307 308} 309