001/*
002
003 *                    BioJava development code
004
005 *
006
007 * This code may be freely distributed and modified under the
008
009 * terms of the GNU Lesser General Public Licence.  This should
010
011 * be distributed with the code.  If you do not have a copy,
012
013 * see:
014
015 *
016
017 *      http://www.gnu.org/copyleft/lesser.html
018
019 *
020
021 * Copyright for this code is held jointly by the individual
022
023 * authors.  These should be listed in @author doc comments.
024
025 *
026
027 * For more information on the BioJava project and its aims,
028
029 * or to join the biojava-l mailing list, visit the home page
030
031 * at:
032
033 *
034
035 *      http://www.biojava.org/
036
037 *
038
039 */
040
041
042
043package org.biojava.bio.proteomics;
044
045
046
047import java.util.LinkedList;
048import java.util.List;
049import java.util.ListIterator;
050
051import org.biojava.bio.Annotation;
052import org.biojava.bio.BioException;
053import org.biojava.bio.SmallAnnotation;
054import org.biojava.bio.seq.Feature;
055import org.biojava.bio.seq.Sequence;
056import org.biojava.bio.symbol.Location;
057import org.biojava.bio.symbol.RangeLocation;
058import org.biojava.bio.symbol.Symbol;
059import org.biojava.utils.ChangeVetoException;
060
061
062
063
064
065/**
066 * This class contains methods for calculating the results of proteolytic digestion
067 * of a protein sequence
068 *
069 * <b> this class is not designed to be thread safe </b>
070 *
071 * @author Michael Jones
072 * @author Mark Schreiber (refactoring, some documentation)
073 */
074
075public class Digest {
076
077
078
079    private Protease protease;
080
081
082
083    private Sequence sequence;
084
085
086
087    private int maxMissedCleavages = 0;
088
089
090
091    public static String PEPTIDE_FEATURE_TYPE = "Peptide";
092
093
094
095    private LinkedList peptideQue;
096
097    /** Creates a new Digest Bean*/
098
099    public Digest() {
100
101        /*try{
102
103            protease = new Protease();
104
105        }catch (Exception e){
106
107            //Should never happen
108
109            e.printStackTrace();
110
111            }*/
112
113    }
114
115
116
117    public void setProtease(Protease protease) {
118
119        this.protease = protease;
120
121    }
122
123
124
125    public void setSequence(Sequence sequence) {
126
127        this.sequence = sequence;
128
129    }
130
131
132
133    public Sequence getSequence() {
134
135        return sequence;
136
137    }
138
139
140    /**
141     * Sets the maximum number of partial digest products to be annotated.
142     * @param maxMissedCleavages the max number of partial digest products
143     */
144    public void setMaxMissedCleavages(int maxMissedCleavages) {
145
146        this.maxMissedCleavages = maxMissedCleavages;
147
148    }
149
150
151
152    /** Adds peptides as features to the Sequence in this class. The feature will
153     * contain a small annotation specifying the protease with the key "protease".
154
155     * For Example:
156
157     * <PRE>
158
159     *
160
161     *         Sequence sequence = ...
162
163     *         Digest bioJavaDigest = new Digest();
164
165     *
166
167     *         bioJavaDigest.setMaxMissedCleavages(2);
168
169     *         bioJavaDigest.setProtease(ProteaseManager.getProteaseByName(Protease.ASP_N));
170
171     *         bioJavaDigest.setSequence(sequence);
172
173     *         bioJavaDigest.addDigestFeatures();
174
175     * </PRE>
176
177     * @throws BioException if the Protease or Sequence are null.
178
179     */
180
181    public void addDigestFeatures() throws BioException, ChangeVetoException {
182
183        peptideQue = new LinkedList();
184
185        if(protease == null){
186            throw new BioException("Protease is null, use Digest.setProtease()");
187        }
188        if(sequence == null){
189            throw new BioException("Sequence is null, use Digest.setSequence()");
190        }
191
192
193        List cleaveSites = protease.getCleaveageResidues().toList();
194        boolean endoProtease = protease.isEndoProtease();
195
196        List notCleave = protease.getNotCleaveResidues().toList();
197        //Returns null if the list is empty
198
199        if(notCleave == null){
200            notCleave = new LinkedList();
201        }
202
203        int nTerm = 1;
204
205        if(cleaveSites == null || notCleave == null){
206            throw new BioException("Protease contains null parameter");
207        }
208
209
210        for (int j = 1; j <= sequence.length(); j++) {
211            Symbol aa = sequence.symbolAt(j);
212
213            if(cleaveSites.contains(aa)){
214                if (endoProtease) {
215                    boolean cleave = true;
216                    if (j < sequence.length())  {
217                        Symbol nextAA = sequence.symbolAt(j+1);
218                        if(notCleave.contains(nextAA)){
219                            cleave = false;
220                        }
221                    }
222
223                    if (cleave)  {
224                        Location loc = new RangeLocation(nTerm, j);
225                        peptideQue.add(loc);
226                        nTerm = j + 1;
227                    }
228
229                } else {
230                    if (j > 1) {
231                        Location loc = new RangeLocation(nTerm, j-1);
232                        peptideQue.add(loc);
233                        //System.out.println(peptideQue);
234                        nTerm = j;
235                    }
236                }
237            }
238        }
239
240        if (nTerm <= sequence.length()) {
241            Location loc = new RangeLocation(nTerm, sequence.length());
242            peptideQue.add(loc);
243        }
244
245        addMissedCleavages();
246
247        //Now add the locations as Peptide freatures to the Sequence
248        for(ListIterator li = peptideQue.listIterator(); li.hasNext(); ){
249            createPeptideFeature((Location)li.next());
250        }
251    }
252
253
254
255    private void addMissedCleavages() throws BioException {
256        LinkedList missedList = new LinkedList();
257
258        if(maxMissedCleavages>0){
259            for(ListIterator li = peptideQue.listIterator(); li.hasNext(); ){
260                Location loc = (Location)li.next();
261                Location loc2 = null;
262                int min = loc.getMin();
263                int max = 0;
264
265                //Get the numMissedCleavages location ahead of the current location
266                int numAdvanced = 0;
267                for(int i=0; i<maxMissedCleavages; i++){
268                    if(li.hasNext()) {
269                        numAdvanced++;
270                        loc2 = ((Location)li.next());
271                        max = loc2.getMax();
272                        missedList.add(new RangeLocation(min, max));
273                    }
274                }
275                //Revert back to the original location
276                for(int i=0; i<numAdvanced; i++){
277                    loc = ((Location)li.previous());
278                }
279            }
280
281            //Add all the missed peptides to the overall list
282            peptideQue.addAll(missedList);
283        }
284    }
285
286
287
288    private void createPeptideFeature(Location loc)
289
290    throws BioException, ChangeVetoException {
291        Annotation anno = new SmallAnnotation();
292        anno.setProperty("Protease",this.protease.getName());
293
294        Feature.Template template = new Feature.Template();
295
296        template.type = PEPTIDE_FEATURE_TYPE;
297
298        template.source = this.getClass().getName();
299
300        template.location = loc;
301
302        template.annotation = anno;
303
304        sequence.createFeature(template);
305
306    }
307
308}
309