001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.molbio;
023
024import java.util.ArrayList;
025import java.util.Collection;
026import java.util.HashSet;
027import java.util.List;
028
029import java.util.concurrent.ExecutionException;
030import java.util.logging.Level;
031import java.util.logging.Logger;
032import org.biojava.bio.seq.Sequence;
033import org.biojava.bio.seq.SequenceAnnotator;
034import org.biojava.bio.seq.impl.ViewSequence;
035import org.biojava.utils.ThreadPool;
036import java.util.concurrent.Callable;
037import java.util.concurrent.ExecutorService;
038import java.util.concurrent.Future;
039
040/**
041 * <p><code>RestrictionMapper</code> is a class for annotating
042 * <code>Sequence</code>s with <code>Feature</code>s which represent
043 * restriction sites. Calling <code>annotate(Sequence sequence)</code>
044 * will annotate the <code>Sequence</code> with the sites of any
045 * <code>RestrictionEnzyme</code>s which have been added to the
046 * <code>RestrictionMapper</code>. The returned <code>Sequence</code>
047 * is a <code>ViewSequence</code> wrapping the original.</p>
048 *
049 * <p>The <code>Feature</code>s created are
050 * <code>RestrictionSite</code>s which have a flyweight
051 * <code>Annotation</code> containing a single <code>String</code>
052 * property "dbxref" whose value is "REBASE:" plus name of the enzyme
053 * (e.g. EcoRI).</p>
054 *
055 * <p>The mapper will by default map only those sites which have both
056 * their recognition sites and their cut sites within the
057 * <code>Sequence</code>. This behaviour may be changed to map all
058 * sites which have their recognition sites within the
059 * <code>Sequence</code> using the <code>setMapAll(boolean
060 * on)</code> method.</p>
061 *
062 * <p>The current implementation requires that
063 * <code>RestrictionEnzyme</code>s to be searched must first be
064 * registered with the <code>RestrictionEnzymeManager</code>.</p>
065 *
066 * @author Keith James
067 * @since 1.3
068 */
069public class RestrictionMapper implements SequenceAnnotator
070{
071    /**
072     * <code>SITE_FEATURE_SOURCE</code> the source <code>String</code>
073     * used by <code>RestrictionMapper</code> when creating
074     * restriction site <code>Feature</code>s. This is the
075     * <code>String</code> which is returned when a
076     * <code>Feature</code>'s <code>getSource()</code> method is
077     * called.
078     */
079    public static final String SITE_FEATURE_SOURCE = "regex";
080
081    /**
082     * <code>SITE_FEATURE_TYPE</code> the type <code>String</code>
083     * used by <code>RestrictionMapper</code> when creating
084     * restriction site <code>Feature</code>s. This is the
085     * <code>String</code> which is returned when a
086     * <code>Feature</code>'s <code>getType()</code> method is called.
087     */
088    public static final String SITE_FEATURE_TYPE = "misc_binding";
089
090    private List restrictionEnzymes;
091    private boolean mapAll;
092    private ThreadPool threadPool;
093    private ExecutorService tserv;
094
095    /**
096     * <p>Creates a new <code>RestrictionMapper</code> which will use
097     * the specified <code>ThreadPool</code>. Do not share one pool
098     * between a number of <code>RestrictionMapper</code>s because
099     * <code>annotate(Sequence sequence)</code> waits for all threads
100     * in the pool to finish work before returning and this will lead
101     * to a race condition between mappers. One mapper could end up
102     * waiting for another mapper's threads before returning.</p>
103     *
104     * @param threadPool a <code>ThreadPool</code>.
105     */
106    public RestrictionMapper(ThreadPool threadPool) {
107        this();
108        this.threadPool = threadPool;
109    } 
110    
111    /**
112     * <p>Creates a new <code>RestrictionMapper</code> which will use
113     * the specified <code>ExecutorService</code>.
114     *
115     * @param xser a <code>ExecutorService</code>, e.g. ExecutorService.newCachedThreadPool() 
116     * @since 1.8.1
117     * @author George Waldon
118     */
119    public RestrictionMapper(ExecutorService xser) {
120        this();
121        tserv = xser;
122    }
123    
124    private RestrictionMapper() {
125        restrictionEnzymes = new ArrayList();
126        mapAll = false;
127    }
128
129    /**
130     * <code>annotate</code> adds <code>Feature</code>s which
131     * represent restriction sites.
132     *
133     * @param sequence a <code>Sequence</code>.
134     *
135     * @return a <code>Sequence</code> view with restriction sites
136     * marked.
137     */
138    public Sequence annotate(Sequence sequence)
139    {
140        Sequence mapped = new ViewSequence(sequence);
141
142        if (tserv == null) {
143            for (int i = 0; i < restrictionEnzymes.size(); i++) {
144                RestrictionEnzyme enzyme =
145                        (RestrictionEnzyme) restrictionEnzymes.get(i);
146                threadPool.addRequest(new RestrictionSiteFinder(enzyme,
147                        mapAll,
148                        mapped));
149            }
150
151            // Threads will finish work and become idle
152            threadPool.waitForThreads();
153        } else {
154            Collection<CallableSiteFinder> taskPool = new HashSet<CallableSiteFinder>();
155            for (int i = 0; i < restrictionEnzymes.size(); i++) {
156                RestrictionEnzyme enzyme =
157                        (RestrictionEnzyme) restrictionEnzymes.get(i);
158                taskPool.add(new CallableSiteFinder(new RestrictionSiteFinder(enzyme,
159                        mapAll,
160                        mapped)));
161            }
162            List<Future<RestrictionEnzyme>> lFut;
163            try {
164                lFut = tserv.invokeAll(taskPool);
165                for (Future<RestrictionEnzyme> finder : lFut) {
166                    RestrictionEnzyme re = null;
167                    try {
168                        re = finder.get();
169                    } catch (ExecutionException ex) {
170                        Logger.getLogger(RestrictionMapper.class.getName()).log(Level.SEVERE, 
171                                "An error occurred during the mapping with the enzyme: " 
172                                + re!=null? re.toString():"unknown" 
173                                + " of the sequence: "
174                                + sequence.getName(), ex);
175                    }
176                }
177            } catch (InterruptedException ex) {
178                Logger.getLogger(RestrictionMapper.class.getName()).log(
179                        Level.SEVERE, 
180                        "Restriction mapping interrupted for sequence: "+sequence.getName(), 
181                        ex);
182            }
183        }
184
185        return mapped;
186    }
187
188    /**
189     * <code>getMapAll</code> returns whether all sites should be
190     * marked, including those which have recognition sites within the
191     * sequence, but cut outside it. The default is false, indicating
192     * only sites which can actually be cut are mapped.
193     *
194     * @return a <code>boolean</code>.
195     */
196    public boolean getMapAll()
197    {
198        return mapAll;
199    }
200
201    /**
202     * <code>setMapAll</code> sets whether all sites should be marked,
203     * including those which have recognition sites within the
204     * sequence, but cut outside it. The default is false, indicating
205     * only sites which can actually be cut are mapped.
206     *
207     * @param on a <code>boolean</code>.
208     */
209    public void setMapAll(boolean on)
210    {
211        mapAll = on;
212    }
213
214    /**
215     * <code>addEnzyme</code> adds an enzyme to be searched for in the
216     * <code>Sequence</code>.
217     *
218     * @param enzyme a <code>RestrictionEnzyme</code>.
219     */
220    public void addEnzyme(RestrictionEnzyme enzyme)
221    {
222        if (restrictionEnzymes.contains(enzyme))
223            throw new IllegalArgumentException("RestrictionMapper is already mapping '"
224                                               + enzyme
225                                               + "'");
226        restrictionEnzymes.add(enzyme);
227    }
228
229    /**
230     * <code>removeEnzyme</code> removes an enzyme from those to be
231     * searched for in the <code>Sequence</code>.
232     *
233     * @param enzyme a <code>RestrictionEnzyme</code>.
234     */
235    public void removeEnzyme(RestrictionEnzyme enzyme)
236    {
237        if (! restrictionEnzymes.contains(enzyme))
238            throw new IllegalArgumentException("RestrictionMapper is not mapping '"
239                                               + enzyme
240                                               + "'");
241
242        restrictionEnzymes.remove(enzyme);
243    }
244
245    /**
246     * <code>clearEnzymes</code> removes all enzymes from those to be
247     * searched for in the <code>Sequence</code>.
248     */
249    public void clearEnzymes()
250    {
251        restrictionEnzymes.clear();
252    }
253    
254    private class CallableSiteFinder implements Callable<RestrictionEnzyme> {
255        
256        RestrictionSiteFinder finder;
257        
258        CallableSiteFinder(RestrictionSiteFinder finder) {
259            this.finder = finder;
260        }
261        
262
263        public RestrictionEnzyme call() throws Exception {
264            finder.run();
265            return finder.getEnzyme();
266        }
267        
268    }
269}