001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.molbio; 023 024import java.util.ArrayList; 025import java.util.Collection; 026import java.util.HashSet; 027import java.util.List; 028 029import java.util.concurrent.ExecutionException; 030import java.util.logging.Level; 031import java.util.logging.Logger; 032import org.biojava.bio.seq.Sequence; 033import org.biojava.bio.seq.SequenceAnnotator; 034import org.biojava.bio.seq.impl.ViewSequence; 035import org.biojava.utils.ThreadPool; 036import java.util.concurrent.Callable; 037import java.util.concurrent.ExecutorService; 038import java.util.concurrent.Future; 039 040/** 041 * <p><code>RestrictionMapper</code> is a class for annotating 042 * <code>Sequence</code>s with <code>Feature</code>s which represent 043 * restriction sites. Calling <code>annotate(Sequence sequence)</code> 044 * will annotate the <code>Sequence</code> with the sites of any 045 * <code>RestrictionEnzyme</code>s which have been added to the 046 * <code>RestrictionMapper</code>. The returned <code>Sequence</code> 047 * is a <code>ViewSequence</code> wrapping the original.</p> 048 * 049 * <p>The <code>Feature</code>s created are 050 * <code>RestrictionSite</code>s which have a flyweight 051 * <code>Annotation</code> containing a single <code>String</code> 052 * property "dbxref" whose value is "REBASE:" plus name of the enzyme 053 * (e.g. EcoRI).</p> 054 * 055 * <p>The mapper will by default map only those sites which have both 056 * their recognition sites and their cut sites within the 057 * <code>Sequence</code>. This behaviour may be changed to map all 058 * sites which have their recognition sites within the 059 * <code>Sequence</code> using the <code>setMapAll(boolean 060 * on)</code> method.</p> 061 * 062 * <p>The current implementation requires that 063 * <code>RestrictionEnzyme</code>s to be searched must first be 064 * registered with the <code>RestrictionEnzymeManager</code>.</p> 065 * 066 * @author Keith James 067 * @since 1.3 068 */ 069public class RestrictionMapper implements SequenceAnnotator 070{ 071 /** 072 * <code>SITE_FEATURE_SOURCE</code> the source <code>String</code> 073 * used by <code>RestrictionMapper</code> when creating 074 * restriction site <code>Feature</code>s. This is the 075 * <code>String</code> which is returned when a 076 * <code>Feature</code>'s <code>getSource()</code> method is 077 * called. 078 */ 079 public static final String SITE_FEATURE_SOURCE = "regex"; 080 081 /** 082 * <code>SITE_FEATURE_TYPE</code> the type <code>String</code> 083 * used by <code>RestrictionMapper</code> when creating 084 * restriction site <code>Feature</code>s. This is the 085 * <code>String</code> which is returned when a 086 * <code>Feature</code>'s <code>getType()</code> method is called. 087 */ 088 public static final String SITE_FEATURE_TYPE = "misc_binding"; 089 090 private List restrictionEnzymes; 091 private boolean mapAll; 092 private ThreadPool threadPool; 093 private ExecutorService tserv; 094 095 /** 096 * <p>Creates a new <code>RestrictionMapper</code> which will use 097 * the specified <code>ThreadPool</code>. Do not share one pool 098 * between a number of <code>RestrictionMapper</code>s because 099 * <code>annotate(Sequence sequence)</code> waits for all threads 100 * in the pool to finish work before returning and this will lead 101 * to a race condition between mappers. One mapper could end up 102 * waiting for another mapper's threads before returning.</p> 103 * 104 * @param threadPool a <code>ThreadPool</code>. 105 */ 106 public RestrictionMapper(ThreadPool threadPool) { 107 this(); 108 this.threadPool = threadPool; 109 } 110 111 /** 112 * <p>Creates a new <code>RestrictionMapper</code> which will use 113 * the specified <code>ExecutorService</code>. 114 * 115 * @param xser a <code>ExecutorService</code>, e.g. ExecutorService.newCachedThreadPool() 116 * @since 1.8.1 117 * @author George Waldon 118 */ 119 public RestrictionMapper(ExecutorService xser) { 120 this(); 121 tserv = xser; 122 } 123 124 private RestrictionMapper() { 125 restrictionEnzymes = new ArrayList(); 126 mapAll = false; 127 } 128 129 /** 130 * <code>annotate</code> adds <code>Feature</code>s which 131 * represent restriction sites. 132 * 133 * @param sequence a <code>Sequence</code>. 134 * 135 * @return a <code>Sequence</code> view with restriction sites 136 * marked. 137 */ 138 public Sequence annotate(Sequence sequence) 139 { 140 Sequence mapped = new ViewSequence(sequence); 141 142 if (tserv == null) { 143 for (int i = 0; i < restrictionEnzymes.size(); i++) { 144 RestrictionEnzyme enzyme = 145 (RestrictionEnzyme) restrictionEnzymes.get(i); 146 threadPool.addRequest(new RestrictionSiteFinder(enzyme, 147 mapAll, 148 mapped)); 149 } 150 151 // Threads will finish work and become idle 152 threadPool.waitForThreads(); 153 } else { 154 Collection<CallableSiteFinder> taskPool = new HashSet<CallableSiteFinder>(); 155 for (int i = 0; i < restrictionEnzymes.size(); i++) { 156 RestrictionEnzyme enzyme = 157 (RestrictionEnzyme) restrictionEnzymes.get(i); 158 taskPool.add(new CallableSiteFinder(new RestrictionSiteFinder(enzyme, 159 mapAll, 160 mapped))); 161 } 162 List<Future<RestrictionEnzyme>> lFut; 163 try { 164 lFut = tserv.invokeAll(taskPool); 165 for (Future<RestrictionEnzyme> finder : lFut) { 166 RestrictionEnzyme re = null; 167 try { 168 re = finder.get(); 169 } catch (ExecutionException ex) { 170 Logger.getLogger(RestrictionMapper.class.getName()).log(Level.SEVERE, 171 "An error occurred during the mapping with the enzyme: " 172 + re!=null? re.toString():"unknown" 173 + " of the sequence: " 174 + sequence.getName(), ex); 175 } 176 } 177 } catch (InterruptedException ex) { 178 Logger.getLogger(RestrictionMapper.class.getName()).log( 179 Level.SEVERE, 180 "Restriction mapping interrupted for sequence: "+sequence.getName(), 181 ex); 182 } 183 } 184 185 return mapped; 186 } 187 188 /** 189 * <code>getMapAll</code> returns whether all sites should be 190 * marked, including those which have recognition sites within the 191 * sequence, but cut outside it. The default is false, indicating 192 * only sites which can actually be cut are mapped. 193 * 194 * @return a <code>boolean</code>. 195 */ 196 public boolean getMapAll() 197 { 198 return mapAll; 199 } 200 201 /** 202 * <code>setMapAll</code> sets whether all sites should be marked, 203 * including those which have recognition sites within the 204 * sequence, but cut outside it. The default is false, indicating 205 * only sites which can actually be cut are mapped. 206 * 207 * @param on a <code>boolean</code>. 208 */ 209 public void setMapAll(boolean on) 210 { 211 mapAll = on; 212 } 213 214 /** 215 * <code>addEnzyme</code> adds an enzyme to be searched for in the 216 * <code>Sequence</code>. 217 * 218 * @param enzyme a <code>RestrictionEnzyme</code>. 219 */ 220 public void addEnzyme(RestrictionEnzyme enzyme) 221 { 222 if (restrictionEnzymes.contains(enzyme)) 223 throw new IllegalArgumentException("RestrictionMapper is already mapping '" 224 + enzyme 225 + "'"); 226 restrictionEnzymes.add(enzyme); 227 } 228 229 /** 230 * <code>removeEnzyme</code> removes an enzyme from those to be 231 * searched for in the <code>Sequence</code>. 232 * 233 * @param enzyme a <code>RestrictionEnzyme</code>. 234 */ 235 public void removeEnzyme(RestrictionEnzyme enzyme) 236 { 237 if (! restrictionEnzymes.contains(enzyme)) 238 throw new IllegalArgumentException("RestrictionMapper is not mapping '" 239 + enzyme 240 + "'"); 241 242 restrictionEnzymes.remove(enzyme); 243 } 244 245 /** 246 * <code>clearEnzymes</code> removes all enzymes from those to be 247 * searched for in the <code>Sequence</code>. 248 */ 249 public void clearEnzymes() 250 { 251 restrictionEnzymes.clear(); 252 } 253 254 private class CallableSiteFinder implements Callable<RestrictionEnzyme> { 255 256 RestrictionSiteFinder finder; 257 258 CallableSiteFinder(RestrictionSiteFinder finder) { 259 this.finder = finder; 260 } 261 262 263 public RestrictionEnzyme call() throws Exception { 264 finder.run(); 265 return finder.getEnzyme(); 266 } 267 268 } 269}