001
002
003
004package org.biojava.utils.regex;
005
006import java.util.HashSet;
007import java.util.Iterator;
008import java.util.Set;
009
010import org.biojava.bio.symbol.FiniteAlphabet;
011import org.biojava.bio.symbol.IllegalAlphabetException;
012import org.biojava.bio.symbol.IllegalSymbolException;
013import org.biojava.bio.symbol.Symbol;
014import org.biojava.bio.symbol.SymbolList;
015
016/**
017 * A utility class to make searching a Sequence with many regex patterns
018 * easier.
019 * @author David Huen
020 * @since 1.4
021 */
022public class Search
023{
024    /**
025     * Interface for a class that will recieve match information
026     * from this class.
027     */
028    public interface Listener
029    {
030        /**
031         * @param seq Sequence on which the search was conducted.
032         * @param pattern Pattern object used to conduct search.
033         * @param start start coordinate of match.
034         * @param end end of match plus one.
035         * @return if false, it indicates the Listener will not accept further data.
036         */
037        public boolean reportMatch(SymbolList seq, Pattern pattern, int start, int end);
038    }
039
040    private class PatternInfo
041    {
042        private String patternString;
043        private Pattern pattern = null;
044        private Matcher matcher = null;
045        private boolean overlap;
046
047        public boolean equals(Object o)
048        {
049            if (o instanceof PatternInfo) {
050                PatternInfo other = (PatternInfo) o;
051                return patternString.equals(other.patternString);
052            }
053            else
054                return false;
055        }
056
057        public int hashCode()
058        {
059            return patternString.hashCode();
060        }
061    }
062
063    private Listener listener = null;
064    private PatternFactory factory;
065    private Set patterns = new HashSet();
066
067    public Search(FiniteAlphabet alfa)
068    {
069        factory = PatternFactory.makeFactory(alfa);
070    }
071
072    public void setListener(Listener listener)
073    {
074        this.listener = listener;
075    }
076
077    /**
078     * add a search pattern to the searches to be conducted
079     * by this object.
080     * @param patternString String representation of the pattern.
081     * @param overlap if true, the search continues at the base following the start to the previous hit.
082     * If false, it continues at the base after the existing hit.
083     * @throws RegexException if the requested pattern is not valid
084     * @throws IllegalAlphabetException if the requested pattern is not valid
085     */
086    public void addPattern(String patternString, boolean overlap)
087        throws RegexException, IllegalAlphabetException
088    {
089        Pattern pattern = factory.compile(patternString);
090        PatternInfo info = new PatternInfo();
091        info.patternString = patternString;
092        info.pattern = pattern;
093        info.overlap = overlap;
094        patterns.add(info);
095    }
096
097    /**
098     * add a search pattern to the searches to be conducted
099     * by this object.
100     * @param patternString String representation of the pattern.
101     * @param overlap if true, the search continues at the base following the start to the previous hit.
102     * If false, it continues at the base after the existing hit.
103     * @throws RegexException if the requested pattern is not valid
104     * @throws IllegalAlphabetException if the requested pattern is not valid
105     */
106    public void addPattern(String label, String patternString, boolean overlap)
107        throws RegexException, IllegalAlphabetException
108    {
109        Pattern pattern = factory.compile(patternString, label);
110        PatternInfo info = new PatternInfo();
111        info.patternString = patternString;
112        info.pattern = pattern;
113        info.overlap = overlap;
114        patterns.add(info);
115    }
116
117    /**
118     * remove all patterns from the pattern cache.
119     */
120    public void clearPatterns()
121    {
122        patterns.clear();
123    }
124
125    public char charValue(Symbol sym)
126        throws IllegalSymbolException
127    {
128        return factory.charValue(sym);
129    }
130
131    /**
132     * search the Sequence with the patterns already registered with this object.
133     */
134    public void search(SymbolList seq)
135    {
136        search(seq, 1, seq.length());
137    }
138
139    /**
140     * search part of the SymbolList with the patterns already registered with this object.
141     * @param loLimit low limit of search range.
142     * @param hiLimit high limit of search range.
143     */
144    public void search(SymbolList seq, int loLimit, int hiLimit)
145    {
146        for (Iterator patternsI = patterns.iterator(); patternsI.hasNext(); ) {
147            PatternInfo info = (PatternInfo) patternsI.next();
148            if (info.matcher == null) {
149                info.matcher = info.pattern.matcher(seq);
150            }
151            else {
152                info.matcher = info.matcher.reset(seq);
153            }
154
155            // now exhaustively search the sequence
156            int begin = loLimit;
157            while (info.matcher.find(begin)) {
158                // got a hit
159                int start = info.matcher.start();
160                int end = info.matcher.end();
161                if ((listener != null) && (start <= hiLimit)) 
162                    if (!listener.reportMatch(seq, info.pattern, start, end)) return;
163                // compute where next search begins
164                if (info.overlap)
165                    begin = start + 1;
166                else
167                    begin = end;
168                if (begin >= hiLimit) break;
169            }
170        }
171    }
172}
173