001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.io;
023
024import org.biojava.bio.BioException;
025import org.biojava.bio.SimpleAnnotation;
026import org.biojava.bio.seq.Feature;
027import org.biojava.bio.seq.StrandedFeature;
028
029/**
030 * Simple parser for feature tables. This is shared between the EMBL
031 * and GENBANK format readers.
032 *
033 * @author Thomas Down
034 * @author Matthew Pocock
035 * @author Greg Cox
036 * @author Keith James
037 * @deprecated Use org.biojavax.bio.seq.io framework instead
038 */
039
040/*
041 * Greg Cox: Changed private fields and methods to protected so that
042 *           SwissProtFeatureTableParser could subclass and snag the
043 *           implementation.
044 *
045 * Thomas Down: Post 1.1, finally got round to refactoring this to be
046 *              a `nice' player in the newio world.  Needless to say,
047 *              this simplified things quite a bit.
048 *
049 * Keith James: Added support for reading fuzzy i.e. (123.567)
050 *              locations in addition to unbounded i.e. <123..567
051 *              locations.
052 */
053
054public class FeatureTableParser {
055    private final static int   WITHOUT = 0;
056    private final static int    WITHIN = 1;
057    private final static int  LOCATION = 2;
058    private final static int ATTRIBUTE = 3;
059
060    private int featureStatus = WITHOUT;
061    private StringBuffer featureBuf;
062    private Feature.Template featureTemplate;
063
064    private String                 featureSource;
065    private SeqIOListener          listener;
066    private EmblLikeLocationParser locParser;
067    private String                 seqID;
068
069    FeatureTableParser(SeqIOListener listener, String source) {
070        this.listener      = listener;
071        this.featureSource = source;
072        //this.seqID = seqID;
073
074        featureBuf = new StringBuffer();
075        locParser  = new EmblLikeLocationParser(seqID);
076    }
077
078    public void setSeqID(String seqID) {
079      this.seqID = seqID;
080    }
081
082    //
083    // Interface which the processors use to call us
084    //
085
086    public void startFeature(String type) throws BioException {
087        featureStatus = LOCATION;
088        featureBuf.setLength(0);
089
090        if (this.featureSource.equals("RefSeq:Protein")) {
091            featureTemplate= new Feature.Template();
092        }
093        else {
094            featureTemplate = new StrandedFeature.Template();
095        }
096        featureTemplate.type = type;
097        featureTemplate.source = featureSource;
098        featureTemplate.annotation = new SimpleAnnotation();
099    }
100
101    public void featureData(String line) throws BioException {
102        switch (featureStatus) {
103            case LOCATION:
104                featureBuf.append(line);
105                if (countChar(featureBuf, '(') == countChar(featureBuf, ')')) {
106                    featureTemplate = locParser.parseLocation(featureBuf.substring(0), featureTemplate);
107                    listener.startFeature(featureTemplate);
108                    featureStatus = WITHIN;
109                }
110                break;
111
112            case WITHIN:
113                if (line.charAt(0) == '/') {
114                    // System.out.println("got '/', quotes = " + countChar(line, '"'));
115                    // attribute either is unquoted and on one line or
116                    // is quoted, and must start & end with a quote
117                    //
118                    // we assume that no attributes have embedded quotes
119                    int eq = line.indexOf("=");
120                    if (line.charAt(eq + 1) != '"' ||
121                        line.charAt(line.length() - 1) == '"'
122                    ) {
123                        processAttribute(line);
124                    } else {
125                        featureBuf.setLength(0);
126                        featureBuf.append(line);
127                        featureStatus = ATTRIBUTE;
128                    }
129                } else {
130                    throw new BioException("Invalid line in feature body: " + line);
131                }
132                break;
133
134            case ATTRIBUTE:
135                // If the attribute contains whitespace it probably
136                // consists of whitespace-delimited words. Therefore a
137                // space should be inserted at EOL otherwise words will
138                // get fused (unless there is a space already there)
139                if (((featureBuf.toString().indexOf(" ") >= 0) ||
140                     (line.toString().indexOf(" ") >= 0)) &&
141                    featureBuf.toString().charAt(featureBuf.length()-1) != ' '){
142                    featureBuf.append(" ");
143                }
144                featureBuf.append(line);
145                
146
147                int eq = featureBuf.toString().indexOf("=");
148                if (featureBuf.charAt(eq + 1) != '"' ||
149                    featureBuf.charAt(featureBuf.length() - 1) == '"'
150                ) {
151                    processAttribute(featureBuf.substring(0));
152                    featureStatus = WITHIN;
153                }
154                break;
155        }
156    }
157
158    public void endFeature()
159        throws BioException {
160        listener.endFeature();
161        featureStatus = WITHOUT;
162    }
163
164    public boolean inFeature() {
165        return (featureStatus != WITHOUT);
166    }
167
168    /**
169     * Process the a string corresponding to a feature-table
170     * attribute, and fire it off to our listener.
171     */
172    private void processAttribute(String attr) throws BioException {
173        // System.err.println(attr);
174        int eqPos = attr.indexOf('=');
175        if (eqPos == -1) {
176            listener.addFeatureProperty(attr.substring(1), Boolean.TRUE);
177        } else {
178            String tag = attr.substring(1, eqPos);
179            eqPos++;
180
181            if (attr.charAt(eqPos) == '"')
182                ++eqPos;
183            int max = attr.length();
184
185            if (attr.charAt(max - 1) == '"')
186                --max;
187            String val = attr.substring(eqPos, max);
188
189            if (val.indexOf('"') >= 0) {
190                StringBuffer sb = new StringBuffer();
191                boolean escape = false;
192                for (int i = 0; i < val.length(); ++i) {
193                    char c = val.charAt(i);
194                    if (c == '"') {
195                        if (escape)
196                            sb.append(c);
197                        escape = !escape;
198                    } else {
199                        sb.append(c);
200                        escape = false;
201                    }
202                }
203                val = sb.substring(0);
204            }
205            listener.addFeatureProperty(tag, val);
206        }
207    }
208
209    private int countChar(StringBuffer s, char c) {
210        int cnt = 0;
211        int length = s.length();
212        for (int i = 0; i < length; ++i)
213            if (s.charAt(i) == c)
214                ++cnt;
215        return cnt;
216    }
217}