001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.io; 023 024import org.biojava.bio.BioException; 025import org.biojava.bio.SimpleAnnotation; 026import org.biojava.bio.seq.Feature; 027import org.biojava.bio.seq.StrandedFeature; 028 029/** 030 * Simple parser for feature tables. This is shared between the EMBL 031 * and GENBANK format readers. 032 * 033 * @author Thomas Down 034 * @author Matthew Pocock 035 * @author Greg Cox 036 * @author Keith James 037 * @deprecated Use org.biojavax.bio.seq.io framework instead 038 */ 039 040/* 041 * Greg Cox: Changed private fields and methods to protected so that 042 * SwissProtFeatureTableParser could subclass and snag the 043 * implementation. 044 * 045 * Thomas Down: Post 1.1, finally got round to refactoring this to be 046 * a `nice' player in the newio world. Needless to say, 047 * this simplified things quite a bit. 048 * 049 * Keith James: Added support for reading fuzzy i.e. (123.567) 050 * locations in addition to unbounded i.e. <123..567 051 * locations. 052 */ 053 054public class FeatureTableParser { 055 private final static int WITHOUT = 0; 056 private final static int WITHIN = 1; 057 private final static int LOCATION = 2; 058 private final static int ATTRIBUTE = 3; 059 060 private int featureStatus = WITHOUT; 061 private StringBuffer featureBuf; 062 private Feature.Template featureTemplate; 063 064 private String featureSource; 065 private SeqIOListener listener; 066 private EmblLikeLocationParser locParser; 067 private String seqID; 068 069 FeatureTableParser(SeqIOListener listener, String source) { 070 this.listener = listener; 071 this.featureSource = source; 072 //this.seqID = seqID; 073 074 featureBuf = new StringBuffer(); 075 locParser = new EmblLikeLocationParser(seqID); 076 } 077 078 public void setSeqID(String seqID) { 079 this.seqID = seqID; 080 } 081 082 // 083 // Interface which the processors use to call us 084 // 085 086 public void startFeature(String type) throws BioException { 087 featureStatus = LOCATION; 088 featureBuf.setLength(0); 089 090 if (this.featureSource.equals("RefSeq:Protein")) { 091 featureTemplate= new Feature.Template(); 092 } 093 else { 094 featureTemplate = new StrandedFeature.Template(); 095 } 096 featureTemplate.type = type; 097 featureTemplate.source = featureSource; 098 featureTemplate.annotation = new SimpleAnnotation(); 099 } 100 101 public void featureData(String line) throws BioException { 102 switch (featureStatus) { 103 case LOCATION: 104 featureBuf.append(line); 105 if (countChar(featureBuf, '(') == countChar(featureBuf, ')')) { 106 featureTemplate = locParser.parseLocation(featureBuf.substring(0), featureTemplate); 107 listener.startFeature(featureTemplate); 108 featureStatus = WITHIN; 109 } 110 break; 111 112 case WITHIN: 113 if (line.charAt(0) == '/') { 114 // System.out.println("got '/', quotes = " + countChar(line, '"')); 115 // attribute either is unquoted and on one line or 116 // is quoted, and must start & end with a quote 117 // 118 // we assume that no attributes have embedded quotes 119 int eq = line.indexOf("="); 120 if (line.charAt(eq + 1) != '"' || 121 line.charAt(line.length() - 1) == '"' 122 ) { 123 processAttribute(line); 124 } else { 125 featureBuf.setLength(0); 126 featureBuf.append(line); 127 featureStatus = ATTRIBUTE; 128 } 129 } else { 130 throw new BioException("Invalid line in feature body: " + line); 131 } 132 break; 133 134 case ATTRIBUTE: 135 // If the attribute contains whitespace it probably 136 // consists of whitespace-delimited words. Therefore a 137 // space should be inserted at EOL otherwise words will 138 // get fused (unless there is a space already there) 139 if (((featureBuf.toString().indexOf(" ") >= 0) || 140 (line.toString().indexOf(" ") >= 0)) && 141 featureBuf.toString().charAt(featureBuf.length()-1) != ' '){ 142 featureBuf.append(" "); 143 } 144 featureBuf.append(line); 145 146 147 int eq = featureBuf.toString().indexOf("="); 148 if (featureBuf.charAt(eq + 1) != '"' || 149 featureBuf.charAt(featureBuf.length() - 1) == '"' 150 ) { 151 processAttribute(featureBuf.substring(0)); 152 featureStatus = WITHIN; 153 } 154 break; 155 } 156 } 157 158 public void endFeature() 159 throws BioException { 160 listener.endFeature(); 161 featureStatus = WITHOUT; 162 } 163 164 public boolean inFeature() { 165 return (featureStatus != WITHOUT); 166 } 167 168 /** 169 * Process the a string corresponding to a feature-table 170 * attribute, and fire it off to our listener. 171 */ 172 private void processAttribute(String attr) throws BioException { 173 // System.err.println(attr); 174 int eqPos = attr.indexOf('='); 175 if (eqPos == -1) { 176 listener.addFeatureProperty(attr.substring(1), Boolean.TRUE); 177 } else { 178 String tag = attr.substring(1, eqPos); 179 eqPos++; 180 181 if (attr.charAt(eqPos) == '"') 182 ++eqPos; 183 int max = attr.length(); 184 185 if (attr.charAt(max - 1) == '"') 186 --max; 187 String val = attr.substring(eqPos, max); 188 189 if (val.indexOf('"') >= 0) { 190 StringBuffer sb = new StringBuffer(); 191 boolean escape = false; 192 for (int i = 0; i < val.length(); ++i) { 193 char c = val.charAt(i); 194 if (c == '"') { 195 if (escape) 196 sb.append(c); 197 escape = !escape; 198 } else { 199 sb.append(c); 200 escape = false; 201 } 202 } 203 val = sb.substring(0); 204 } 205 listener.addFeatureProperty(tag, val); 206 } 207 } 208 209 private int countChar(StringBuffer s, char c) { 210 int cnt = 0; 211 int length = s.length(); 212 for (int i = 0; i < length; ++i) 213 if (s.charAt(i) == c) 214 ++cnt; 215 return cnt; 216 } 217}