001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.io; 023 024import java.io.Serializable; 025import java.util.ArrayList; 026import java.util.List; 027import java.util.StringTokenizer; 028import java.util.Vector; 029 030import org.biojava.bio.BioException; 031import org.biojava.utils.ParseErrorEvent; 032import org.biojava.utils.ParseErrorListener; 033import org.biojava.utils.ParseErrorSource; 034 035/** 036 * Simple filter which handles attribute lines from an EMBL file. This 037 * class delegates creation of <code>Feature</code>s to a 038 * <code>FeatureTableParser</code>, which in turn delegates creation 039 * of <code>Locations</code> to an <code>EmblLikeLocationParser</code> 040 * which is shared with the <code>GenbankProcessor</code>. 041 * 042 * An <code>EmblLikeLocationParser</code> parses EMBL/Genbank style 043 * locations. Supported location forms: 044 * 045 * <pre> 046 * 123 047 * <123 or >123 048 * (123.567) 049 * (123.567)..789 050 * 123..(567.789) 051 * (123.345)..(567.789) 052 * 123..456 053 * <123..567 or 123..>567 or <123..>567 054 * 123^567 055 * AL123465:(123..567) 056 * </pre> 057 * 058 * The only EMBL header information retained over a read/write cycle 059 * is the accession number (all numbers). 060 * 061 * @author Thomas Down 062 * @author Greg Cox 063 * @author Keith James 064 * @since 1.1 065 * @deprecated Use org.biojavax.bio.seq.io framework instead 066 */ 067 068public class EmblProcessor 069 extends 070 SequenceBuilderFilter 071 implements 072 ParseErrorSource 073{ 074 public static final String PROPERTY_EMBL_ACCESSIONS = "embl_accessions"; 075 076 private boolean mBadFeature = false; 077 private Vector mListeners = new Vector(); 078 079 /** 080 * Factory which wraps SequenceBuilders in an EmblProcessor 081 * 082 * @author Thomas Down 083 */ 084 085 public static class Factory implements SequenceBuilderFactory, Serializable { 086 private SequenceBuilderFactory delegateFactory; 087 088 public Factory(SequenceBuilderFactory delegateFactory) { 089 this.delegateFactory = delegateFactory; 090 } 091 092 public SequenceBuilder makeSequenceBuilder() { 093 return new EmblProcessor(delegateFactory.makeSequenceBuilder()); 094 } 095 } 096 097 private FeatureTableParser features; 098 099 public EmblProcessor(SequenceBuilder delegate) { 100 super(delegate); 101 features = new FeatureTableParser(this, "EMBL"); 102 } 103 104 public void endSequence() throws ParseException { 105 // Avoids leaving a null name and null URI if there is no 106 // accession number. If accession number is vital, failure of 107 // test of accessions.size() > 0 should throw a 108 // ParseException. 109 //String id = ""; 110 String uri = ""; 111 if (accessions.size() > 0) { 112 //id = (String) accessions.get(0); 113 uri = "urn:sequence/embl:" + (String) accessions.get(0); 114 getDelegate().addSequenceProperty(PROPERTY_EMBL_ACCESSIONS, accessions); 115 } 116 117 //getDelegate().setName(id); 118 getDelegate().setURI(uri); 119 getDelegate().endSequence(); 120 } 121 122 private List accessions; 123 124 { 125 accessions = new ArrayList(); 126 } 127 128 public void addSequenceProperty(Object key, Object value) 129 throws ParseException 130 { 131 try 132 { 133 if (mBadFeature) 134 { 135 // If this feature is bad in some way, ignore it. 136 if (value != null) 137 { 138 String featureLine = value.toString(); 139 if((key.equals(EmblLikeFormat.FEATURE_TABLE_TAG)) && 140 (featureLine.charAt(0) != ' ')) 141 { 142 // If the offending feature is past, start reading data again 143 mBadFeature = false; 144 features.startFeature(featureLine.substring(0, 15).trim()); 145 features.featureData(featureLine.substring(16)); 146 } 147 } 148 } 149 else 150 { 151 // Tidy up any end-of-block jobbies 152 if (features.inFeature() && 153 !key.equals(EmblLikeFormat.FEATURE_TABLE_TAG)) 154 { 155 features.endFeature(); 156 } 157 158 if (key.equals(EmblLikeFormat.FEATURE_TABLE_TAG)) 159 { 160 String featureLine = value.toString(); 161 if (featureLine.charAt(0) != ' ') 162 { 163 // This is a featuretype field 164 if (features.inFeature()) 165 { 166 features.endFeature(); 167 } 168 169 features.startFeature(featureLine.substring(0, 15).trim()); 170 } 171 features.featureData(featureLine.substring(16)); 172 } 173 else 174 { 175 getDelegate().addSequenceProperty(key, value); 176 177 if (key.equals(EmblLikeFormat.ACCESSION_TAG)) 178 { 179 String acc = value.toString(); 180 StringTokenizer toke = new StringTokenizer(acc, "; "); 181 while (toke.hasMoreTokens()) 182 { 183 accessions.add(toke.nextToken()); 184 } 185 } 186 else if (key.equals(EmblLikeFormat.ID_TAG)) { 187 StringTokenizer toke = new StringTokenizer((String) value); 188 getDelegate().setName(toke.nextToken()); 189 } 190 } 191 } 192 } 193 catch (BioException ex) 194 { 195 // If an exception is thrown, read past the offending feature 196 mBadFeature = true; 197 ParseErrorEvent offendingLineEvent = 198 new ParseErrorEvent(this, "This line could not be parsed: " 199 + value.toString()); 200 this.notifyParseErrorEvent(offendingLineEvent); 201 } 202 catch (IndexOutOfBoundsException ex) 203 { 204 // This occurs when for some line min > max 205 mBadFeature = true; 206 ParseErrorEvent offendingLineEvent = 207 new ParseErrorEvent(this, "From must be less than To: " 208 + value.toString()); 209 this.notifyParseErrorEvent(offendingLineEvent); 210 } 211 } 212 213 /** 214 * Adds a parse error listener to the list of listeners if it isn't already 215 * included. 216 * 217 * @param theListener Listener to be added. 218 */ 219 public synchronized void addParseErrorListener(ParseErrorListener theListener) 220 { 221 if (mListeners.contains(theListener) == false) 222 { 223 mListeners.addElement(theListener); 224 } 225 } 226 227 /** 228 * Removes a parse error listener from the list of listeners if it is 229 * included. 230 * 231 * @param theListener Listener to be removed. 232 */ 233 public synchronized void removeParseErrorListener(ParseErrorListener theListener) 234 { 235 if (mListeners.contains(theListener) == true) 236 { 237 mListeners.removeElement(theListener); 238 } 239 } 240 241 // Protected methods 242 /** 243 * Passes the event on to all the listeners registered for ParseErrorEvents. 244 * 245 * @param theEvent The event to be handed to the listeners. 246 */ 247 protected void notifyParseErrorEvent(ParseErrorEvent theEvent) 248 { 249 Vector listeners; 250 synchronized(this) 251 { 252 listeners = (Vector)mListeners.clone(); 253 } 254 255 for (int index = 0; index < listeners.size(); index++) 256 { 257 ParseErrorListener client = (ParseErrorListener)listeners.elementAt(index); 258 client.BadLineParsed(theEvent); 259 } 260 } 261}