/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.io;


import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.StrandedFeature;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.taxa.EbiFormat;
import org.biojava.bio.taxa.Taxon;

/**
 * <p><code>EmblFileFormer</code> performs the detailed formatting of
 * EMBL entries for writing to a <code>PrintStream</code>. Currently
 * the formatting of the header is not correct. This really needs to
 * be addressed in the parser which is merging fields which should
 * remain separate.</p>
 *
 * <p>The event generator used to feed events to this class should
 * enforce ordering of those events. This class will stream data
 * directly to the <code>PrintStream</code>.</p>
 *
 * <p>This implementation requires that all the symbols be added in
 * one block as it does not buffer the tokenized symbols between
 * calls.</p>
 *
 * @author Keith James
 * @author Len Trigg (Taxon output)
 * @author Lorna Morris
 * @since 1.2
 * @deprecated Use org.biojavax.bio.seq.io framework instead
 */
public class EmblFileFormer extends AbstractGenEmblFileFormer
    implements SeqFileFormer
{
    // Width of a formatted output line
    private static final int LINE_WIDTH = 80;

    // Number of sequence symbols written on one sequence line
    private static final int SYMBOLS_PER_LINE = 60;

    // Number of symbols in each whitespace-delimited group on a
    // sequence line
    private static final int SYMBOL_BLOCK_SIZE = 10;

    // Column at which sequence symbols start on a sequence line
    private static final int SEQUENCE_INDENT = 5;

    // Tags which are special cases, not having "XX" after them
    private static final List NON_SEPARATED_TAGS = new ArrayList();

    static
    {
        NON_SEPARATED_TAGS.add(EmblLikeFormat.SOURCE_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.REFERENCE_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.COORDINATE_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.REF_ACCESSION_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.AUTHORS_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.TITLE_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.FEATURE_TAG);
        NON_SEPARATED_TAGS.add(EmblLikeFormat.JOURNAL_TAG);//Lorna: added
        NON_SEPARATED_TAGS.add(EmblLikeFormat.REF_XREF_TAG);//RichardH: added
        NON_SEPARATED_TAGS.add(EmblLikeFormat.SEPARATOR_TAG);//Lorna: added
    }

    // The padding constants below are generated from explicit widths
    // rather than typed as literal runs of blanks, so that the width
    // can never silently drift from the documented value.

    // Feature table tag followed by 19 spaces
    private static final String FT_LEADER =
        EmblLikeFormat.FEATURE_TABLE_TAG + spaces(19);

    // 3 spaces
    private static final String SQ_LEADER = spaces(3);

    // 80 spaces
    private static final String EMPTY_LINE = spaces(LINE_WIDTH);

    private PrintStream stream;

    // Accession line built when the accession list property arrives;
    // written out later when the accession tag itself is seen
    private String accLine;

    /**
     * Creates a new <code>EmblFileFormer</code> using
     * <code>System.out</code> stream.
     */
    protected EmblFileFormer()
    {
        this(System.out);
    }

    /**
     * Creates a new <code>EmblFileFormer</code> using the specified
     * stream.
     *
     * @param stream a <code>PrintStream</code>.
     */
    protected EmblFileFormer(PrintStream stream)
    {
        super();
        this.stream = stream;
    }

    /**
     * Returns the stream to which formatted output is written.
     *
     * @return a <code>PrintStream</code>.
     */
    public PrintStream getPrintStream()
    {
        return stream;
    }

    /**
     * Sets the stream to which formatted output is written.
     *
     * @param stream a <code>PrintStream</code>.
     */
    public void setPrintStream(PrintStream stream)
    {
        this.stream = stream;
    }

    /**
     * Does nothing; the name is not written by this former.
     *
     * @param id ignored.
     */
    public void setName(String id) throws ParseException
    {
    }

    /**
     * Resets the per-sequence symbol counters.
     */
    public void startSequence() throws ParseException
    {
        aCount = 0;
        cCount = 0;
        gCount = 0;
        tCount = 0;
        oCount = 0;
    }

    /**
     * Writes the end-of-sequence tag.
     */
    public void endSequence() throws ParseException
    {
        stream.println(EmblLikeFormat.END_SEQUENCE_TAG);
    }

    /**
     * Does nothing; the URI is not written by this former.
     *
     * @param uri ignored.
     */
    public void setURI(String uri) throws ParseException { }

    /**
     * Writes the sequence summary line followed by the symbols,
     * {@value #SYMBOLS_PER_LINE} per line in groups of
     * {@value #SYMBOL_BLOCK_SIZE}, each line ending with the
     * right-justified running symbol count.
     *
     * <p>The symbol counters are updated cumulatively, but the
     * length reported in the summary is that of this call only, so
     * all symbols of a sequence must be supplied in a single
     * call.</p>
     *
     * @param alpha the <code>Alphabet</code> supplying the "token"
     * tokenization used to print the symbols.
     * @param syms the array containing the symbols.
     * @param start index in <code>syms</code> of the first symbol.
     * @param length number of symbols to write.
     *
     * @exception IllegalAlphabetException if a
     * <code>BioException</code> is raised while tokenizing.
     */
    public void addSymbols(Alphabet alpha,
                           Symbol []  syms,
                           int        start,
                           int        length)
        throws IllegalAlphabetException
    {
        try
        {
            int end = start + length;

            for (int i = start; i < end; i++)
            {
                Symbol sym = syms[i];

                if (sym == a)
                    aCount++;
                else if (sym == c)
                    cCount++;
                else if (sym == g)
                    gCount++;
                else if (sym == t)
                    tCount++;
                else
                    oCount++;
            }

            StringBuffer sb = new StringBuffer(EmblLikeFormat.SEPARATOR_TAG);
            sb.append(nl);
            // Same "tag + three blanks" layout as every other line
            sb.append("SQ" + SQ_LEADER + "Sequence ");
            sb.append(length + " BP; ");
            sb.append(aCount + " A; ");
            sb.append(cCount + " C; ");
            sb.append(gCount + " G; ");
            sb.append(tCount + " T; ");
            sb.append(oCount + " other;");

            // Print sequence summary header
            stream.println(sb);

            // All lines hold SYMBOLS_PER_LINE symbols, except possibly
            // the last. A non-positive length writes no lines at all.
            for (int offset = 0; offset < length; offset += SYMBOLS_PER_LINE)
            {
                // How long is this chunk?
                int len = Math.min(SYMBOLS_PER_LINE, length - offset);

                // Prepare a Symbol array same length as chunk
                Symbol [] sa = new Symbol [len];
                System.arraycopy(syms, start + offset, sa, 0, len);

                // Format the chunk into blocks of tokens
                String blocks =
                    (formatTokenBlock(new StringBuffer(), sa,
                                      SYMBOL_BLOCK_SIZE,
                                      alpha.getTokenization("token"))).toString();

                // Start from a line of blanks and overwrite the parts
                // which carry data
                StringBuffer sq = new StringBuffer(EMPTY_LINE);
                sq.replace(SEQUENCE_INDENT,
                           blocks.length() + SEQUENCE_INDENT,
                           blocks);

                // Calculate the running residue count and add to the
                // end of the line
                String count = Integer.toString(offset + len);
                sq.replace((LINE_WIDTH - count.length()), LINE_WIDTH, count);

                // Print formatted sequence line
                stream.println(sq);
            }
        }
        catch (BioException ex)
        {
            throw new IllegalAlphabetException(ex, "Alphabet not tokenizing");
        }
    }

    /**
     * Formats and writes one sequence-level property.
     *
     * <ul>
     * <li>The accession list property is not written; it is stored
     * and used as the line for a later accession tag.</li>
     * <li>The organism property is expanded into source, organism
     * and organism cross-reference lines.</li>
     * <li><code>String</code> values are written as given;
     * <code>Collection</code> values are joined, one entry per line
     * for date, DR, author and reference accession tags and
     * space-separated otherwise; <code>Taxon</code> values are
     * serialized with <code>EbiFormat</code>.</li>
     * <li>A separator line follows unless the key is one of the
     * non-separated tags, and the feature tag is followed by an
     * extra feature header line.</li>
     * </ul>
     *
     * @param key the property key; its string form is the line tag.
     * @param value the property value.
     */
    public void addSequenceProperty(Object key, Object value)
        throws ParseException
    {
        // Ignore separators if they are sent to us. The parser should
        // be ignoring these really (lorna: I've changed this so they
        // are ignored in SeqIOEventEmitter)

        String tag    = key.toString();
        String leader = tag + SQ_LEADER;
        String line   = "";
        int wrapWidth = 85 - leader.length();

        // Special case: accession number
        if (key.equals(EmblProcessor.PROPERTY_EMBL_ACCESSIONS))
        {
            accLine = buildPropertyLine((Collection) value, ";", true);
            return;
        }
        else if (key.equals(EmblLikeFormat.ACCESSION_TAG))
        {
            // No accession list may have been received yet; use an
            // empty line rather than failing on a null further down
            line = (accLine == null) ? "" : accLine;
        }
        else if (key.equals(OrganismParser.PROPERTY_ORGANISM))
        {
            Taxon taxon = (Taxon) value;
            addSequenceProperty(EmblLikeFormat.SOURCE_TAG, taxon);
            addSequenceProperty(EmblLikeFormat.ORGANISM_TAG, taxon.getParent());
            addSequenceProperty(EmblLikeFormat.ORGANISM_XREF_TAG, taxon);
            return;
        }

        if (value instanceof String)
        {
            line = (String) value;
        }
        else if (value instanceof Collection)
        {
            // Special cases: date, DR (lorna: added 21.08.03), author
            // and reference accession lines. Each entry goes onto a
            // separate line and is never wrapped.
            boolean onePerLine =
                key.equals(EmblLikeFormat.DATE_TAG)
                || key.equals(EmblLikeFormat.DR_TAG)
                || key.equals(EmblLikeFormat.AUTHORS_TAG)
                || key.equals(EmblLikeFormat.REF_ACCESSION_TAG);

            if (onePerLine)
            {
                line = buildPropertyLine((Collection) value, nl + leader, false);
                wrapWidth = Integer.MAX_VALUE;
            }
            else
            {
                line = buildPropertyLine((Collection) value, " ", false);
            }
        }
        else if (value instanceof Taxon)
        {
            if (key.equals(EmblLikeFormat.ORGANISM_TAG))
            {
                line = EbiFormat.getInstance().serialize((Taxon) value);
            }
            else if (key.equals(EmblLikeFormat.SOURCE_TAG))
            {
                line = EbiFormat.getInstance().serializeSource((Taxon) value);
            }
            else if (key.equals(EmblLikeFormat.ORGANISM_XREF_TAG))
            {
                line = EbiFormat.getInstance().serializeXRef((Taxon) value);
            }
        }

        if (line.length() == 0)
        {
            stream.println(tag);
        }
        else
        {
            stream.println(formatSequenceProperty(new StringBuffer(),
                                                  line, leader, wrapWidth));
        }

        // Special case: those which don't get separated
        if (! NON_SEPARATED_TAGS.contains(key))
            stream.println(EmblLikeFormat.SEPARATOR_TAG);

        // Special case: feature header
        if (key.equals(EmblLikeFormat.FEATURE_TAG))
            stream.println(EmblLikeFormat.FEATURE_TAG);
    }

    /**
     * Writes the first line(s) of a feature: the feature type
     * followed by its formatted location.
     *
     * @param templ the feature template; the strand is taken into
     * account when it is a <code>StrandedFeature.Template</code>.
     */
    public void startFeature(Feature.Template templ)
        throws ParseException
    {
        int strand = 0;

        if (templ instanceof StrandedFeature.Template)
            strand = ((StrandedFeature.Template) templ).strand.getValue();

        StringBuffer sb = new StringBuffer(FT_LEADER);
        sb = formatLocationBlock(sb, templ.location, strand, FT_LEADER,
                                 LINE_WIDTH);
        // Overwrite part of the blank leader with the feature type
        sb.replace(5, 5 + templ.type.length(), templ.type);
        stream.println(sb);
    }

    /**
     * Does nothing; features need no terminator.
     */
    public void endFeature() throws ParseException { }

    /**
     * Writes a feature qualifier. When the value is a
     * <code>Collection</code> one qualifier is written per element.
     *
     * @param key the qualifier key.
     * @param value the qualifier value, or a <code>Collection</code>
     * of values.
     */
    public void addFeatureProperty(Object key, Object value)
    {
        // Don't print internal data structures
        if (key.equals(Feature.PROPERTY_DATA_KEY))
            return;

        // The value may be a collection if several qualifiers of the
        // same type are present in a feature
        if (value instanceof Collection)
        {
            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();)
            {
                printQualifier(key, vi.next());
            }
        }
        else
        {
            printQualifier(key, value);
        }
    }

    // Formats a single qualifier into a feature table block and
    // prints it.
    private void printQualifier(Object key, Object value)
    {
        String qualifier =
            formatQualifier(new StringBuffer(), key, value).substring(0);

        stream.println(formatQualifierBlock(new StringBuffer(),
                                            qualifier,
                                            FT_LEADER,
                                            LINE_WIDTH));
    }

    // Joins the string forms of the elements of a collection. With
    // terminate set the separator follows every element, otherwise it
    // only appears between elements. An empty collection gives an
    // empty string in both cases.
    private String buildPropertyLine(Collection property,
                                     String     separator,
                                     boolean    terminate)
    {
        StringBuffer sb = new StringBuffer();

        for (Iterator pi = property.iterator(); pi.hasNext();)
        {
            sb.append(pi.next().toString());
            sb.append(separator);
        }

        // Nothing appended means there is no trailing separator to
        // strip; stripping anyway would ask for a negative end index
        if (terminate || sb.length() == 0)
        {
            return sb.substring(0);
        }

        return sb.substring(0, sb.length() - separator.length());
    }

    // Returns a string consisting of the given number of blanks.
    private static String spaces(int count)
    {
        char [] blanks = new char [count];
        Arrays.fill(blanks, ' ');
        return new String(blanks);
    }
}