001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.io;
023
024
025import java.io.PrintStream;
026import java.util.ArrayList;
027import java.util.Arrays;
028import java.util.Collection;
029import java.util.Iterator;
030import java.util.List;
031
032import org.biojava.bio.BioException;
033import org.biojava.bio.seq.Feature;
034import org.biojava.bio.seq.StrandedFeature;
035import org.biojava.bio.symbol.Alphabet;
036import org.biojava.bio.symbol.IllegalAlphabetException;
037import org.biojava.bio.symbol.Symbol;
038import org.biojava.bio.taxa.EbiFormat;
039import org.biojava.bio.taxa.Taxon;
040
041/**
042 * <p><code>EmblFileFormer</code> performs the detailed formatting of
043 * EMBL entries for writing to a <code>PrintStream</code>. Currently
044 * the formatting of the header is not correct. This really needs to
045 * be addressed in the parser which is merging fields which should
046 * remain separate.</p>
047 *
048 * <p>The event generator used to feed events to this class should
049 * enforce ordering of those events. This class will stream data
050 * directly to the <code>PrintStream</code></p>.
051 *
052 * <p>This implementation requires that all the symbols be added in
053 * one block as is does not buffer the tokenized symbols between
054 * calls.</p>
055 *
056 * @author Keith James
057 * @author Len Trigg (Taxon output)
058 * @author Lorna Morris
059 * @since 1.2
060 * @deprecated Use org.biojavax.bio.seq.io framework instead
061 */
062public class EmblFileFormer extends AbstractGenEmblFileFormer
063    implements SeqFileFormer
064{
065    // Tags which are special cases, not having "XX" after them
066    private static List NON_SEPARATED_TAGS = new ArrayList();
067
068    static
069    {
070        NON_SEPARATED_TAGS.add(EmblLikeFormat.SOURCE_TAG);
071        NON_SEPARATED_TAGS.add(EmblLikeFormat.REFERENCE_TAG);
072        NON_SEPARATED_TAGS.add(EmblLikeFormat.COORDINATE_TAG);
073        NON_SEPARATED_TAGS.add(EmblLikeFormat.REF_ACCESSION_TAG);
074        NON_SEPARATED_TAGS.add(EmblLikeFormat.AUTHORS_TAG);
075        NON_SEPARATED_TAGS.add(EmblLikeFormat.TITLE_TAG);
076        NON_SEPARATED_TAGS.add(EmblLikeFormat.FEATURE_TAG);
077        NON_SEPARATED_TAGS.add(EmblLikeFormat.JOURNAL_TAG);//Lorna: added
078        NON_SEPARATED_TAGS.add(EmblLikeFormat.REF_XREF_TAG);//RichardH: added
079        NON_SEPARATED_TAGS.add(EmblLikeFormat.SEPARATOR_TAG);//Lorna: added
080    }
081
082    // 19 spaces
083    private static String FT_LEADER =
084        EmblLikeFormat.FEATURE_TABLE_TAG + "                   ";
085
086    // 3 spaces
087    private static String SQ_LEADER = "   ";
088
089    // 80 spaces
090    private static String EMPTY_LINE =
091        "                                        " +
092        "                                        ";
093
094    private PrintStream stream;
095
096    private String accLine;
097
098    /**
099     * Creates a new <code>EmblFileFormer</code> using
100     * <code>System.out</code> stream.
101     */
102    protected EmblFileFormer()
103    {
104        this(System.out);
105    }
106
107    /**
108     * Creates a new <code>EmblFileFormer</code> using the specified
109     * stream.
110     *
111     * @param stream a <code>PrintStream</code>.
112     */
113    protected EmblFileFormer(PrintStream stream)
114    {
115        super();
116        this.stream = stream;
117    }
118
119    public PrintStream getPrintStream()
120    {
121        return stream;
122    }
123
124    public void setPrintStream(PrintStream stream)
125    {
126        this.stream = stream;
127    }
128
129    public void setName(String id) throws ParseException
130    {
131    }
132
133    public void startSequence() throws ParseException
134    {
135       aCount = 0;
136       cCount = 0;
137       gCount = 0;
138       tCount = 0;
139       oCount = 0;
140    }
141
142    public void endSequence() throws ParseException
143    {
144        stream.println(EmblLikeFormat.END_SEQUENCE_TAG);
145    }
146
147    public void setURI(String uri) throws ParseException { }
148
149    public void addSymbols(Alphabet  alpha,
150                           Symbol [] syms,
151                           int       start,
152                           int       length)
153        throws IllegalAlphabetException
154    {
155        try
156        {
157            int end = start + length - 1;
158
159            for (int i = start; i <= end; i++)
160            {
161                Symbol sym = syms[i];
162
163                if (sym == a)
164                    aCount++;
165                else if (sym == c)
166                    cCount++;
167                else if (sym == g)
168                    gCount++;
169                else if (sym == t)
170                    tCount++;
171                else
172                    oCount++;
173            }
174
175            StringBuffer sb = new StringBuffer(EmblLikeFormat.SEPARATOR_TAG);
176            sb.append(nl);
177            sb.append("SQ   Sequence ");
178            sb.append(length + " BP; ");
179            sb.append(aCount + " A; ");
180            sb.append(cCount + " C; ");
181            sb.append(gCount + " G; ");
182            sb.append(tCount + " T; ");
183            sb.append(oCount + " other;");
184
185            // Print sequence summary header
186            stream.println(sb);
187
188            int fullLine = length / 60;
189            int partLine = length % 60;
190
191            int lineCount = fullLine;
192            if (partLine > 0)
193                lineCount++;
194
195            int lineLens [] = new int [lineCount];
196
197            // All lines are 60, except last (if present)
198            Arrays.fill(lineLens, 60);
199
200            if (partLine > 0)
201                lineLens[lineCount - 1] = partLine;
202
203            for (int i = 0; i < lineLens.length; i++)
204            {
205                // Prep the whitespace
206                StringBuffer sq = new StringBuffer(EMPTY_LINE);
207
208                // How long is this chunk?
209                int len = lineLens[i];
210                // Prepare a Symbol array same length as chunk
211                Symbol [] sa = new Symbol [len];
212
213                // Get symbols and format into blocks of tokens
214                System.arraycopy(syms, start + (i * 60), sa, 0, len);
215
216                sb = new StringBuffer();
217
218                String blocks = (formatTokenBlock(sb, sa, 10,
219                         alpha.getTokenization("token"))).toString();
220
221                sq.replace(5, blocks.length() + 5, blocks);
222
223                // Calculate the running residue count and add to the line
224                String count = Integer.toString((i * 60) + len);
225                sq.replace((80 - count.length()), 80, count);
226
227                // Print formatted sequence line
228                stream.println(sq);
229            }
230        }
231        catch (BioException ex)
232        {
233            throw new IllegalAlphabetException(ex, "Alphabet not tokenizing");
234        }
235    }
236
237        public void addSequenceProperty(Object key, Object value)
238        throws ParseException
239    {
240        StringBuffer sb = new StringBuffer();
241
242        // Ignore separators if they are sent to us. The parser should
243        // be ignoring these really (lorna: I've changed this so they are ignored in SeqIOEventEmitter)
244        //if (key.equals(EmblLikeFormat.SEPARATOR_TAG))
245            //return;
246
247        String tag = key.toString();
248        String leader = tag + SQ_LEADER;
249        String line = "";
250        int wrapWidth = 85 - leader.length();
251
252        // Special case: accession number
253        if (key.equals(EmblProcessor.PROPERTY_EMBL_ACCESSIONS))
254        {
255            accLine = buildPropertyLine((Collection) value, ";", true);
256            return;
257        }
258        else if (key.equals(EmblLikeFormat.ACCESSION_TAG))
259        {
260            line = accLine;
261        } else if (key.equals(OrganismParser.PROPERTY_ORGANISM)) {
262            Taxon taxon = (Taxon) value;
263            addSequenceProperty(EmblLikeFormat.SOURCE_TAG, taxon);
264            addSequenceProperty(EmblLikeFormat.ORGANISM_TAG, taxon.getParent());
265            addSequenceProperty(EmblLikeFormat.ORGANISM_XREF_TAG, taxon);
266            return;
267        }
268        if (value instanceof String)
269        {
270            line = (String) value;
271        }
272        else if (value instanceof Collection)
273        {
274            // Special case: date lines
275            if (key.equals(EmblLikeFormat.DATE_TAG))
276            {
277                line = buildPropertyLine((Collection) value, nl + leader, false);
278                wrapWidth = Integer.MAX_VALUE;
279            }
280            //lorna :added 21.08.03, DR lines are another special case. Each one goes onto a separate line.
281            else if (key.equals(EmblLikeFormat.DR_TAG))
282            {
283                line = buildPropertyLine((Collection) value, nl + leader, false);
284                wrapWidth = Integer.MAX_VALUE;
285            }
286            else if (key.equals(EmblLikeFormat.AUTHORS_TAG))
287            {
288                line = buildPropertyLine((Collection) value, nl + leader, false); //lorna: add space here?
289                wrapWidth = Integer.MAX_VALUE;
290            }
291            else if (key.equals(EmblLikeFormat.REF_ACCESSION_TAG))
292            {
293                line = buildPropertyLine((Collection) value, nl + leader, false);
294                wrapWidth = Integer.MAX_VALUE;
295            }
296            else
297            {
298                line = buildPropertyLine((Collection) value, " ", false);
299            }
300        } else if (value instanceof Taxon) {
301            if (key.equals(EmblLikeFormat.ORGANISM_TAG)) {
302                line = EbiFormat.getInstance().serialize((Taxon) value);
303            } else if (key.equals(EmblLikeFormat.SOURCE_TAG)) {
304                line = EbiFormat.getInstance().serializeSource((Taxon) value);
305            } else if (key.equals(EmblLikeFormat.ORGANISM_XREF_TAG)) {
306                line = EbiFormat.getInstance().serializeXRef((Taxon) value);
307            }
308        }
309
310        if (line.length() == 0)
311        {
312            stream.println(tag);
313        }
314        else
315        {
316            sb = formatSequenceProperty(sb, line, leader, wrapWidth);
317            stream.println(sb);
318        }
319        // Special case: those which don't get separated
320        if (! NON_SEPARATED_TAGS.contains(key))
321            stream.println(EmblLikeFormat.SEPARATOR_TAG);
322        // Special case: feature header
323        if (key.equals(EmblLikeFormat.FEATURE_TAG))
324            stream.println(EmblLikeFormat.FEATURE_TAG);
325    }
326
327
328    public void startFeature(Feature.Template templ)
329        throws ParseException
330    {
331        int strand = 0;
332
333        if (templ instanceof StrandedFeature.Template)
334            strand = ((StrandedFeature.Template) templ).strand.getValue();
335
336        StringBuffer sb = new StringBuffer(FT_LEADER);
337        sb = formatLocationBlock(sb, templ.location, strand, FT_LEADER, 80);
338        sb.replace(5, 5 + templ.type.length(), templ.type);
339        stream.println(sb);
340    }
341
342    public void endFeature() throws ParseException { }
343
344    public void addFeatureProperty(Object key, Object value)
345    {
346        // Don't print internal data structures
347        if (key.equals(Feature.PROPERTY_DATA_KEY))
348            return;
349
350        StringBuffer fb;
351        StringBuffer sb;
352
353        // The value may be a collection if several qualifiers of the
354        // same type are present in a feature
355        if (value instanceof Collection)
356        {
357            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();)
358            {
359                fb = new StringBuffer();
360                sb = new StringBuffer();
361
362                fb = formatQualifierBlock(fb,
363                                          formatQualifier(sb, key, vi.next()).substring(0),
364                                          FT_LEADER,
365                                          80);
366                stream.println(fb);
367            }
368        }
369        else
370        {
371            fb = new StringBuffer();
372            sb = new StringBuffer();
373
374            fb = formatQualifierBlock(fb,
375                                      formatQualifier(sb, key, value).substring(0),
376                                      FT_LEADER,
377                                      80);
378            stream.println(fb);
379        }
380    }
381
382    private String buildPropertyLine(Collection property,
383                                     String separator,
384                                     boolean terminate)
385    {
386        StringBuffer sb = new StringBuffer();
387
388        for (Iterator pi = property.iterator(); pi.hasNext();)
389        {
390            sb.append(pi.next().toString());
391            sb.append(separator);
392        }
393
394        if (terminate)
395        {
396            return sb.substring(0);
397        }
398        else
399        {
400            return sb.substring(0, sb.length() - separator.length());
401        }
402    }
403}