001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.io;
023
024import java.io.PrintStream;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.Iterator;
029import java.util.List;
030import java.util.StringTokenizer;
031
032import org.biojava.bio.BioError;
033import org.biojava.bio.BioException;
034import org.biojava.bio.seq.DNATools;
035import org.biojava.bio.seq.Feature;
036import org.biojava.bio.seq.StrandedFeature;
037import org.biojava.bio.symbol.Alphabet;
038import org.biojava.bio.symbol.IllegalAlphabetException;
039import org.biojava.bio.symbol.IllegalSymbolException;
040import org.biojava.bio.symbol.Symbol;
041
042/**
043 * <code>GenbankFileFormer</code> performs the detailed formatting of
044 * Genbank entries for writing to a <code>PrintStream</code>. There is
045 * some code dupication with <code>EmblFileFormer</code> which could
046 * be factored out.
047 *
048 * @author Keith James
049 * @since 1.2
050 * @deprecated Use org.biojavax.bio.seq.io framework instead
051 */
052public class GenbankFileFormer extends AbstractGenEmblFileFormer
053    implements SeqFileFormer
054{
055    private PrintStream stream;
056
057    // Main sequence formatting buffer
058    private StringBuffer sq = new StringBuffer();
059    // Main qualifier formatting buffer
060    private StringBuffer qb = new StringBuffer();
061    // Utility formatting buffer
062    private StringBuffer ub = new StringBuffer();
063
064    // Buffers for each possible sequence property line
065    private StringBuffer idb = null;
066    private StringBuffer acb = null;
067    private StringBuffer deb = null;
068    private StringBuffer svb = null;
069    private StringBuffer kwb = null;
070    private StringBuffer osb = null;
071    private StringBuffer ocb = null;
072    private StringBuffer ccb = null;
073    private Object rfb = null;
074    private StringBuffer ftb = new StringBuffer();
075
076    // Locusline buffers
077    private StringBuffer typeb = new StringBuffer();
078    private StringBuffer strb = new StringBuffer();
079    private StringBuffer sizeb = new StringBuffer();
080    private StringBuffer circb = new StringBuffer();
081    private StringBuffer mdatb = new StringBuffer();
082    private StringBuffer divb = new StringBuffer();
083
084    private SymbolTokenization dnaTokenization;
085
086    //vector NTI requires a slightly different flavour of Genbank
087    private boolean vecNTISupport = false;
088
089    {
090        try
091        {
092            dnaTokenization = DNATools.getDNA().getTokenization("token");
093        }
094        catch (BioException ex)
095        {
096            throw new BioError("Couldn't initialize tokenizer for the DNA alphabet",ex);
097        }
098    }
099
100    /**
101     * Creates a new <code>GenbankFileFormer</code> using
102     * <code>System.out</code> stream.
103     */
104    protected GenbankFileFormer()
105    {
106        this(System.out);
107    }
108
109    /**
110     * Creates a new <code>GenbankFileFormer</code> using the
111     * specified stream.
112     *
113     * @param stream a <code>PrintStream</code>.
114     */
115    protected GenbankFileFormer(PrintStream stream)
116    {
117        this.stream = stream;
118    }
119
120    public PrintStream getPrintStream()
121    {
122        return stream;
123    }
124
125    public void setPrintStream(PrintStream stream)
126    {
127        this.stream = stream;
128    }
129
130    public void setName(String id) throws ParseException
131    {
132        idb = new StringBuffer("LOCUS       " + id);
133    }
134
135    public void startSequence() throws ParseException { }
136
137    public void endSequence() throws ParseException { }
138
139    public void setURI(String uri) throws ParseException { }
140
141    public void addSymbols(Alphabet  alpha,
142                           Symbol [] syms,
143                           int       start,
144                           int       length)
145        throws IllegalAlphabetException
146    {
147        try
148        {
149            int aCount = 0;
150            int cCount = 0;
151            int gCount = 0;
152            int tCount = 0;
153            int oCount = 0;
154
155            int end = start + length - 1;
156
157            for (int i = start; i <= end; i++)
158            {
159                char c = dnaTokenization.tokenizeSymbol(syms[i]).charAt(0);
160
161                switch (c)
162                {
163                    case 'a': case 'A':
164                        aCount++;
165                        break;
166                    case 'c': case 'C':
167                        cCount++;
168                        break;
169                    case 'g': case 'G':
170                        gCount++;
171                        break;
172                    case 't': case 'T':
173                        tCount++;
174                        break;
175
176                    default:
177                        oCount++;
178                }
179            }
180
181            // FIXME: (kj) shouldn't be printing sequence properties
182            // in addSymbols method. If you filter out symbols you
183            // lose all sequence properties too.
184
185            // Print out sequence properties in order
186            locusLineCreator(length);
187            if (idb != null) {stream.println(idb); }
188            if (acb != null) {stream.println(acb); }
189            if (svb != null) {stream.println(svb); }
190            if (deb != null) {stream.println(deb); }
191            if (kwb != null) {stream.println(kwb); }
192            if (osb != null) {stream.println(osb); }
193            if (ocb != null) {stream.println(ocb); }
194            if (ccb != null) {stream.println(ccb); }
195            if (rfb != null) {//RichardH
196                if (rfb instanceof List) {
197                    Iterator i = ((List)rfb).iterator();
198                    while (i.hasNext()) { stream.println((StringBuffer)i.next()); } 
199                } else {
200                    stream.println(rfb); 
201                }
202            } 
203            
204            if (ftb.length() != 0)
205            {
206                ftb.insert(0, "FEATURES             Location/Qualifiers" + nl);
207                stream.print(ftb);
208            }
209
210            sq.setLength(0);
211            sq.append("BASE COUNT    ");
212            sq.append(aCount + " a   ");
213            sq.append(cCount + " c   ");
214            sq.append(gCount + " g   ");
215            sq.append(tCount + " t    ");
216            sq.append(oCount + " others");
217            sq.append(nl);
218            sq.append("ORIGIN");
219
220            // Print sequence summary header
221            stream.println(sq);
222
223            int fullLine = length / 60;
224            int partLine = length % 60;
225
226            int lineCount = fullLine;
227            if (partLine > 0)
228                lineCount++;
229
230            int lineLens [] = new int [lineCount];
231
232            // All lines are 60, except last (if present)
233            Arrays.fill(lineLens, 60);
234
235            if (partLine > 0)
236                lineLens[lineCount - 1] = partLine;
237
238            // Prepare line 80 characters wide, sequence is subset of this
239            char [] emptyLine = new char [80];
240
241            for (int i = 0; i < lineLens.length; i++)
242            {
243                sq.setLength(0);
244                ub.setLength(0);
245
246                // How long is this chunk?
247                int len = lineLens[i];
248
249                // Prep the whitespace
250                Arrays.fill(emptyLine, ' ');
251                sq.append(emptyLine);
252
253                // Prepare a Symbol array same length as chunk
254                Symbol [] sa = new Symbol [len];
255
256                // Get symbols and format into blocks of tokens
257                System.arraycopy(syms, start + (i * 60), sa, 0, len);
258
259                String blocks = (formatTokenBlock(ub, sa, 10, dnaTokenization)).toString();
260
261                sq.replace(10, blocks.length() + 10, blocks);
262
263                // Calculate the running residue count and add to the line
264                String count = Integer.toString((i * 60) + 1);
265                sq.replace((9 - count.length()), 9, count);
266
267                // Print formatted sequence line
268                stream.println(sq);
269            }
270
271            // Print end of entry
272            stream.println("//");
273        }
274        catch (IllegalSymbolException ex)
275        {
276            throw new IllegalAlphabetException(ex, "DNA not tokenizing");
277        }
278    }
279
280    public void addSequenceProperty(Object key, Object value)
281        throws ParseException
282    {
283        if (key.equals("LOCUS")) {
284            idb.setLength(0);
285            idb.append("LOCUS       " + (String) value);
286        }
287        else if (key.equals("TYPE")) {
288            typeb.append(value);
289        }
290        else if (key.equals("DIVISION")) {
291            divb.append(value);
292        }
293        else if (key.equals("CIRCULAR")) {
294            circb.append(value);
295        }
296        else if (key.equals("DT") || key.equals("MDAT")) {
297            if (value instanceof ArrayList) {
298                mdatb.append(((ArrayList) value).get(0));
299            }
300            else {
301                mdatb.append(value);
302            }
303        }
304        else if (key.equals("DE") || key.equals("DEFINITION")) {
305            deb = new StringBuffer(sequenceBufferCreator("DEFINITION ", value));
306        }
307        else if (key.equals("SV") || key.equals("VERSION")) {
308            if (svb != null) {
309                svb.insert(11, (String) value);
310            }
311            else {
312                svb = new StringBuffer("VERSION     " + (String) value);
313            }
314        }
315        else if (key.equals("GI")) {
316            if (svb != null) {
317                svb.append("  GI:" + (String) value);
318            }
319            else {
320                svb = new StringBuffer("VERSION       GI:" + (String) value);
321            }
322        }
323        else if (key.equals("KW") || key.equals("KEYWORDS")) {
324            kwb = new StringBuffer(sequenceBufferCreator("KEYWORDS   ", value));
325        }
326        else if (key.equals("OS") || key.equals("SOURCE")) {
327            osb = new StringBuffer(sequenceBufferCreator("SOURCE     ", value));
328        }
329        else if (key.equals("OC") || key.equals("ORGANISM")) {
330            ocb = new StringBuffer(sequenceBufferCreator("  ORGANISM ", value));
331        }
332        else if (key.equals("CC") || key.equals("COMMENT")) {
333            ccb = new StringBuffer(sequenceBufferCreator("COMMENT    ", value));
334        }
335        else if (key.equals(GenbankProcessor.PROPERTY_GENBANK_ACCESSIONS))
336        {
337            ub.setLength(0);
338            ub.append("ACCESSION   ");
339            if(value instanceof List) {
340                for (Iterator ai = ((List) value).iterator(); ai.hasNext();)
341                {
342                    ub.append((String) ai.next());
343                }
344            } else {
345                ub.append(value);
346            }
347            acb = new StringBuffer(ub.substring(0));
348        }
349        // GenBank-style References by RichardH
350        // FIXME: (rh) Understand EMBL-style references and ReferenceAnnotation objects here too.
351        else if (key.equals("REFERENCE")) {
352            if (value instanceof List) {
353                List rfbs = new ArrayList();
354                List refs = (List)value;
355                Iterator i = refs.iterator();
356                while (i.hasNext()) {
357                    String v = (String)i.next();
358                    StringBuffer rfb1 = new StringBuffer(sequenceBufferCreator("REFERENCE  ",v));
359                    rfbs.add(rfb1);
360                }
361                rfb = rfbs;
362            } else {
363                rfb = new StringBuffer(sequenceBufferCreator("REFERENCE  ",value));
364            }
365        }
366        else if (key.equals("AUTHORS")) {            
367            if (value instanceof List) {
368                List rfbs = (List)rfb;
369                List refs = (List)value;
370                Iterator i = refs.iterator();
371                Iterator j = rfbs.iterator();
372                while (i.hasNext()) {
373                    String v = (String)i.next();
374                    StringBuffer rfb1 = (StringBuffer)j.next();
375                    rfb1.append("\n"+sequenceBufferCreator("  AUTHORS  ",v));
376                }
377            } else {                
378                if (rfb instanceof List) {
379                    ((StringBuffer)((List)rfb).get(0)).append("\n"+sequenceBufferCreator("  AUTHORS  ",value));
380                } else {                 
381                    ((StringBuffer)rfb).append("\n"+sequenceBufferCreator("  AUTHORS  ",value));
382                }
383            }
384        }
385        else if (key.equals("TITLE")) {            
386            if (value instanceof List) {
387                List rfbs = (List)rfb;
388                List refs = (List)value;
389                Iterator i = refs.iterator();
390                Iterator j = rfbs.iterator();
391                while (i.hasNext()) {
392                    String v = (String)i.next();
393                    StringBuffer rfb1 = (StringBuffer)j.next();
394                    rfb1.append("\n"+sequenceBufferCreator("  TITLE    ",v));
395                }
396            } else {                
397                if (rfb instanceof List) {
398                    ((StringBuffer)((List)rfb).get(0)).append("\n"+sequenceBufferCreator("  TITLE    ",value));
399                } else {                 
400                    ((StringBuffer)rfb).append("\n"+sequenceBufferCreator("  TITLE    ",value));
401                }
402            }
403        }
404        else if (key.equals("JOURNAL")) {            
405            if (value instanceof List) {
406                List rfbs = (List)rfb;
407                List refs = (List)value;
408                Iterator i = refs.iterator();
409                Iterator j = rfbs.iterator();
410                while (i.hasNext()) {
411                    String v = (String)i.next();
412                    StringBuffer rfb1 = (StringBuffer)j.next();
413                    rfb1.append("\n"+sequenceBufferCreator("  JOURNAL  ",v));
414                }
415            } else {                
416                if (rfb instanceof List) {
417                    ((StringBuffer)((List)rfb).get(0)).append("\n"+sequenceBufferCreator("  JOURNAL  ",value));
418                } else {                 
419                    ((StringBuffer)rfb).append("\n"+sequenceBufferCreator("  JOURNAL  ",value));
420                }
421            }
422        }
423        else if (key.equals("PUBMED")) {            
424            if (value instanceof List) {
425                List rfbs = (List)rfb;
426                List refs = (List)value;
427                Iterator i = refs.iterator();
428                Iterator j = rfbs.iterator();
429                while (i.hasNext()) {
430                    String v = (String)i.next();
431                    StringBuffer rfb1 = (StringBuffer)j.next();
432                    rfb1.append("\n"+sequenceBufferCreator("  PUBMED   ",v));
433                }
434            } else {                
435                if (rfb instanceof List) {
436                    ((StringBuffer)((List)rfb).get(0)).append("\n"+sequenceBufferCreator("  PUBMED   ",value));
437                } else {                 
438                    ((StringBuffer)rfb).append("\n"+sequenceBufferCreator("  PUBMED   ",value));
439                }
440            }
441        }
442        else if (key.equals("MEDLINE")) {            
443            if (value instanceof List) {
444                List rfbs = (List)rfb;
445                List refs = (List)value;
446                Iterator i = refs.iterator();
447                Iterator j = rfbs.iterator();
448                while (i.hasNext()) {
449                    String v = (String)i.next();
450                    StringBuffer rfb1 = (StringBuffer)j.next();
451                    rfb1.append("\n"+sequenceBufferCreator("  MEDLINE  ",v));
452                }
453            } else {                
454                if (rfb instanceof List) {
455                    ((StringBuffer)((List)rfb).get(0)).append("\n"+sequenceBufferCreator("  MEDLINE  ",value));
456                } else {                 
457                    ((StringBuffer)rfb).append("\n"+sequenceBufferCreator("  MEDLINE  ",value));
458                }
459            }
460        }
461    }
462
463    public void startFeature(Feature.Template templ)
464        throws ParseException
465    {
466        // There are 21 spaces in the leader
467        String leader = "                     ";
468        int    strand = 0;
469
470        if (templ instanceof StrandedFeature.Template)
471            strand = ((StrandedFeature.Template) templ).strand.getValue();
472
473        ub.setLength(0);
474        ub.append(leader);
475
476        StringBuffer lb = formatLocationBlock(ub,
477                                              templ.location,
478                                              strand,
479                                              leader,
480                                              80);
481
482        lb.replace(5, 5 + templ.type.length(), templ.type);
483
484        ftb.append(lb + nl);
485    }
486
487    public void endFeature() throws ParseException { }
488
489    public void addFeatureProperty(Object key, Object value)
490        throws ParseException
491    {
492        // There are 21 spaces in the leader
493        String   leader = "                     ";
494
495        // Don't print internal data structures
496        if (key.equals(Feature.PROPERTY_DATA_KEY))
497            return;
498
499        // The value may be a collection if several qualifiers of the
500        // same type are present in a feature
501        if (Collection.class.isInstance(value))
502        {
503            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();)
504            {
505                qb.setLength(0);
506                ub.setLength(0);
507                StringBuffer fb = formatQualifierBlock(qb,
508                                                       formatQualifier(ub, key, vi.next()).substring(0),
509                                                       leader,
510                                                       80);
511                ftb.append(fb + nl);
512            }
513        }
514        else
515        {
516            qb.setLength(0);
517            ub.setLength(0);
518            StringBuffer fb = formatQualifierBlock(qb,
519                                                   formatQualifier(ub, key, value).substring(0),
520                                                   leader,
521                                                   80);
522            ftb.append(fb + nl);
523        }
524    }
525
526    /**
527     * VectorNTI requires GenBank format to be a little more specific than
528     * required by the GenBank definition. By setting this to true the produced
529     * output should be parsable by VectorNTI. By default this is false.
530     *
531     * @param b to support or not to support.
532     */
533    public void setVectorNTISupport(boolean b){
534      vecNTISupport = b;
535    }
536
537    /**
538     * Is VectorNTI compatable output being produced?
539     * @return false by default.
540     */
541    public boolean getVectorNTISupport(){
542      return vecNTISupport;
543    }
544
545    private String sequenceBufferCreator(Object key, Object value) {
546        StringBuffer temp = new StringBuffer();
547
548        if (value == null) {
549            temp.append(key.toString());
550        }
551        else if (value instanceof ArrayList) {
552            Iterator iter = ((ArrayList) value).iterator();
553            temp.append(key.toString() + " " + iter.next());
554            while (iter.hasNext()) {
555              if (vecNTISupport) {
556                temp.append(nl + key.toString() +"            " + iter.next());
557              }
558              else {
559                temp.append(nl + "            " + iter.next());
560              }
561            }
562        }
563        else {
564            // FIXME: (kj) unsafe cast to String
565            StringTokenizer valueToke = new StringTokenizer((String) value, " ");
566            int fullline = 80;
567            int length = 0;
568            // FIXME: (kj) unsafe cast to String
569            temp.append((String) key);
570            if (valueToke.hasMoreTokens()) {
571                String token = valueToke.nextToken();
572
573                while (true) {
574                    length = (temp.length() % (fullline + 1)) + token.length() + 1;
575                    if (temp.length() % (fullline + 1) == 0) length = 81 + token.length();
576                    while (length <= fullline && valueToke.hasMoreTokens()) {
577                        temp.append(" " + token);
578                        token = valueToke.nextToken();
579                        length = (temp.length() % (fullline + 1)) + token.length() + 1;
580                        if (temp.length() % (fullline + 1) == 0) length = 81 + token.length();
581                    }
582                    if (valueToke.hasMoreTokens()) {
583                        for(int i = length-token.length(); i < fullline; i++) {
584                            temp.append(" ");
585                        }
586                        temp.append(nl + "           ");
587                    }
588                    else if (length <= fullline) {
589                        temp.append(" " + token);
590                        break;
591                    }
592                    else {
593                        temp.append(nl);
594                        temp.append("            " + token);
595                        break;
596                    }
597                }
598            }
599            else {
600                temp.append(" ");
601            }
602        }
603
604        return temp.substring(0);
605    }
606
607    private StringBuffer fixLength(StringBuffer temp, int length) {
608        // FIXME: (kj) check performance
609        while (temp.length() < length) {
610            temp.append(" ");
611        }
612        return temp;
613    }
614
615    private void locusLineCreator(int size) {
616        idb = fixLength(idb, 30);
617        typeb = fixLength(typeb, 8);
618
619        sizeb.insert(0, size);
620        while(sizeb.length() < 12) {sizeb.insert(0, " ");}
621        sizeb.append(" bp ");
622
623        if (strb.length() > 0) {
624            strb.append("-");
625        }
626        strb = fixLength(strb, 3);
627        circb = fixLength(circb, 9);
628        mdatb = fixLength(mdatb, 11);
629        divb = fixLength(divb, 4);
630        idb.insert(29, sizeb);
631        idb.insert(44, strb);
632        idb.insert(47, typeb);
633        idb.insert(55, circb);
634        idb.insert(64, divb);
635        idb.insert(68, mdatb);
636        idb.setLength(79);
637    }
638}