001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.io;
023
024import java.io.PrintStream;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.Iterator;
028import java.util.List;
029import java.util.StringTokenizer;
030
031import org.biojava.bio.seq.Feature;
032import org.biojava.bio.seq.StrandedFeature;
033import org.biojava.bio.symbol.Alphabet;
034import org.biojava.bio.symbol.FuzzyLocation;
035import org.biojava.bio.symbol.IllegalAlphabetException;
036import org.biojava.bio.symbol.IllegalSymbolException;
037import org.biojava.bio.symbol.Location;
038import org.biojava.bio.symbol.PointLocation;
039import org.biojava.bio.symbol.RangeLocation;
040import org.biojava.bio.symbol.Symbol;
041
042/**
043 * Formats a sequence into Swissprot/TrEMBL format.  Modeled after
044 * EmblFileFormer.
045 *
046 * @author Greg Cox
047 * @since 1.2
048 * @deprecated Use org.biojavax.bio.seq.io framework instead
049 */
050public class SwissprotFileFormer extends AbstractGenEmblFileFormer
051    implements SeqFileFormer
052{
    // Main qualifier formatting buffer; emptied and reused each time
    // addFeatureProperty formats a qualifier block
    private StringBuffer qb = new StringBuffer();
    // Utility formatting buffer; emptied and reused by startFeature and
    // addFeatureProperty as scratch space
    private StringBuffer ub = new StringBuffer();

    // Buffers for each possible sequence property line.  Each one stays
    // null until the matching property has been supplied; addSymbols only
    // prints the buffers that are non-null.
    private StringBuffer idb = null; // ID line (setName, or the "ID" property)
    private StringBuffer acb = null; // AC line (Swissprot accessions property)
    private StringBuffer dtb = null; // DT line ("DT" / "MDAT")
    private StringBuffer deb = null; // DE line ("DE" / "DEFINITION")
    private StringBuffer svb = null; // SV line ("SV" / "VERSION")
    private StringBuffer kwb = null; // KW line ("KW" / "KEYWORDS")
    private StringBuffer osb = null; // OS line ("OS" / "SOURCE")
    private StringBuffer ocb = null; // OC line ("OC" / "ORGANISM")
    private StringBuffer ccb = null; // CC line ("CC" / "COMMENT")
    // Feature table lines, accumulated by startFeature and
    // addFeatureProperty and printed by addSymbols
    private StringBuffer ftb = new StringBuffer();

    // Static variables
    // Width, in characters, of one formatted location point (see formatPoint)
    static int LOCATION_WIDTH = 6;

    // Member variables
    // Stream the formatted entry is written to
    PrintStream mStream;
075
076    // Constructors and initialization
077
078    /**
079     * Creates a new <code>SwissprotFileFormer</code> using
080     * <code>System.out</code> stream.
081     */
082    protected SwissprotFileFormer()
083    {
084        super();
085        this.mStream = System.out;
086    }
087
088    /**
089     * Creates a new <code>SwissprotFileFormer</code> using the
090     * specified stream.
091     *
092     * @param theStream a <code>PrintStream</code> object.
093     */
094    protected SwissprotFileFormer(PrintStream theStream)
095    {
096        super();
097        this.mStream = theStream;
098    }
099
100    // Interface implementations
101    // SeqIOListener methods
102
103    /**
104     * Start the processing of a sequence.  This method exists primarily
105     * to enforce the life-cycles of SeqIOListener objects.
106     */
107    public void startSequence() throws ParseException
108    {
109    }
110
111    /**
112     * Notify the listener that processing of the sequence is complete.
113     */
114    public void endSequence() throws ParseException
115    {
116    }
117
118    /**
119     * The name is printed out as part of the identifier line.  It will be
120     * replaced if an ID keyword exists in the annotations.
121     *
122     * @param theName the String that should be returned by getName for the sequence
123     * being parsed
124     */
125    public void setName(String theName) throws ParseException
126    {
127        idb = new StringBuffer("ID   " + theName);
128    }
129
130    /**
131     * Null implementation.  This object formats and prints a sequence.  The
132     * URI alone cannot be printed in Swissprot format.  Therefore, it's
133     * easiest to ignore it.     
134     * @param theURI the new URI of the sequence
135     */
136    public void setURI(String theURI) throws ParseException
137    {
138    }
139
140    /**
141     * Prints out the sequences properties in order.
142     * Prints out the symbol array passed in in lines of 60, blocks of 10
143     *
144     * @param theAlphabet The alphabet of the symbol data
145     * @param theSymbols An array containing symbols
146     * @param theStart The start offset of valid data within the array
147     * @param theLength The number of valid symbols in the array
148     *
149     * @throws IllegalAlphabetException if we can't cope with this
150     *                                  alphabet.
151     */
152    public void addSymbols(Alphabet theAlphabet,
153                           Symbol[] theSymbols,
154                           int theStart,
155                           int theLength)
156        throws IllegalAlphabetException
157    {
158
159        PrintStream stream = this.getPrintStream();
160
161        // Print out all of the sequence properties in order
162        if (idb != null) {stream.println(idb); stream.println("XX");}
163        if (acb != null) {stream.println(acb); stream.println("XX");}
164        if (svb != null) {stream.println(svb); stream.println("XX");}
165        if (dtb != null) {stream.println(dtb); stream.println("XX");}
166        if (deb != null) {stream.println(deb); stream.println("XX");}
167        if (kwb != null) {stream.println(kwb); stream.println("XX");}
168        if (osb != null) {stream.println(osb);}
169        if (ocb != null) {stream.println(ocb); stream.println("XX");}
170        if (ccb != null) {stream.println(ccb); stream.println("XX");}
171        if (ftb.length() != 0) {
172            stream.print(ftb);
173        }
174
175        this.printOutSequenceHeaderLine(theAlphabet, theSymbols, theStart, theLength);
176
177        List brokenLines = this.breakSymbolArray(theAlphabet, theSymbols,
178                                                 theStart, theLength);
179
180        java.util.Iterator iterator = brokenLines.iterator();
181        String leader = "     ";
182        while(iterator.hasNext())
183        {
184            stream.print(leader + iterator.next() + nl);
185        }
186        stream.println("//");
187    }
188
189    /**
190     * Formats sequence properties into form suitable for printing to
191     * file.
192     *
193     * @param key    The key of the sequence property
194     * @param value  The value of the sequence property
195     *
196     * @returns      Properly formated string
197     */
198    private String sequenceBufferCreator(Object key, Object value) {
199        StringBuffer temp = new StringBuffer();
200
201        if (value == null) {
202            temp.append((String) key);
203        }
204        else if (value instanceof ArrayList) {
205            Iterator iter = ((ArrayList) value).iterator();
206            while (iter.hasNext()) {
207                temp.append((String) key + "   " + iter.next());
208                if (iter.hasNext())
209                    temp.append(nl);
210            }
211        }
212        else {
213            StringTokenizer valueToke = new StringTokenizer((String) value, " ");
214            int fullline = 80;
215            int length = 0;
216            String token = valueToke.nextToken();
217
218            while (true) {
219                temp.append((String) key + "  ");
220                length = (temp.length() % (fullline + 1)) + token.length() + 1;
221                if (temp.length() % (fullline + 1) == 0) length = 81 + token.length();
222                while (length <= fullline && valueToke.hasMoreTokens()) {
223                    temp.append(" " + token);
224                    token = valueToke.nextToken();
225                    length = (temp.length() % (fullline + 1)) + token.length() + 1;
226                    if (temp.length() % (fullline + 1) == 0) length = 81 + token.length();
227                }
228                if (valueToke.hasMoreTokens()) {
229                    for(int i = length-token.length(); i < fullline; i++) {
230                        temp.append(" ");
231                    }
232                    temp.append(nl);
233                }
234                else if (length <= fullline) {
235                    temp.append(" " + token);
236                    break;
237                }
238                else {
239                    temp.append(nl);
240                    temp.append((String) key + "   " + token);
241                    break;
242                }
243            }
244        }
245
246        return temp.toString();
247    }
248
249    /**
250     * Notify the listener of a sequence-wide property.  This might
251     * be stored as an entry in the sequence's annotation bundle.
252     * Checks for possible known properties to be shown in the file.
253     *
254     * @param key Key the property will be stored under
255     * @param value Value stored under the key
256     */
257    public void addSequenceProperty(Object key, Object value) throws ParseException
258    {
259        if (key.equals("ID")) {
260            idb.setLength(0);
261            idb.append("ID   " + (String) value);
262        }
263        else if (key.equals("DT") || key.equals("MDAT")) {
264            dtb = new StringBuffer(sequenceBufferCreator("DT", value));
265        }
266        else if (key.equals("DE") || key.equals("DEFINITION")) {
267            deb = new StringBuffer(sequenceBufferCreator("DE", value));
268        }
269        else if (key.equals("SV") || key.equals("VERSION")) {
270            svb = new StringBuffer(sequenceBufferCreator("SV", value));
271        }
272        else if (key.equals("KW") || key.equals("KEYWORDS")) {
273            kwb = new StringBuffer(sequenceBufferCreator("KW", value));
274        }
275        else if (key.equals("OS") || key.equals("SOURCE")) {
276            osb = new StringBuffer(sequenceBufferCreator("OS", value));
277        }
278        else if (key.equals("OC") || key.equals("ORGANISM")) {
279            ocb = new StringBuffer(sequenceBufferCreator("OC", value));
280        }
281        else if (key.equals("CC") || key.equals("COMMENT")) {
282            ccb = new StringBuffer(sequenceBufferCreator("CC", value));
283        }
284        else if (key.equals(SwissprotProcessor.PROPERTY_SWISSPROT_ACCESSIONS))
285        {
286            acb = new StringBuffer();
287            acb.append("AC   ");
288            for (Iterator ai = ((List) value).iterator(); ai.hasNext();)
289            {
290                acb.append((String) ai.next());
291                acb.append(";");
292            }
293        }
294    }
295
296    /**
297     * Null implementation.
298     *
299     * @param templ The template for this new feature object
300     */
301    public void startFeature(Feature.Template templ) throws ParseException
302    {
303        // There are 19 spaces in the leader
304        String leader = "FT                   ";
305
306        ub.setLength(0);
307        ub.append(leader);
308
309        StringBuffer lb = formatLocation(ub, templ.location);
310
311        lb.replace(5, 5 + templ.type.length(), templ.type);
312        ftb.append(lb + nl);
313    }
314
315    /**
316     * Null implementation.
317     */
318    public void endFeature() throws ParseException
319    {
320    }
321
322    /**
323     * Null implementation
324     *
325     * @param key Key the property will be stored under
326     * @param value Value stored under the key
327     */
328
329    public void addFeatureProperty(Object key, Object value) throws ParseException
330    {
331        // There are 19 spaces in the leader
332        String leader = "FT                   ";
333
334        // Don't print internal data structures
335        if (key.equals(Feature.PROPERTY_DATA_KEY))
336            return;
337
338        // The value may be a collection if several qualifiers of the
339        // same type are present in a feature
340        if (Collection.class.isInstance(value))
341        {
342            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();)
343            {
344                qb.setLength(0);
345                ub.setLength(0);
346                StringBuffer fb = formatQualifierBlock(qb,
347                                                       formatQualifier(ub, key, vi.next()).toString(),
348                                                       leader,
349                                                       80);
350                ftb.append(fb + nl);
351            }
352        }
353        else
354        {
355            qb.setLength(0);
356            ub.setLength(0);
357            StringBuffer fb = formatQualifierBlock(qb,
358                                                   formatQualifier(ub, key, value).toString(),
359                                                   leader,
360                                                   80);
361            ftb.append(fb + nl);
362        }
363    }
364
365    // SeqFileFormer methods
366    /**
367     * <code>getPrintStream</code> returns the
368     * <code>PrintStream</code> to which an instance of SwissprotFileFormer
369     * will write the formatted data. The default is System.out
370     *
371     * @return the <code>PrintStream</code> which will be written to.
372     */
373    public PrintStream getPrintStream()
374    {
375        return(this.mStream);
376    }
377
378    /**
379     * <code>setPrintStream</code> informs an instance which
380     * <code>PrintStream</code> to use.
381     *
382     * @param theStream a <code>PrintStream</code> to write to.
383     */
384    public void setPrintStream(PrintStream theStream)
385    {
386        this.mStream = theStream;
387    }
388
389    /**
390     * <code>formatLocation</code> creates a String representation of
391     * a <code>Location</code>. Strand information is ignored, as Swissprot
392     * files represent proteins. An alternative form of this function does not
393     * take a Strand; that form is available only on SwissprotFileFormer; it
394     * is not part of the SeqFileFormer interface.
395     *
396     * @param theBuffer a <code>StringBuffer</code> to append the location
397     * to.
398     * @param theLocation a <code>Location</code> to format.
399     * @param theStrand a <code>StrandedFeature.Strand</code> indicating nothing
400     * of relevance
401     *
402     * @return a <code>StringBuffer</code> with the location appended.
403     */
404    public StringBuffer formatLocation(StringBuffer theBuffer,
405                                       Location theLocation,
406                                       StrandedFeature.Strand theStrand)
407    {
408        return(this.formatLocation(theBuffer, theLocation));
409    }
410
411    /**
412     * Creates a string representation of the location of a feature
413     *
414     * @param theFeature The feature with the location to format
415     * @return String The formatted location
416     */
417    public String formatLocation(Feature theFeature)
418    {
419        StringBuffer toReturn = this.formatLocation(new StringBuffer(), theFeature.getLocation());
420        return toReturn.toString();
421    }
422
423    // Public methods
424    /**
425     * <code>formatLocation</code> creates a String representation of
426     * a <code>Location</code>. The stringbuffer returned represents columns
427     * 15-27 of the Swissprot feature table entry. An alternative form of this
428     * function takes a Strand; that form is part of the SeqFileFormer
429     * interface.
430     *
431     * @param theBuffer a <code>StringBuffer</code> to append the location
432     * to.
433     * @param theLocation a <code>Location</code> to format.
434     *
435     * @return a <code>StringBuffer</code> with the location appended.
436     */
437    public StringBuffer formatLocation(StringBuffer theBuffer,
438                                       Location theLocation)
439        {
440            // Five Location cases, each treated seperately:
441            //   Point Location: "     5      5"
442            //   Range Location: "     5     10"
443            //   Fuzzy Location: "    <5     10"
444            //   Fuzzy Location: "     ?     10"
445            //   Fuzzy Location: "   ?24     35" (Not in the current
446            //       specification, but used anyways
447            StringBuffer startPoint = new StringBuffer(LOCATION_WIDTH);
448            StringBuffer endPoint   = new StringBuffer(LOCATION_WIDTH);
449            if((theLocation instanceof PointLocation) ||
450               (theLocation instanceof RangeLocation))
451            {
452                //   Point Location: "     5      5"
453                //   Range Location: "     5     10"
454                startPoint = formatPoint(theLocation.getMin(), theLocation.getMin(), false);
455                endPoint = formatPoint(theLocation.getMax(), theLocation.getMax(), false);
456            }
457            else if(theLocation instanceof FuzzyLocation)
458            {
459                // Handle all fuzzy location types through the magic of delegation.
460                // If you pass things around long enough, someone's bound to do it
461                // for you
462                FuzzyLocation tempLocation = (FuzzyLocation)theLocation;
463                //System.out.println("OuterMin: " + tempLocation.getOuterMin());
464                //System.out.println("InnerMin: " + tempLocation.getInnerMin());
465                //System.out.println("InnerMax: " + tempLocation.getInnerMax());
466                //System.out.println("OuterMax: " + tempLocation.getOuterMax());
467                startPoint = this.formatPoint(tempLocation.getOuterMin(),
468                                              tempLocation.getInnerMin(), tempLocation.isMinFuzzy());
469                endPoint = this.formatPoint(tempLocation.getInnerMax(),
470                                            tempLocation.getOuterMax(), tempLocation.isMaxFuzzy());
471            }
472
473            return new StringBuffer(startPoint.toString() + " " + endPoint.toString());
474        }
475
476    // Protected methods
477    /**
478     * Prints out sequence header with only length data.
479     *
480     * @param theAlphabet The alphabet of the symbol data
481     * @param theSymbols An array containing symbols
482     * @param theStart The start offset of valid data within the array
483     * @param theLength The number of valid symbols in the array
484     *
485     * @throws IllegalAlphabetException if we can't cope with this
486     *                                  alphabet.
487     */
488    protected void printOutSequenceHeaderLine(Alphabet theAlphabet,
489                                              Symbol[] theSymbols,
490                                              int theStart,
491                                              int theLength)
492        throws IllegalAlphabetException
493    {
494        this.getPrintStream().println("SQ   SEQUENCE   " + theLength + " AA;   ");
495    }
496
497    /**
498     * Converts the symbol list passed in into an array of strings.  The
499     * strings will be blocks of ten, with six blocks on a line.
500     *
501     * @param theAlphabet The alphabet of the symbol data
502     * @param theSymbols An array containing symbols
503     * @param theStart The start offset of valid data within the array
504     * @param theLength The number of valid symbols in the array
505     * @return The symbol list passed in broken into blocks of ten
506     * characters, six to a string.
507     *
508     * @throws IllegalAlphabetException if we can't cope with this
509     *                                  alphabet.
510     */
511    protected List breakSymbolArray(Alphabet theAlphabet,
512                                    Symbol[] theSymbols,
513                                    int theStart,
514                                    int theLength)
515        throws IllegalAlphabetException
516    {
517        List returnList = new ArrayList(theLength / 60 + 1);
518        int blockCount = 0;
519        int blockIndex = 0;
520        StringBuffer tempString = new StringBuffer();
521        SymbolTokenization tokenization;
522        try {
523            tokenization = theAlphabet.getTokenization("token");
524        } catch (Exception ex) {
525            throw new IllegalAlphabetException(ex, "Couldn't get tokenization for this alphabet");
526        }
527        for(int i = theStart; i < theStart + theLength; i++)
528        {
529            try
530            {
531                theAlphabet.validate(theSymbols[i]);
532            }
533            catch (IllegalSymbolException e)
534            {
535                throw new IllegalAlphabetException(e);
536            }
537
538            // Every six completed blocks, put on the stack to return
539            if(blockIndex == 10)
540            {
541                tempString.append(' ');
542                blockIndex = 0;
543                blockCount++;
544            }
545
546            if(blockCount == 6)
547            {
548                returnList.add(tempString.substring(0));
549                tempString.setLength(0);
550                blockCount = 0;
551                blockIndex = 0;
552            }
553            try {
554                tempString.append(tokenization.tokenizeSymbol(theSymbols[i]));
555            } catch (IllegalSymbolException ex) {
556                throw new IllegalAlphabetException(ex, "Couldn't tokenize symbols");
557            }
558            blockIndex++;
559        }
560
561        // Add the last line on
562        if(tempString.length() != 0)
563        {
564            returnList.add(tempString.substring(0));
565        }
566        return returnList;
567    }
568
569    /**
570     * Simple method that adds spaces onto the buffer passed in.  This method
571     * exists to refactor some code used in location formatting.  It isn't
572     * intended to be generally used.
573     *
574     * @param theBuffer Buffer to append whitespace to.
575     * @param theLength Ammount of whitespace to append.
576     */
577    protected void fillBuffer(StringBuffer theBuffer, int theLength)
578    {
579        for(int i = 0; i < theLength; i++)
580        {
581            theBuffer.append(' ');
582        }
583    }
584
585    /**
586     * Formats the points from fuzzy locations.  This is called easily with
587     * this.formatPoint(FuzzyLocation.getInnerMax(), FuzzyLocation.getOuterMax(), FuzzyLocation.isFuzzyMax())
588     *
589     * @param theMaxIndex Inner index of the fuzzy point
590     * @param theMinIndex Outer index of the fuzzy point
591     * @param isFuzzy Indicates if this point is fuzzy
592     */
593    protected StringBuffer formatPoint(int theMinIndex, int theMaxIndex, boolean isFuzzy)
594    {
595        StringBuffer bufferToReturn = new StringBuffer(LOCATION_WIDTH);
596        if(isFuzzy == false)
597        {
598            String tempString = Integer.toString(theMinIndex);
599            int offset = LOCATION_WIDTH - tempString.length();
600            this.fillBuffer(bufferToReturn, offset);
601            bufferToReturn.append(tempString);
602        }
603        else
604        {
605            // MIN_VALUE to MAX_VALUE is the ? location regardless of which end is which
606            if((theMinIndex == Integer.MIN_VALUE) && (theMaxIndex == Integer.MAX_VALUE))
607            {
608                int offset = LOCATION_WIDTH - 1;
609                this.fillBuffer(bufferToReturn, offset);
610                bufferToReturn.append('?');
611            }
612            // If the outer index is MIN_VALUE, that's <n
613            else if(theMinIndex == Integer.MIN_VALUE)
614            {
615                String tempString = Integer.toString(theMaxIndex);
616                int offset = LOCATION_WIDTH - tempString.length() - 1;
617                this.fillBuffer(bufferToReturn, offset);
618                bufferToReturn.append('<');
619                bufferToReturn.append(tempString);
620            }
621            // If the outer index is MAX_VALUE, that's >n
622            else if(theMaxIndex == Integer.MAX_VALUE)
623            {
624                String tempString = Integer.toString(theMinIndex);
625                int offset = LOCATION_WIDTH - tempString.length() - 1;
626                this.fillBuffer(bufferToReturn, offset);
627                bufferToReturn.append('>');
628                bufferToReturn.append(tempString);
629            }
630            // The only swissprot location left is ?nn
631            else if(theMinIndex == theMaxIndex)
632            {
633                String tempString = Integer.toString(theMinIndex);
634                int offset = LOCATION_WIDTH - tempString.length() - 1;
635                this.fillBuffer(bufferToReturn, offset);
636                bufferToReturn.append('?');
637                bufferToReturn.append(tempString);
638            }
639            else
640            {
641                // The location cannot be formatted in Swissprot format
642                // Revisit
643                System.out.println("Error in formatPoint");
644                System.out.println("\tInner: " + theMinIndex);
645                System.out.println("\tOuter: " + theMaxIndex);
646                System.out.println("\tFuzzy: " + isFuzzy);
647            }
648        }
649        return bufferToReturn;
650    }
651
652    // Private methods
653}