001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.program.sax;
023
import java.io.BufferedReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.biojava.bio.BioException;
import org.biojava.bio.search.SearchContentHandler;
import org.biojava.utils.ParserException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
040
041/**
042 * <p><code>FastaSearchSAXParser</code> is a SAX2 compliant parser for
 * '-m 10' format output from the Fasta search program (see the
044 * Fasta documentation for details of this format).</p>
045 *
046 * <p>Versions of Fasta supported are as follows. Note that the compile
047 * time option -DM10_CONS should be used to allow correct reporting of
048 * the number of matches in HSPSummary elements</p>
049 *
050 * <ul>
051 *   <li>33t07</li>
052 *   <li>33t08 (current tests are against output from this version)</li>
053 * </ul>
054 *
055 * <p>The SAX2 events produced are as if the input to the parser was
056 * an XML file validating against the BioJava
057 * BlastLikeDataSetCollection DTD. There is no requirement for an
058 * intermediate conversion of native output to XML format.</p>
059 *
060 * @author Keith James
061 * @since 1.2
062 */
063public class FastaSearchSAXParser extends AbstractNativeAppSAXParser
064    implements SearchContentHandler
065{
066    private FastaSearchParser fastaParser;
067    private Map               searchProperties;
068    private Map               hitProperties;
069
070    private String queryID;
071    private String databaseID;
072
073    private AttributesImpl  attributes;
074    private QName                qName;
075
076    private boolean firstHit = true;
077
078    // Set/reset by callback from main parser
079    private boolean moreSearchesAvailable = true;
080
081    // For formatting rounded numbers
082    private NumberFormat nFormat;
083
084    // Platform independent linebreaks
085    private String nl;
086
087    // For creating character events
088    private StringBuffer props;
089    private StringBuffer seqTokens;
090    private String stringOut;
091    private char [] charOut;
092
093    /**
094     * Creates a new <code>FastaSearchSAXParser</code> instance.
095     */
096    public FastaSearchSAXParser()
097    {
098        this.setNamespacePrefix("biojava");
099        this.addPrefixMapping("biojava", "http://www.biojava.org");
100
101        fastaParser = new FastaSearchParser();
102        attributes  = new AttributesImpl();
103        qName       = new QName(this);
104        nFormat     = new DecimalFormat("###.0");
105        props       = new StringBuffer(1024);
106        seqTokens   = new StringBuffer(2048);
107        nl          = System.getProperty("line.separator");
108    }
109
110    public void parse(InputSource source)
111        throws IOException, SAXException
112    {
113        BufferedReader content = getContentStream(source);
114
115        if (oHandler == null)
116            throw new SAXException("Running FastaSearchSAXParser with null ContentHandler");
117
118        try
119        {
120            attributes.clear();
121            // Namespace attribute
122            qName.setQName("xmlns");
123            attributes.addAttribute(qName.getURI(),
124                                    qName.getLocalName(),
125                                    qName.getQName(),
126                                    "CDATA",
127                                    "");
128            // Namespace attribute
129            qName.setQName("xmlns:biojava");
130            attributes.addAttribute(qName.getURI(),
131                                    qName.getLocalName(),
132                                    qName.getQName(),
133                                    "CDATA",
134                                    "http://www.biojava.org");
135
136            // Start the BlastLikeDataSetCollection
137            startElement(new QName(this, this.prefix("BlastLikeDataSetCollection")),
138                         (Attributes) attributes);
139
140            while (moreSearchesAvailable)
141            {
142                // This method returns once a single result is
143                // parsed. The parser also informs us of subsequent
144                // results via the setMoreSearches() method.
145                fastaParser.parseSearch(content, this);
146            }
147
148            // End the BlastLikeDataSetCollection
149            endElement(new QName(this, this.prefix("BlastLikeDataSetCollection")));
150        }
151        catch (BioException be)
152        {
153            throw new SAXException(be);
154        }
155        catch (ParserException pe)
156        {
157            throw new SAXException(pe);
158        }
159    }
160
161    public boolean getMoreSearches()
162    {
163        return moreSearchesAvailable;
164    }
165
166    public void setMoreSearches(boolean value)
167    {
168        moreSearchesAvailable = value;
169    }
170
171    /**
172     * <code>setQuerySeq</code> identifies the query sequence by a
173     * name, ID or URN.
174     *
175     * @param identifier a <code>String</code> which should be an
176     * unique identifer for the sequence.
177     *
178     * @deprecated use <code>setQueryID</code> instead.
179     */
180    public void setQuerySeq(String identifier)
181    {
182        setQueryID(identifier);
183    }
184
185    public void setQueryID(String queryID)
186    {
187        this.queryID = queryID;
188    }
189
190    /**
191     * <code>setSubjectDB</code> identifies the database searched by a
192     * name, ID or URN.
193     *
194     * @param identifier a <code>String</code> which should be an unique
195     * identifier for the database searched.
196     *
197     * @deprecated use <code>setDatabaseID</code> instead.
198     */
199    public void setSubjectDB(String identifier)
200    {
201        setDatabaseID(identifier);
202    }
203
204    public void setDatabaseID(String databaseID)
205    {
206        this.databaseID = databaseID;
207    }
208
209    public void startSearch()
210    {
211        searchProperties = new HashMap();
212    }
213
214    public void addSearchProperty(Object key, Object value)
215    {
216        searchProperties.put(key, value);
217    }
218
219    public void endSearch()
220    {
221        try
222        {
223            // If we found any hits then we need to close a Detail
224            // element too
225            if (! firstHit)
226            {
227                endElement(new QName(this, this.prefix("Detail")));
228
229                // Prime to get next firstHit
230                firstHit = true;
231            }
232
233            endElement(new QName(this, this.prefix("BlastLikeDataSet")));
234        }
235        catch (SAXException se)
236        {
237            System.err.println("An error occurred while creating an endElement SAX event: ");
238            se.printStackTrace();
239        }
240    }
241
242    public void startHeader() { }
243
244    public void endHeader()
245    {
246        try
247        {
248            attributes.clear();
249            // Program name attribute
250            qName.setQName("program");
251            attributes.addAttribute(qName.getURI(),
252                                    qName.getLocalName(),
253                                    qName.getQName(),
254                                    "CDATA",
255                                    (String) searchProperties.get("pg_name"));
256
257            // Program version attribute
258            qName.setQName("version");
259            attributes.addAttribute(qName.getURI(),
260                                    qName.getLocalName(),
261                                    qName.getQName(),
262                                    "CDATA",
263                                    (String) searchProperties.get("pg_ver"));
264
265            // Start the BlastLikeDataSet
266            startElement(new QName(this, this.prefix("BlastLikeDataSet")),
267                         (Attributes) attributes);
268
269            attributes.clear();
270            // Start the Header
271            startElement(new QName(this, this.prefix("Header")),
272                         (Attributes) attributes);
273
274            attributes.clear();
275            // Query id attribute
276            qName.setQName("id");
277            attributes.addAttribute(qName.getURI(),
278                                    qName.getLocalName(),
279                                    qName.getQName(),
280                                    "CDATA",
281                                    queryID);
282
283            // metaData attribute for QueryId
284            qName.setQName("metaData");
285            attributes.addAttribute(qName.getURI(),
286                                    qName.getLocalName(),
287                                    qName.getQName(),
288                                    "CDATA",
289                                    "none");
290
291            // Start the QueryId
292            startElement(new QName(this, this.prefix("QueryId")),
293                         (Attributes) attributes);
294            // End the QueryId
295            endElement(new QName(this, this.prefix("QueryId")));
296
297            attributes.clear();
298            // id attribute for DatabaseId
299            qName.setQName("id");
300            attributes.addAttribute(qName.getURI(),
301                                    qName.getLocalName(),
302                                    qName.getQName(),
303                                    "CDATA",
304                                    databaseID);
305
306            // metaData attribute for DatabaseId
307            qName.setQName("metaData");
308            attributes.addAttribute(qName.getURI(),
309                                    qName.getLocalName(),
310                                    qName.getQName(),
311                                    "CDATA",
312                                    "none");
313
314            // Start the DatabaseId
315            startElement(new QName(this, this.prefix("DatabaseId")),
316                         (Attributes) attributes);
317            // End the DatabaseId
318            endElement(new QName(this, this.prefix("DatabaseId")));
319
320            attributes.clear();
321            // Whitespace attribute for raw data
322            qName.setQName("xml:space");
323            attributes.addAttribute(qName.getURI(),
324                                    qName.getLocalName(),
325                                    qName.getQName(),
326                                    "CDATA",
327                                    "preserve");
328
329            // Start the RawOutput
330            startElement(new QName(this, this.prefix("RawOutput")),
331                         (Attributes) attributes);
332
333            // Reconstitute the 'raw' header from the properties Map
334            Set spKeys = searchProperties.keySet();
335
336            String [] searchPropKeys =
337                (String []) spKeys.toArray(new String [spKeys.size() - 1]);
338            Arrays.sort(searchPropKeys);
339
340            // Clear StringBuffer
341            props.setLength(0);
342
343            props.append(nl);
344            for (int i = 0; i < searchPropKeys.length; i++)
345            {
346                props.append(searchPropKeys[i] + ": ");
347                props.append((String) searchProperties.get(searchPropKeys[i]) + nl);
348            }
349
350            charOut = new char [props.length()];
351            props.getChars(0, props.length(), charOut, 0);
352
353            // Characters of raw header
354            characters(charOut, 0, charOut.length);
355
356            // End the RawOutput
357            endElement(new QName(this, this.prefix("RawOutput")));
358
359            // End the Header
360            endElement(new QName(this, this.prefix("Header")));
361        }
362        catch (SAXException se)
363        {
364            System.err.println("An error occurred while creating SAX events from header data: ");
365            se.printStackTrace();
366        }
367    }
368
369    public void startHit()
370    {
371        // Hit elements must be wrapped in a Detail element so we
372        // start one at the first hit
373        if (firstHit)
374        {
375            firstHit = false;
376            attributes.clear();
377
378            try
379            {
380                startElement(new QName(this, this.prefix("Detail")),
381                             (Attributes) attributes);
382            }
383            catch (SAXException se)
384            {
385                System.err.println("An error occurred while creating startElement SAX event from hit data: ");
386                se.printStackTrace();
387            }
388        }
389
390        hitProperties = new HashMap();
391    }
392
393    public void addHitProperty(Object key, Object value)
394    {
395        hitProperties.put(key, value);
396    }
397
398    public void endHit() { }
399
400    public void startSubHit() { }
401
402    public void addSubHitProperty(Object key, Object value)
403    {
404        hitProperties.put(key, value);
405    }
406
407    public void endSubHit()
408    {
409        attributes.clear();
410        // Query sequence length attribute
411        qName.setQName("sequenceLength");
412        attributes.addAttribute(qName.getURI(),
413                                qName.getLocalName(),
414                                qName.getQName(),
415                                "CDATA",
416                                (String) hitProperties.get("subject_sq_len"));
417
418        try
419        {
420            // Start the Hit
421            startElement(new QName(this, this.prefix("Hit")),
422                         (Attributes) attributes);
423
424            attributes.clear();
425            // Hit id attribute
426            qName.setQName("id");
427            attributes.addAttribute(qName.getURI(),
428                                    qName.getLocalName(),
429                                    qName.getQName(),
430                                    "CDATA",
431                                    (String) hitProperties.get("id"));
432            // Metadata attribute
433            qName.setQName("metaData");
434            attributes.addAttribute(qName.getURI(),
435                                    qName.getLocalName(),
436                                    qName.getQName(),
437                                    "CDATA",
438                                    "none");
439            // Start the HitId
440            startElement(new QName(this, this.prefix("HitId")),
441                         (Attributes) attributes);
442            // End the HitId
443            endElement(new QName(this, this.prefix("HitId")));
444
445            attributes.clear();
446            // Start the HitDescription
447            startElement(new QName(this, this.prefix("HitDescription")),
448                         (Attributes) attributes);
449
450            stringOut = (String) hitProperties.get("desc");
451
452            charOut = new char [stringOut.length()];
453            stringOut.getChars(0, stringOut.length(), charOut, 0);
454
455            // Characters of description
456            characters(charOut, 0, charOut.length);
457
458            // End the HitDescription
459            endElement(new QName(this, this.prefix("HitDescription")));
460
461            // Start the HSPCollection
462            startElement(new QName(this, this.prefix("HSPCollection")),
463                         (Attributes) attributes);
464
465            // Start the HSP (for Fasta, we use one "HSP" to represent the hit
466            startElement(new QName(this, this.prefix("HSP")),
467                         (Attributes) attributes);
468
469            String score;
470            if (hitProperties.containsKey("fa_z-score"))
471                score = (String) hitProperties.get("fa_z-score");
472            else
473                throw new SAXException("Failed to retrieve hit z-score from search data");
474
475            // Score attribute
476            qName.setQName("score");
477            attributes.addAttribute(qName.getURI(),
478                                    qName.getLocalName(),
479                                    qName.getQName(),
480                                    "CDATA",
481                                    score);
482            // expectValue attribute
483            qName.setQName("expectValue");
484            attributes.addAttribute(qName.getURI(),
485                                    qName.getLocalName(),
486                                    qName.getQName(),
487                                    "CDATA",
488                                    (String) hitProperties.get("fa_expect"));
489            // numberOfIdentities attribute
490            qName.setQName("numberOfIdentities");
491            attributes.addAttribute(qName.getURI(),
492                                    qName.getLocalName(),
493                                    qName.getQName(),
494                                    "CDATA",
495                                    countTokens(':', (String) hitProperties.get("matchTokens")));
496
497            String overlap;
498            if (hitProperties.containsKey("fa_overlap"))
499                overlap = hitProperties.get("fa_overlap").toString();
500            else if (hitProperties.containsKey("sw_overlap"))
501                overlap = hitProperties.get("sw_overlap").toString();
502            else
503                throw new SAXException("Failed to retrieve hit overlap from search data");
504
505            // alignmentSize attribute
506            qName.setQName("alignmentSize");
507            attributes.addAttribute(qName.getURI(),
508                                    qName.getLocalName(),
509                                    qName.getQName(),
510                                    "CDATA",
511                                    overlap);
512
513            float percentId;
514            if (hitProperties.containsKey("fa_ident"))
515                percentId = Float.parseFloat((String) hitProperties.get("fa_ident"));
516            else
517                percentId = Float.parseFloat((String) hitProperties.get("sw_ident"));
518
519            // percentageIdentity attribute
520            qName.setQName("percentageIdentity");
521            attributes.addAttribute(qName.getURI(),
522                                    qName.getLocalName(),
523                                    qName.getQName(),
524                                    "CDATA",
525                                    nFormat.format(percentId * 100));
526
527            // Maybe proper RNA check? Should be same for query and subject
528            String seqType;
529            if (hitProperties.get("query_sq_type").equals("dna"))
530                seqType = "dna";
531            else
532                seqType = "protein";
533
534            // querySequenceType attribute
535            qName.setQName("querySequenceType");
536            attributes.addAttribute(qName.getURI(),
537                                    qName.getLocalName(),
538                                    qName.getQName(),
539                                    "CDATA",
540                                    seqType);
541
542            // Maybe proper RNA check? Maybe raise exception if not
543            // same as query type?
544            if (hitProperties.get("subject_sq_type").equals("dna"))
545                seqType = "dna";
546            else
547                seqType = "protein";
548
549            // hitSequenceType attribute
550            qName.setQName("hitSequenceType");
551            attributes.addAttribute(qName.getURI(),
552                                    qName.getLocalName(),
553                                    qName.getQName(),
554                                    "CDATA",
555                                    seqType);
556
557            // Strand information only valid for DNA
558            if (seqType.equals("dna"))
559            {
560                // queryStrand attribute (always plus for Fasta)
561                qName.setQName("queryStrand");
562                attributes.addAttribute(qName.getURI(),
563                                        qName.getLocalName(),
564                                        qName.getQName(),
565                                        "CDATA",
566                                        "plus");
567
568                String strand;
569                if (hitProperties.get("fa_frame").equals("f"))
570                    strand = "plus";
571                else
572                    strand = "minus";
573
574                // hitStrand attribute (may be minus for Fasta vs. nt sequence)
575                qName.setQName("hitStrand");
576                attributes.addAttribute(qName.getURI(),
577                                        qName.getLocalName(),
578                                        qName.getQName(),
579                                        "CDATA",
580                                        strand);
581            }
582
583            // Start the HSPSummary
584            startElement(new QName(this, this.prefix("HSPSummary")),
585                         (Attributes) attributes);
586
587            attributes.clear();
588            // Start the RawOutput
589            startElement(new QName(this, this.prefix("RawOutput")),
590                         (Attributes) attributes);
591
592            // Reconstitute the 'raw' header from the properties Map
593            Set hpKeys = hitProperties.keySet();
594
595            String [] hitPropKeys =
596                (String []) hpKeys.toArray(new String [hpKeys.size() - 1]);
597            Arrays.sort(hitPropKeys);
598
599            // Clear StringBuffer
600            props.setLength(0);
601            props.append(nl);
602            for (int i = 0; i < hitPropKeys.length; i++)
603            {
604                // Skip the sequence and consensus tokens
605                if (hitPropKeys[i].endsWith("Tokens"))
606                    continue;
607                props.append(hitPropKeys[i] + ": ");
608                props.append((String) hitProperties.get(hitPropKeys[i]) + nl);
609            }
610
611            charOut = new char [props.length()];
612            props.getChars(0, props.length(), charOut, 0);
613
614            // Characters of raw header
615            characters(charOut, 0, charOut.length);
616
617            // End the RawOutput
618            endElement(new QName(this, this.prefix("RawOutput")));
619
620            // End the HSPSummary
621            endElement(new QName(this, this.prefix("HSPSummary")));
622
623            // Start the BlastLikeAlignment
624            startElement(new QName(this, this.prefix("BlastLikeAlignment")),
625                         (Attributes) attributes);
626
627            String alStart     = (String) hitProperties.get("query_al_start");
628            String alStop      = (String) hitProperties.get("query_al_stop");
629            String alDispStart = (String) hitProperties.get("query_al_display_start");
630
631            // Query sequence startPosition attribute
632            qName.setQName("startPosition");
633            attributes.addAttribute(qName.getURI(),
634                                    qName.getLocalName(),
635                                    qName.getQName(),
636                                    "CDATA",
637                                    alStart);
638
639            // Query sequence stopPosition attribute
640            qName.setQName("stopPosition");
641            attributes.addAttribute(qName.getURI(),
642                                    qName.getLocalName(),
643                                    qName.getQName(),
644                                    "CDATA",
645                                    alStop);
646
647            // Start the QuerySequence
648            startElement(new QName(this, this.prefix("QuerySequence")),
649                         (Attributes) attributes);
650
651            seqTokens.setLength(0);
652            seqTokens.append((String) hitProperties.get("querySeqTokens"));
653
654            // Fasta includes context sequence which we need to trim
655            stringOut = prepSeqTokens(seqTokens,
656                                      Integer.parseInt(alStart),
657                                      Integer.parseInt(alStop),
658                                      Integer.parseInt(alDispStart));
659
660            charOut = new char [stringOut.length()];
661            stringOut.getChars(0, stringOut.length(), charOut, 0);
662
663            // Characters of QuerySequence
664            characters(charOut, 0, charOut.length);
665
666            // End the QuerySequence
667            endElement(new QName(this, this.prefix("QuerySequence")));
668
669            attributes.clear();
670            // Whitespace attribute for MatchConsensus
671            qName.setQName("xml:space");
672            attributes.addAttribute(qName.getURI(),
673                                    qName.getLocalName(),
674                                    qName.getQName(),
675                                    "CDATA",
676                                    "preserve");
677
678            // Start the MatchConsensus
679            startElement(new QName(this, this.prefix("MatchConsensus")),
680                         (Attributes) attributes);
681
682            stringOut = ((String) hitProperties.get("matchTokens")).trim();
683            
684            charOut = new char [stringOut.length()];
685            stringOut.getChars(0, stringOut.length(), charOut, 0);
686
687            // Characters of MatchConsensus
688            characters(charOut, 0, charOut.length);
689
690            // End the MatchConsensus
691            endElement(new QName(this, this.prefix("MatchConsensus")));
692
693            alStart     = (String) hitProperties.get("subject_al_start");
694            alStop      = (String) hitProperties.get("subject_al_stop");
695            alDispStart = (String) hitProperties.get("subject_al_display_start");
696
697            attributes.clear();
698            // Hit sequence startPosition attribute
699            qName.setQName("startPosition");
700            attributes.addAttribute(qName.getURI(),
701                                    qName.getLocalName(),
702                                    qName.getQName(),
703                                    "CDATA",
704                                    alStart);
705
706            // Hit sequence stopPosition attribute
707            qName.setQName("stopPosition");
708            attributes.addAttribute(qName.getURI(),
709                                    qName.getLocalName(),
710                                    qName.getQName(),
711                                    "CDATA",
712                                    alStop);
713
714            // Start the HitSequence
715            startElement(new QName(this, this.prefix("HitSequence")),
716                         (Attributes) attributes);
717
718            seqTokens.setLength(0);
719            seqTokens.append((String) hitProperties.get("subjectSeqTokens"));
720
721            // Fasta includes context sequence which we need to trim
722            stringOut = prepSeqTokens(seqTokens,
723                                      Integer.parseInt(alStart),
724                                      Integer.parseInt(alStop),
725                                      Integer.parseInt(alDispStart));
726
727            charOut = new char [stringOut.length()];
728            stringOut.getChars(0, stringOut.length(), charOut, 0);
729
730            // Characters of HitSequence
731            characters(charOut, 0, charOut.length);
732
733            // End the HitSequence
734            endElement(new QName(this, this.prefix("HitSequence")));
735
736            // End the BlastLikeAlignment
737            endElement(new QName(this, this.prefix("BlastLikeAlignment")));
738
739            // End the HSP
740            endElement(new QName(this, this.prefix("HSP")));
741
742            // End the HSPCollection
743            endElement(new QName(this, this.prefix("HSPCollection")));
744
745            // End the hit
746            endElement(new QName(this, this.prefix("Hit")));
747        }
748        catch (SAXException se)
749        {
750            System.err.println("An error occurred while creating SAX events from hit data: ");
751            se.printStackTrace();
752        }
753    }
754
755    /**
756     * <code>countTokens</code> counts up the occurrences of a char in
757     * a <code>String</code>.
758     *
759     * @param token a <code>char</code> to count.
760     * @param string a <code>String</code> to count within.
761     *
762     * @return a <code>String</code> representation of the total count.
763     */
764    private String countTokens(char token, String string)
765    {
766        int count = 0;
767        for (int i = string.length(); --i >= 0;)
768        {
769            if (string.charAt(i) == token)
770                count++;
771        }
772        return String.valueOf(count);
773    }
774
775    /**
776     * The <code>prepSeqTokens</code> method prepares the sequence
777     * data extracted from the Fasta output. Two things need to be
778     * done; firstly, the leading gaps are removed from the sequence
779     * (these are just format padding and not really part of the
780     * alignment) and secondly, as Fasta supplies some flanking
781     * sequence context for its alignments, this must be removed
782     * too. See the Fasta documentation for an explanation of the
783     * format.
784     *
785     * @param name a <code>StringBuffer</code> containing the
786     * unprepared sequence tokens.
787     * @param alStart an <code>int</code> indicating the start
788     * position of the alignment in the original sequence.
789     * @param alStop an <code>int</code> indicating the stop
790     * position of the alignment in the original sequence.
791     * @param alDispStart an <code>int</code> indicating the start
792     * of a flanking context in the original sequence.
793     *
794     * @return a <code>String</code> value consisting of a subsequence
795     * containing only the interesting alignment.
796     */
797    private String prepSeqTokens(StringBuffer seqTokens,
798                                 int          alStart,
799                                 int          alStop,
800                                 int          alDispStart)
801    {
802        // Strip leading gap characters
803        while (seqTokens.charAt(0) == '-')
804            seqTokens.deleteCharAt(0);
805        
806        int gapCount = 0;
807        // Count gaps to add to number of chars returned
808        for (int i = seqTokens.length(); --i >= 0;)
809        {
810            if (seqTokens.charAt(i) == '-')
811                gapCount++;
812        }
813
814        // Calculate the position at which the real alignment
815        // starts/stops, allowing for the gaps, which are not counted
816        // in the numbering system
817        return seqTokens.substring(alStart - alDispStart,
818                                   alStop  - alDispStart + gapCount + 1);
819    }
820}