001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojavax.bio.seq.io;
023
024import java.io.BufferedInputStream;
025import java.io.BufferedReader;
026import java.io.File;
027import java.io.FileReader;
028import java.io.IOException;
029import java.io.InputStreamReader;
030import java.io.PrintStream;
031import java.io.PrintWriter;
032import java.util.ArrayList;
033import java.util.Iterator;
034import java.util.List;
035import java.util.Set;
036import java.util.TreeSet;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039
040import javax.xml.parsers.ParserConfigurationException;
041
042import org.biojava.bio.seq.Sequence;
043import org.biojava.bio.seq.io.ParseException;
044import org.biojava.bio.seq.io.SeqIOListener;
045import org.biojava.bio.seq.io.SymbolTokenization;
046import org.biojava.bio.symbol.IllegalSymbolException;
047import org.biojava.bio.symbol.SimpleSymbolList;
048import org.biojava.bio.symbol.Symbol;
049import org.biojava.bio.symbol.SymbolList;
050import org.biojava.ontology.Term;
051import org.biojava.utils.ChangeVetoException;
052import org.biojava.utils.xml.PrettyXMLWriter;
053import org.biojava.utils.xml.XMLWriter;
054import org.biojavax.Comment;
055import org.biojavax.CrossRef;
056import org.biojavax.DocRef;
057import org.biojavax.DocRefAuthor;
058import org.biojavax.Namespace;
059import org.biojavax.Note;
060import org.biojavax.RankedCrossRef;
061import org.biojavax.RankedDocRef;
062import org.biojavax.RichAnnotation;
063import org.biojavax.RichObjectFactory;
064import org.biojavax.SimpleCrossRef;
065import org.biojavax.SimpleDocRef;
066import org.biojavax.SimpleDocRefAuthor;
067import org.biojavax.SimpleNote;
068import org.biojavax.SimpleRankedCrossRef;
069import org.biojavax.SimpleRankedDocRef;
070import org.biojavax.SimpleRichAnnotation;
071import org.biojavax.bio.seq.Position;
072import org.biojavax.bio.seq.RichFeature;
073import org.biojavax.bio.seq.RichLocation;
074import org.biojavax.bio.seq.RichSequence;
075import org.biojavax.bio.seq.SimplePosition;
076import org.biojavax.bio.seq.SimpleRichLocation;
077import org.biojavax.bio.taxa.NCBITaxon;
078import org.biojavax.bio.taxa.SimpleNCBITaxon;
079import org.biojavax.ontology.ComparableTerm;
080import org.biojavax.utils.StringTools;
081import org.biojavax.utils.XMLTools;
082import org.xml.sax.Attributes;
083import org.xml.sax.SAXException;
084import org.xml.sax.helpers.DefaultHandler;
085
086/**
 * Format reader for INSDseq files. This version of INSDseq format will generate
 * and write RichSequence objects. Loosely based on code from the old, deprecated,
 * org.biojava.bio.seq.io.GenbankXmlFormat object.
090 *
091 * Understands http://www.insdc.org/files/documents/INSD_V1.4.dtd
092 * 
093 * Does NOT understand the "sites" keyword in INSDReference_position. Interprets
094 * this instead as an empty location. This is because
095 * there is no obvious way of representing the "sites" keyword in BioSQL.
096 * 
 * Note also that the INSDInterval tags and their associated elements are not
 * read, as they duplicate the information in the INSDFeature_location tag, which
 * is already fully parsed. However, they are written on output, although there is
 * no guarantee that the INSDInterval tags will exactly match the
 * INSDFeature_location tag, as it is not possible to exactly reflect its contents
 * using these.
103 *
104 * @author Alan Li (code based on his work)
105 * @author Richard Holland
106 * @author George Waldon
107 * @since 1.5
108 */
109public class INSDseqFormat extends RichSequenceFormat.BasicFormat {
110    
    // Register this format with the format auto-guesser.
    static {
        RichSequence.IOTools.registerFormat(INSDseqFormat.class);
    }
    
    /**
     * The name of this format
     */
    public static final String INSDSEQ_FORMAT = "INSDseq";
    
    // Element wrapping the whole document (opened in beginWriting(), closed in
    // finishWriting()) and the element holding a single sequence record.
    protected static final String INSDSEQS_GROUP_TAG = "INSDSet";
    protected static final String INSDSEQ_TAG = "INSDSeq";
    
    // Element names for the general fields of a sequence record.
    protected static final String LOCUS_TAG = "INSDSeq_locus";
    protected static final String LENGTH_TAG = "INSDSeq_length";
    protected static final String TOPOLOGY_TAG = "INSDSeq_topology";
    protected static final String STRANDED_TAG = "INSDSeq_strandedness";
    protected static final String MOLTYPE_TAG = "INSDSeq_moltype";
    protected static final String DIVISION_TAG = "INSDSeq_division";
    protected static final String UPDATE_DATE_TAG = "INSDSeq_update-date";
    protected static final String CREATE_DATE_TAG = "INSDSeq_create-date";
    protected static final String UPDATE_REL_TAG = "INSDSeq_update-release";
    protected static final String CREATE_REL_TAG = "INSDSeq_create-release";
    protected static final String DEFINITION_TAG = "INSDSeq_definition";
    protected static final String DATABASE_XREF_TAG = "INSDSeq_database-reference";
    protected static final String XREF_TAG = "INSDXref";
    
    // Element names for accessions and other sequence identifiers.
    protected static final String ACCESSION_TAG = "INSDSeq_primary-accession";
    protected static final String ACC_VERSION_TAG = "INSDSeq_accession-version";
    protected static final String SECONDARY_ACCESSIONS_GROUP_TAG = "INSDSeq_secondary-accessions";
    protected static final String SECONDARY_ACCESSION_TAG = "INSDSecondary-accn";
    protected static final String OTHER_SEQIDS_GROUP_TAG = "INSDSeq_other-seqids";
    protected static final String OTHER_SEQID_TAG = "INSDSeqid";
    
    // Element names for keywords.
    protected static final String KEYWORDS_GROUP_TAG = "INSDSeq_keywords";
    protected static final String KEYWORD_TAG = "INSDKeyword";
    
    // Element names for source, organism and taxonomy.
    protected static final String SOURCE_TAG = "INSDSeq_source";
    protected static final String ORGANISM_TAG = "INSDSeq_organism";
    protected static final String TAXONOMY_TAG = "INSDSeq_taxonomy";
    
    // Element names for literature references and their cross-references.
    protected static final String REFERENCES_GROUP_TAG = "INSDSeq_references";
    protected static final String REFERENCE_TAG = "INSDReference";
    protected static final String REFERENCE_LOCATION_TAG = "INSDReference_reference";
    protected static final String REFERENCE_POSITION_TAG = "INSDReference_position";
    protected static final String TITLE_TAG = "INSDReference_title";
    protected static final String JOURNAL_TAG = "INSDReference_journal";
    protected static final String PUBMED_TAG = "INSDReference_pubmed";
    protected static final String XREF_DBNAME_TAG = "INSDXref_dbname";
    protected static final String XREF_ID_TAG = "INSDXref_id";
    protected static final String REMARK_TAG = "INSDReference_remark";
    protected static final String AUTHORS_GROUP_TAG = "INSDReference_authors";
    protected static final String AUTHOR_TAG = "INSDAuthor";
    protected static final String CONSORTIUM_TAG = "INSDReference_consortium";
    
    // Element name for the record-level comment.
    protected static final String COMMENT_TAG = "INSDSeq_comment";

    // Element names for the feature table: features, their intervals and qualifiers.
    protected static final String FEATURES_GROUP_TAG = "INSDSeq_feature-table";
    protected static final String FEATURE_TAG = "INSDFeature";
    protected static final String FEATURE_KEY_TAG = "INSDFeature_key";
    protected static final String FEATURE_LOC_TAG = "INSDFeature_location";
    protected static final String FEATURE_INTERVALS_GROUP_TAG = "INSDFeature_intervals";
    protected static final String FEATURE_INTERVAL_TAG = "INSDInterval";
    protected static final String FEATURE_FROM_TAG = "INSDInterval_from";
    protected static final String FEATURE_TO_TAG = "INSDInterval_to";
    protected static final String FEATURE_POINT_TAG = "INSDInterval_point";
    protected static final String FEATURE_ISCOMP_TAG = "INSDInterval_iscomp";
    protected static final String FEATURE_INTERBP_TAG = "INSDInterval_interbp";
    protected static final String FEATURE_ACCESSION_TAG = "INSDInterval_accession";
    protected static final String FEATURE_OPERATOR_TAG = "INSDFeature_operator";
    protected static final String FEATURE_PARTIAL5_TAG = "INSDFeature_partial5";
    protected static final String FEATURE_PARTIAL3_TAG = "INSDFeature_partial3";
    protected static final String FEATUREQUALS_GROUP_TAG = "INSDFeature_quals";
    protected static final String FEATUREQUAL_TAG = "INSDQualifier";
    protected static final String FEATUREQUAL_NAME_TAG = "INSDQualifier_name";
    protected static final String FEATUREQUAL_VALUE_TAG = "INSDQualifier_value";
    
    // Element names for the sequence data itself.
    protected static final String SEQUENCE_TAG = "INSDSeq_sequence";
    protected static final String CONTIG_TAG = "INSDSeq_contig";
    
    // dbxref line: a whole string of the form "name:value", where the name contains
    // no colon and the value contains no whitespace (group 1 = name, group 2 = value)
    protected static final Pattern dbxp = Pattern.compile("^([^:]+):(\\S+)$");
    
    // Used by both canRead(...) methods: matches a line that contains the URL
    // http://www.ebi.ac.uk/dtd/INSD_INSDSeq.dtd anywhere in it.
    // NOTE(review): beginWriting() emits the http://www.insdc.org/files/documents/INSD_V1.4.dtd
    // URL instead, which this pattern does not match - confirm whether output of
    // this class is meant to be recognised by canRead(...).
    protected static final Pattern xmlSchema = Pattern.compile(".*http://www\\.ebi\\.ac\\.uk/dtd/INSD_INSDSeq\\.dtd.*");
195    
196    /**
197     * Implements some INSDseq-specific terms.
198     */
199    public static class Terms extends RichSequence.Terms {               
200        /**
201         * Getter for the INSDseq term
202         * @return The INSDseq Term
203         */
204        public static ComparableTerm getOtherSeqIdTerm() {
205            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("OtherSeqID");
206        }
207        
208        /**
209         * Getter for the INSDseq term
210         * @return The INSDseq Term
211         */
212        public static ComparableTerm getINSDseqTerm() {
213            return RichObjectFactory.getDefaultOntology().getOrCreateTerm("INSDseq");
214        }
215    }
216    
217    /**
218     * {@inheritDoc}
219     * A file is in INSDseq format if the second XML line contains the phrase "http://www.ebi.ac.uk/dtd/INSD_INSDSeq.dtd".
220     */
221    public boolean canRead(File file) throws IOException {
222        BufferedReader br = new BufferedReader(new FileReader(file));
223        br.readLine(); // skip first line
224        String secondLine = br.readLine();
225        boolean readable = secondLine!=null && xmlSchema.matcher(secondLine).matches(); // check on second line
226        br.close();
227        return readable;
228    }
229    
230    /**
231     * {@inheritDoc}
232     * Always returns a DNA tokenizer.
233     */
234    public SymbolTokenization guessSymbolTokenization(File file) throws IOException {
235        return RichSequence.IOTools.getDNAParser();
236    }
237    
238    /**
239     * {@inheritDoc}
240     * A stream is in INSDseq format if the second XML line contains the phrase "http://www.ebi.ac.uk/dtd/INSD_INSDSeq.dtd".
241     */
242    public boolean canRead(BufferedInputStream stream) throws IOException {
243        stream.mark(2000); // some streams may not support this
244        BufferedReader br = new BufferedReader(new InputStreamReader(stream));
245        br.readLine(); // skip first line
246        String secondLine = br.readLine();
247        boolean readable = secondLine!=null && xmlSchema.matcher(secondLine).matches(); // check on second line
248        // don't close the reader as it'll close the stream too.
249        // br.close();
250        stream.reset();
251        return readable;
252    }
253    
254    /**
255     * {@inheritDoc}
256     * Always returns a DNA tokenizer.
257     */
258    public SymbolTokenization guessSymbolTokenization(BufferedInputStream stream) throws IOException {
259        return RichSequence.IOTools.getDNAParser();
260    }
261    
262    /**
263     * {@inheritDoc}
264     */
265    public boolean readSequence(BufferedReader reader,
266            SymbolTokenization symParser,
267            SeqIOListener listener)
268            throws IllegalSymbolException, IOException, ParseException {
269        if (!(listener instanceof RichSeqIOListener)) throw new IllegalArgumentException("Only accepting RichSeqIOListeners today");
270        return this.readRichSequence(reader,symParser,(RichSeqIOListener)listener,null);
271    }
272    
273    /**
274     * {@inheritDoc}
275     */
276    public boolean readRichSequence(BufferedReader reader,
277            SymbolTokenization symParser,
278            RichSeqIOListener rlistener,
279            Namespace ns)
280            throws IllegalSymbolException, IOException, ParseException {
281        
282        try {
283            DefaultHandler m_handler = new INSDseqHandler(this,symParser,rlistener,ns);
284            return XMLTools.readXMLChunk(reader, m_handler, INSDSEQ_TAG);
285        } catch (ParserConfigurationException e) {
286            throw new ParseException(e);
287        } catch (SAXException e) {
288            throw new ParseException(e);
289        }
290    }
291    
292    private PrintWriter pw;
293    private XMLWriter xmlWriter;
294    
295    private XMLWriter getXMLWriter() {
296        if(xmlWriter==null) {
297            // make an XML writer
298            pw = new PrintWriter(this.getPrintStream());
299            xmlWriter = new PrettyXMLWriter(pw);
300        }
301        return xmlWriter;
302    }
303    
304    /**
305     * {@inheritDoc}
306     */
307    public void beginWriting() throws IOException {
308        XMLWriter xml = getXMLWriter();
309        xml.printRaw("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
310        xml.printRaw("<!DOCTYPE INSDSeq PUBLIC \"-//EMBL-EBI//INSD INSDSeq/EN\" \"http://www.insdc.org/files/documents/INSD_V1.4.dtd\">");
311        xml.openTag(INSDSEQS_GROUP_TAG);
312    }
313    
314    /**
315     * {@inheritDoc}
316     */
317    public void finishWriting() throws IOException {
318        XMLWriter xml = getXMLWriter();
319        xml.closeTag(INSDSEQS_GROUP_TAG);
320        pw.flush();
321    }
322    
323    /**
324     * {@inheritDoc}
325     */
326    public void writeSequence(Sequence seq, PrintStream os) throws IOException {
327        if (this.getPrintStream()==null) this.setPrintStream(this.getPrintStream());
328        this.writeSequence(seq, RichObjectFactory.getDefaultNamespace());
329    }
330    
331    /**
332     * {@inheritDoc}
333     */
334    public void writeSequence(Sequence seq, String format, PrintStream os) throws IOException {
335        if (this.getPrintStream()==null) this.setPrintStream(this.getPrintStream());
336        if (!format.equals(this.getDefaultFormat())) throw new IllegalArgumentException("Unknown format: "+format);
337        this.writeSequence(seq, RichObjectFactory.getDefaultNamespace());
338    }
339    
340    /**
341     * {@inheritDoc}
342     * Namespace is ignored as INSDseq has no concept of it.
343     */
344    public void writeSequence(Sequence seq, Namespace ns) throws IOException {
345        RichSequence rs;
346        try {
347            if (seq instanceof RichSequence) rs = (RichSequence)seq;
348            else rs = RichSequence.Tools.enrich(seq);
349        } catch (ChangeVetoException e) {
350            throw new IOException("Unable to enrich sequence", e);
351        }
352        
353        Set<Note> notes = rs.getNoteSet();
354        List accessions = new ArrayList();
355        List otherSeqIDs = new ArrayList();
356        List kws = new ArrayList();
357        String stranded = null;
358        String udat = null;
359        String cdat = null;
360        String urel = null;
361        String crel = null;
362        String moltype = rs.getAlphabet().getName();
363        for (Iterator<Note> i = notes.iterator(); i.hasNext();) {
364            Note n = i.next();
365            if (n.getTerm().equals(Terms.getStrandedTerm())) stranded=n.getValue();
366            else if (n.getTerm().equals(Terms.getDateUpdatedTerm())) udat=n.getValue();
367            else if (n.getTerm().equals(Terms.getDateCreatedTerm())) cdat=n.getValue();
368            else if (n.getTerm().equals(Terms.getRelUpdatedTerm())) urel=n.getValue();
369            else if (n.getTerm().equals(Terms.getRelCreatedTerm())) crel=n.getValue();
370            else if (n.getTerm().equals(Terms.getMolTypeTerm())) moltype=n.getValue();
371            else if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) accessions.add(n.getValue());
372            else if (n.getTerm().equals(Terms.getOtherSeqIdTerm())) otherSeqIDs.add(n.getValue());
373            else if (n.getTerm().equals(Terms.getKeywordTerm())) kws.add(n.getValue());
374        }
375               
376        XMLWriter xml = getXMLWriter();
377        xml.openTag(INSDSEQ_TAG);
378        
379        xml.openTag(LOCUS_TAG);
380        xml.print(rs.getName());
381        xml.closeTag(LOCUS_TAG);
382        
383        xml.openTag(LENGTH_TAG);
384        xml.print(""+rs.length());
385        xml.closeTag(LENGTH_TAG);
386        
387        if (stranded!=null) {
388            xml.openTag(STRANDED_TAG);
389            xml.print(stranded);
390            xml.closeTag(STRANDED_TAG);
391        }
392        
393        if (moltype!=null) {
394            xml.openTag(MOLTYPE_TAG);
395            xml.print(moltype);
396            xml.closeTag(MOLTYPE_TAG);
397        }
398        
399        xml.openTag(TOPOLOGY_TAG);
400        if (rs.getCircular()) xml.print("circular");
401        else xml.print("linear");
402        xml.closeTag(TOPOLOGY_TAG);
403        
404        if (rs.getDivision()!=null) {
405            xml.openTag(DIVISION_TAG);
406            xml.print(rs.getDivision());
407            xml.closeTag(DIVISION_TAG);
408        }
409        
410        xml.openTag(UPDATE_DATE_TAG);
411        xml.print(udat);
412        xml.closeTag(UPDATE_DATE_TAG);
413        
414        if(cdat!=null) {
415            xml.openTag(CREATE_DATE_TAG);
416            xml.print(cdat);
417            xml.closeTag(CREATE_DATE_TAG);
418        }
419        
420        if (urel!=null) {
421            xml.openTag(UPDATE_REL_TAG);
422            xml.print(urel);
423            xml.closeTag(UPDATE_REL_TAG);
424        }
425        
426        if (crel!=null) {
427            xml.openTag(CREATE_REL_TAG);
428            xml.print(crel);
429            xml.closeTag(CREATE_REL_TAG);
430        }
431        
432        if (rs.getDescription()!=null) {
433            xml.openTag(DEFINITION_TAG);
434            xml.print(rs.getDescription());
435            xml.closeTag(DEFINITION_TAG);
436        }
437        
438        xml.openTag(ACC_VERSION_TAG);
439        xml.print(rs.getAccession()+"."+rs.getVersion());
440        xml.closeTag(ACC_VERSION_TAG);
441        
442        if (!otherSeqIDs.isEmpty()) {
443            xml.openTag(OTHER_SEQIDS_GROUP_TAG);
444            for (Iterator i = otherSeqIDs.iterator(); i.hasNext(); ) {
445                
446                xml.openTag(OTHER_SEQID_TAG);
447                xml.print((String)i.next());
448                xml.closeTag(OTHER_SEQID_TAG);
449                
450            }
451            xml.closeTag(OTHER_SEQIDS_GROUP_TAG);
452        }
453        
454        if (!accessions.isEmpty()) {
455            xml.openTag(SECONDARY_ACCESSIONS_GROUP_TAG);
456            for (Iterator i = accessions.iterator(); i.hasNext(); ) {
457                
458                xml.openTag(SECONDARY_ACCESSION_TAG);
459                xml.print((String)i.next());
460                xml.closeTag(SECONDARY_ACCESSION_TAG);
461                
462            }
463            xml.closeTag(SECONDARY_ACCESSIONS_GROUP_TAG);
464        }
465        
466        if (!kws.isEmpty()) {
467            xml.openTag(KEYWORDS_GROUP_TAG);
468            for (Iterator i = kws.iterator(); i.hasNext(); ) {
469                xml.openTag(KEYWORD_TAG);
470                xml.print((String)i.next());
471                xml.closeTag(KEYWORD_TAG);
472            }
473            xml.closeTag(KEYWORDS_GROUP_TAG);
474        }
475        
476        NCBITaxon tax = rs.getTaxon();
477        if (tax!=null) {
478            xml.openTag(SOURCE_TAG);
479            xml.print(tax.getDisplayName());
480            xml.closeTag(SOURCE_TAG);
481            
482            xml.openTag(ORGANISM_TAG);
483            xml.print(tax.getDisplayName().split("\\(")[0].trim());
484            xml.closeTag(ORGANISM_TAG);
485            
486            xml.openTag(TAXONOMY_TAG);
487            String h = tax.getNameHierarchy();
488            xml.print(h.substring(0, h.length()-1)); // chomp dot
489            xml.closeTag(TAXONOMY_TAG);
490        }
491        
492        // references - rank (bases x to y)
493        if (!rs.getRankedDocRefs().isEmpty()) {
494            xml.openTag(REFERENCES_GROUP_TAG);
495            for (Iterator<RankedDocRef> r = rs.getRankedDocRefs().iterator(); r.hasNext();) {
496                xml.openTag(REFERENCE_TAG);
497                
498                RankedDocRef rdr = r.next();
499                DocRef d = rdr.getDocumentReference();
500                
501                xml.openTag(REFERENCE_LOCATION_TAG);
502                xml.print(Integer.toString(rdr.getRank()));
503                xml.closeTag(REFERENCE_LOCATION_TAG);
504                
505                RichLocation rdrl = rdr.getLocation();
506                if(!rdrl.equals(RichLocation.EMPTY_LOCATION)) {
507                    xml.openTag(REFERENCE_POSITION_TAG);
508                    for (Iterator i = rdrl.blockIterator(); i.hasNext(); ) {
509                            RichLocation l = (RichLocation)i.next();
510                            xml.print(l.getMin()+".."+l.getMax());
511                            if (i.hasNext()) xml.print("; ");
512                    }
513                    xml.closeTag(REFERENCE_POSITION_TAG);
514                }
515                
516                xml.openTag(AUTHORS_GROUP_TAG);
517                List<DocRefAuthor> auths = d.getAuthorList();
518                for (Iterator<DocRefAuthor> i = auths.iterator(); i.hasNext(); ) {
519                    DocRefAuthor a = i.next();
520                    if (!a.isConsortium()) {
521                        xml.openTag(AUTHOR_TAG);
522                        xml.print(a.getName());
523                        xml.closeTag(AUTHOR_TAG);
524                        i.remove();
525                    }
526                }
527                xml.closeTag(AUTHORS_GROUP_TAG);
528                if (!auths.isEmpty()) { // only consortia left in the set now
529                    DocRefAuthor a = auths.iterator().next(); // take the first one only
530                    xml.openTag(CONSORTIUM_TAG);
531                    xml.print(a.getName());
532                    xml.closeTag(CONSORTIUM_TAG);
533                }
534                
535                if (d.getTitle()!=null) {
536                    xml.openTag(TITLE_TAG);
537                    xml.print(d.getTitle());
538                    xml.closeTag(TITLE_TAG);
539                }
540                
541                xml.openTag(JOURNAL_TAG);
542                xml.print(d.getLocation());
543                xml.closeTag(JOURNAL_TAG);
544                
545                CrossRef c = d.getCrossref();
546                if (c!=null) {
547                    if (c.getDbname().equals(Terms.PUBMED_KEY)) {
548                        xml.openTag(PUBMED_TAG);
549                        xml.print(c.getAccession());
550                        xml.closeTag(PUBMED_TAG);
551                    } else {
552                        xml.openTag(XREF_TAG);
553                        xml.openTag(XREF_DBNAME_TAG);
554                        xml.print(c.getDbname());
555                        xml.closeTag(XREF_DBNAME_TAG);
556                        xml.openTag(XREF_ID_TAG);
557                        xml.print(c.getAccession());
558                        xml.closeTag(XREF_ID_TAG);
559                        xml.closeTag(XREF_TAG);
560                    }
561                }
562                
563                if (d.getRemark()!=null) {
564                    xml.openTag(REMARK_TAG);
565                    xml.print(d.getRemark());
566                    xml.closeTag(REMARK_TAG);
567                }
568                
569                xml.closeTag(REFERENCE_TAG);
570            }
571            xml.closeTag(REFERENCES_GROUP_TAG);
572        }
573        
574        if (!rs.getComments().isEmpty()) {
575            xml.openTag(COMMENT_TAG);
576            for (Iterator<Comment> i = rs.getComments().iterator(); i.hasNext(); ) xml.println(((Comment)i.next()).getComment());
577            xml.closeTag(COMMENT_TAG);
578        }
579        
580        
581        // db references - only first one is output
582        if (!rs.getRankedCrossRefs().isEmpty()) {
583            Iterator<RankedCrossRef> r = rs.getRankedCrossRefs().iterator();
584            RankedCrossRef rcr = r.next();
585            CrossRef c = rcr.getCrossRef();
586            Set<Note> noteset = c.getNoteSet();
587            StringBuffer sb = new StringBuffer();
588            sb.append(c.getDbname().toUpperCase());
589            sb.append("; ");
590            sb.append(c.getAccession());
591            boolean hasSecondary = false;
592            for (Iterator<Note> i = noteset.iterator(); i.hasNext(); ) {
593                Note n = i.next();
594                if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) {
595                    sb.append("; ");
596                    sb.append(n.getValue());
597                    hasSecondary = true;
598                }
599            }
600            //create unnecessary event firing
601            //if (!hasSecondary) sb.append("; -");
602            
603            xml.openTag(DATABASE_XREF_TAG);
604            xml.print(sb.toString());
605            xml.closeTag(DATABASE_XREF_TAG);
606        }
607        
608        if (!rs.getFeatureSet().isEmpty()) {
609            xml.openTag(FEATURES_GROUP_TAG);
610            for (Iterator i = rs.getFeatureSet().iterator(); i.hasNext(); ) {
611                RichFeature f = (RichFeature)i.next();
612                xml.openTag(FEATURE_TAG);
613                
614                xml.openTag(FEATURE_KEY_TAG);
615                xml.print(f.getTypeTerm().getName());
616                xml.closeTag(FEATURE_KEY_TAG);
617                
618                xml.openTag(FEATURE_LOC_TAG);
619                xml.print(GenbankLocationParser.writeLocation((RichLocation)f.getLocation()));
620                xml.closeTag(FEATURE_LOC_TAG);
621                
622                // New in 1.4 - duplicate the location as a 
623                // tree of XML tags.
624                xml.openTag(FEATURE_INTERVALS_GROUP_TAG);
625
626                RichLocation loc = (RichLocation)f.getLocation();
627                boolean first = true;
628                boolean partial5 = false;
629                boolean partial3 = false;
630                Term operator = loc.getTerm();
631                for (Iterator j = loc.blockIterator(); j.hasNext(); ) {
632                    xml.openTag(FEATURE_INTERVAL_TAG);
633                    
634                        RichLocation rl = (RichLocation)j.next();                    
635                        if (rl.getMin()==rl.getMax()) {
636                                xml.openTag(FEATURE_POINT_TAG);
637                                xml.print(""+rl.getMin());
638                                xml.closeTag(FEATURE_POINT_TAG);
639                        } else {
640                                xml.openTag(FEATURE_FROM_TAG);
641                                xml.print(""+rl.getMin());
642                                xml.closeTag(FEATURE_FROM_TAG);
643                                xml.openTag(FEATURE_TO_TAG);
644                                xml.print(""+rl.getMax());
645                                xml.closeTag(FEATURE_TO_TAG);
646                        }
647                        boolean iscomp = rl.getStrand().equals(RichLocation.Strand.NEGATIVE_STRAND);
648                        boolean interbp = 
649                                (rl.getMinPosition().getType()!=null && rl.getMinPosition().getType().equals(Position.BETWEEN_BASES)) || 
650                                (rl.getMaxPosition().getType()!=null && rl.getMaxPosition().getType().equals(Position.BETWEEN_BASES));
651                        if (first && rl.getMinPosition().getFuzzyStart()) partial5 = true;
652                        if (!j.hasNext() && rl.getMaxPosition().getFuzzyEnd()) partial3 = true;
653                        first = false;
654                        
655                    xml.openTag(FEATURE_ISCOMP_TAG);
656                    xml.print(""+iscomp);
657                    xml.closeTag(FEATURE_ISCOMP_TAG);
658                    
659                    xml.openTag(FEATURE_INTERBP_TAG);
660                    xml.print(""+interbp);
661                    xml.closeTag(FEATURE_INTERBP_TAG);
662                    
663                    xml.openTag(FEATURE_ACCESSION_TAG);
664                    xml.print(((RichSequence)f.getSequence()).getAccession());
665                    xml.closeTag(FEATURE_ACCESSION_TAG);
666                    
667                    xml.closeTag(FEATURE_INTERVAL_TAG);
668                }
669                
670                if (operator!=null) {
671                        xml.openTag(FEATURE_OPERATOR_TAG);
672                        xml.print(operator.getName());
673                        xml.closeTag(FEATURE_OPERATOR_TAG);
674                }
675                
676                xml.openTag(FEATURE_PARTIAL5_TAG);
677                xml.print(""+partial5);
678                xml.closeTag(FEATURE_PARTIAL5_TAG);
679                
680                xml.openTag(FEATURE_PARTIAL3_TAG);
681                xml.print(""+partial3);
682                xml.closeTag(FEATURE_PARTIAL3_TAG);
683                
684                xml.closeTag(FEATURE_INTERVALS_GROUP_TAG);
685                
686                xml.openTag(FEATUREQUALS_GROUP_TAG);
687                
688                for (Iterator<Note> j = f.getNoteSet().iterator(); j.hasNext();) {
689                    Note n = j.next();
690                    xml.openTag(FEATUREQUAL_TAG);
691                    
692                    xml.openTag(FEATUREQUAL_NAME_TAG);
693                    xml.print(""+n.getTerm().getName());
694                    xml.closeTag(FEATUREQUAL_NAME_TAG);
695                    
696                    xml.openTag(FEATUREQUAL_VALUE_TAG);
697                    if (n.getValue()!=null && !n.getValue().equals("")) {
698                        if (n.getTerm().getName().equalsIgnoreCase("translation")) {
699                                String[] lines = StringTools.wordWrap(n.getValue(), "\\s+", this.getLineWidth());
700                                for (int k = 0; k < lines.length; k++) xml.println(lines[k]);
701                        } else {
702                                xml.print(n.getValue());
703                        }
704                    }
705                    xml.closeTag(FEATUREQUAL_VALUE_TAG);
706                    
707                    xml.closeTag(FEATUREQUAL_TAG);
708                }
709                // add-in to source feature only organism and db_xref="taxon:xyz" where present
710                if (f.getType().equalsIgnoreCase("source") && tax!=null) {
711                    xml.openTag(FEATUREQUAL_TAG);
712                    
713                    xml.openTag(FEATUREQUAL_NAME_TAG);
714                    xml.print("db_xref");
715                    xml.closeTag(FEATUREQUAL_NAME_TAG);
716                    
717                    xml.openTag(FEATUREQUAL_VALUE_TAG);
718                    xml.print("taxon:"+tax.getNCBITaxID());
719                    xml.closeTag(FEATUREQUAL_VALUE_TAG);
720                    
721                    xml.closeTag(FEATUREQUAL_TAG);
722                    
723                    String displayName = tax.getDisplayName();
724                    if (displayName.indexOf('(')>-1) displayName = displayName.substring(0, displayName.indexOf('(')).trim();
725                    
726                    xml.openTag(FEATUREQUAL_TAG);
727                    
728                    xml.openTag(FEATUREQUAL_NAME_TAG);
729                    xml.print("organism");
730                    xml.closeTag(FEATUREQUAL_NAME_TAG);
731                    
732                    xml.openTag(FEATUREQUAL_VALUE_TAG);
733                    xml.print(displayName);
734                    xml.closeTag(FEATUREQUAL_VALUE_TAG);
735                    
736                    xml.closeTag(FEATUREQUAL_TAG);
737                }
738                // add-in other dbxrefs where present
739                for (Iterator<RankedCrossRef> j = f.getRankedCrossRefs().iterator(); j.hasNext();) {
740                    RankedCrossRef rcr = j.next();
741                    CrossRef cr = rcr.getCrossRef();
742                    xml.openTag(FEATUREQUAL_TAG);
743                    
744                    xml.openTag(FEATUREQUAL_NAME_TAG);
745                    xml.print("db_xref");
746                    xml.closeTag(FEATUREQUAL_NAME_TAG);
747                    
748                    xml.openTag(FEATUREQUAL_VALUE_TAG);
749                    xml.print(cr.getDbname()+":"+cr.getAccession());
750                    xml.closeTag(FEATUREQUAL_VALUE_TAG);
751                    
752                    xml.closeTag(FEATUREQUAL_TAG);
753                }
754                xml.closeTag(FEATUREQUALS_GROUP_TAG);
755                
756                xml.closeTag(FEATURE_TAG);
757            }
758            xml.closeTag(FEATURES_GROUP_TAG);
759        }
760        
761        xml.openTag(SEQUENCE_TAG);
762        String[] lines = StringTools.wordWrap(rs.seqString(), "\\s+", this.getLineWidth());
763        for (int i = 0; i < lines.length; i ++) xml.println(lines[i]);
764        xml.closeTag(SEQUENCE_TAG);
765        
766        xml.closeTag(INSDSEQ_TAG);
767        
768        pw.flush();
769    }
770    
771    /**
772     * {@inheritDoc}
773     */
774    public String getDefaultFormat() {
775        return INSDSEQ_FORMAT;
776    }
777    
778    // SAX event handler for parsing http://www.ebi.ac.uk/embl/Documentation/DTD/INSDSeq_v1.3.dtd.txt
779    private class INSDseqHandler extends DefaultHandler {
780        
781        private RichSequenceFormat parent;
782        private SymbolTokenization symParser;
783        private RichSeqIOListener rlistener;
784        private Namespace ns;
785        private StringBuffer m_currentString;
786        
787        private NCBITaxon tax;
788        private String organism;
789        private String accession;
790        private RichFeature.Template templ;
791        private String currFeatQual;
792        private String currRefLocation;
793        private List currRefAuthors;
794        private String currRefTitle;
795        private String currRefJournal;
796        private String currRefPubmed;
797        private String currRefRemark;
798        private String currRefPosition;
799        private String currRefXrefDBName;
800        private String currRefXrefID;
801        private List currRefXrefs;
802        private int rcrossrefCount;
803        
804        // construct a new handler that will populate the given list of sequences
805        private INSDseqHandler(RichSequenceFormat parent,
806                SymbolTokenization symParser,
807                RichSeqIOListener rlistener,
808                Namespace ns) {
809            this.parent = parent;
810            this.symParser = symParser;
811            this.rlistener = rlistener;
812            this.ns = ns;
813            this.m_currentString = new StringBuffer();
814        }
815        
816        // process an opening tag
817        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
818            if (qName.equals(INSDSEQ_TAG)) {
819                try {
820                    rlistener.startSequence();
821                    if (ns==null) ns=RichObjectFactory.getDefaultNamespace();
822                    rlistener.setNamespace(ns);
823                } catch (ParseException e) {
824                    throw new SAXException(e);
825                }
826            } else if (qName.equals(REFERENCE_TAG) && !this.parent.getElideReferences()) {
827                currRefLocation = null;
828                currRefPosition = null;
829                currRefAuthors = new ArrayList();
830                currRefTitle = null;
831                currRefJournal = null;
832                currRefPubmed = null;
833                currRefRemark = null;
834                currRefXrefs = new ArrayList();
835            } else if (qName.equals(XREF_TAG) && !this.parent.getElideReferences()) {
836                currRefXrefDBName = null;
837                currRefXrefID = null;
838            } else if (qName.equals(FEATURE_TAG) && !this.parent.getElideFeatures()) {
839                templ = new RichFeature.Template();
840                templ.annotation = new SimpleRichAnnotation();
841                templ.sourceTerm = Terms.getINSDseqTerm();
842                templ.featureRelationshipSet = new TreeSet();
843                templ.rankedCrossRefs = new TreeSet();
844            }
845        }
846        
847        // process a closing tag - we will have read the text already
848        public void endElement(String uri, String localName, String qName) throws SAXException {
849            String val = this.m_currentString.toString().trim();
850            
851            try {
852                if (qName.equals(LOCUS_TAG))
853                    rlistener.setName(val);
854                else if (qName.equals(ACCESSION_TAG)) {
855                    accession = val;
856                    rlistener.setAccession(accession);
857                } else if (qName.equals(ACC_VERSION_TAG)) {
858                    String parts[] = val.split("\\.");
859                    accession = parts[0];
860                    rlistener.setAccession(accession);
861                    if (parts.length>1) rlistener.setVersion(Integer.parseInt(parts[1]));
862                } else if (qName.equals(SECONDARY_ACCESSION_TAG)) {
863                    rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(),val);
864                } else if (qName.equals(OTHER_SEQID_TAG)) {
865                    rlistener.addSequenceProperty(Terms.getOtherSeqIdTerm(),val);
866                } else if (qName.equals(DIVISION_TAG)) {
867                    rlistener.setDivision(val);
868                } else if (qName.equals(MOLTYPE_TAG)) {
869                    rlistener.addSequenceProperty(Terms.getMolTypeTerm(),val);
870                } else if (qName.equals(UPDATE_DATE_TAG)) {
871                    rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(),val);
872                } else if (qName.equals(UPDATE_REL_TAG)) {
873                    rlistener.addSequenceProperty(Terms.getRelUpdatedTerm(),val);
874                } else if (qName.equals(CREATE_DATE_TAG)) {
875                    rlistener.addSequenceProperty(Terms.getDateCreatedTerm(),val);
876                } else if (qName.equals(CREATE_REL_TAG)) {
877                    rlistener.addSequenceProperty(Terms.getRelCreatedTerm(),val);
878                } else if (qName.equals(STRANDED_TAG)) {
879                    rlistener.addSequenceProperty(Terms.getStrandedTerm(),val);
880                } else if (qName.equals(TOPOLOGY_TAG)) {
881                    if ("circular".equals(val)) rlistener.setCircular(true);
882                } else if (qName.equals(DEFINITION_TAG)) {
883                    rlistener.setDescription(val);
884                } else if (qName.equals(KEYWORD_TAG)) {
885                    rlistener.addSequenceProperty(Terms.getKeywordTerm(), val);
886                } else if (qName.equals(COMMENT_TAG) && !this.parent.getElideComments()) {
887                    rlistener.setComment(val);
888                } else if (qName.equals(DATABASE_XREF_TAG)) {
889                    // database_identifier; primary_identifier; secondary_identifier....
890                    String[] parts = val.split(";");
891                    // construct a DBXREF out of the dbname part[0] and accession part[1]
892                    CrossRef crossRef = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{parts[0].trim(),parts[1].trim(), new Integer(0)});
893                    // assign remaining bits of info as annotations
894                    for (int j = 2; j < parts.length; j++) {
895                        Note note = new SimpleNote(Terms.getAdditionalAccessionTerm(),parts[j].trim(),j-1);
896                        try {
897                            crossRef.getRichAnnotation().addNote(note);
898                        } catch (ChangeVetoException ce) {
899                            ParseException pe = new ParseException("Could not annotate identifier terms");
900                            pe.initCause(ce);
901                            throw pe;
902                        }
903                    }
904                    RankedCrossRef rcrossRef = new SimpleRankedCrossRef(crossRef, 0);
905                    rlistener.setRankedCrossRef(rcrossRef);
906                } else if (qName.equals(SEQUENCE_TAG) && !this.parent.getElideSymbols()) {
907                    try {
908                        SymbolList sl = new SimpleSymbolList(symParser,
909                                val.replaceAll("\\s+","").replaceAll("[\\.|~]","-"));
910                        rlistener.addSymbols(symParser.getAlphabet(),
911                                (Symbol[])(sl.toList().toArray(new Symbol[0])),
912                                0, sl.length());
913                    } catch (Exception e) {
914                        throw new ParseException(e);
915                    }
916                } else if (qName.equals(CONTIG_TAG))
917                    throw new SAXException("Cannot handle contigs yet");
918                else if (qName.equals(REFERENCE_LOCATION_TAG) && !this.parent.getElideReferences()) {
919                    currRefLocation = val;
920                } else if (qName.equals(REFERENCE_POSITION_TAG) && !this.parent.getElideReferences()) {
921                    currRefPosition = val;
922                } else if (qName.equals(AUTHOR_TAG) && !this.parent.getElideReferences()) {
923                    currRefAuthors.add(new SimpleDocRefAuthor(val,false,false));
924                } else if (qName.equals(CONSORTIUM_TAG) && !this.parent.getElideReferences()) {
925                    currRefAuthors.add(new SimpleDocRefAuthor(val,true,false));
926                } else if (qName.equals(TITLE_TAG) && !this.parent.getElideReferences()) {
927                    currRefTitle = val;
928                } else if (qName.equals(JOURNAL_TAG) && !this.parent.getElideReferences()) {
929                    currRefJournal = val;
930                } else if (qName.equals(XREF_DBNAME_TAG) && !this.parent.getElideReferences()) {
931                    currRefXrefDBName = val;
932                } else if (qName.equals(XREF_ID_TAG) && !this.parent.getElideReferences()) {
933                    currRefXrefID = val;
934                } else if (qName.equals(XREF_TAG) && !this.parent.getElideReferences()) {
935                    CrossRef xr = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{
936                        currRefXrefDBName,currRefXrefID, new Integer(0)});
937                    currRefXrefs.add(xr);
938                } else if (qName.equals(PUBMED_TAG) && !this.parent.getElideReferences()) {
939                    currRefPubmed = val;
940                } else if (qName.equals(REMARK_TAG) && !this.parent.getElideReferences() && !this.parent.getElideComments()) {
941                    currRefRemark = val;
942                } else if (qName.equals(REFERENCE_TAG) && !this.parent.getElideReferences()) {
943                    // create the crossref - medline gets priority, then pubmed, then doi
944                    CrossRef dcr = null;
945                    if (currRefPubmed!=null) {
946                        dcr = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{Terms.PUBMED_KEY, currRefPubmed, new Integer(0)});
947                    } else {
948                        CrossRef pubmed = null;
949                        CrossRef doi = null;
950                        CrossRef other = null;
951                        for (int i = 0; i < currRefXrefs.size(); i++) {
952                            CrossRef cr = (CrossRef)currRefXrefs.get(i);
953                            if(cr.getDbname().equals("pubmed")) pubmed = cr;
954                            else if(cr.getDbname().equals("doi")) doi = cr;
955                            else other = cr;
956                        }
957                        if(pubmed!=null) dcr = pubmed;
958                        else if(doi!=null) dcr = doi;
959                        else dcr = other;
960                    }
961                    // create the docref object
962                    try {
963                        DocRef dr = (DocRef)RichObjectFactory.getObject(SimpleDocRef.class,new Object[]{currRefAuthors,currRefJournal,currRefTitle});
964                        // assign the crossref to the docref 
965                        if (dcr!=null) dr.setCrossref(dcr);
966                        // assign the remarks
967                        dr.setRemark(currRefRemark);
968                        // assign the docref to the bioentry
969                        if (currRefPosition!=null) {
970                            // Use the actual location specified.
971                            RichLocation loc;
972                            if (currRefPosition.equals("") || currRefPosition.equals("sites")) loc = RichLocation.EMPTY_LOCATION;
973                            else {
974                                List members = new ArrayList();
975                                String[] parts = currRefPosition.split(";\\s+");
976                                for (int i = 0; i < parts.length; i++) {
977                                    String[] parts2 = parts[i].split("\\.\\.");
978                                    if (parts2.length>1) {
979                                        RichLocation newLoc = new SimpleRichLocation(
980                                                new SimplePosition(Integer.parseInt(parts2[0])),
981                                                new SimplePosition(Integer.parseInt(parts2[1])),
982                                                i);
983                                        members.add(newLoc);
984                                    } else {
985                                        RichLocation newLoc = new SimpleRichLocation(
986                                                new SimplePosition(Integer.parseInt(parts2[0])), i);
987                                        members.add(newLoc);
988                                    }
989                                }
990                                loc = RichLocation.Tools.construct(members);
991                            }
992                            RankedDocRef rdr = new SimpleRankedDocRef(dr,loc,0); //rank set in listener
993                            rlistener.setRankedDocRef(rdr);
994                        } else {
995                            //by default location on first position, full span would be better
996                            RankedDocRef rdr = new SimpleRankedDocRef(dr,new Integer(1),new Integer(1),0);
997                            rlistener.setRankedDocRef(rdr);
998                        }
999                    } catch (ChangeVetoException e) {
1000                        throw new ParseException(e);
1001                    }
1002                }
1003                else if (qName.equals(FEATURE_KEY_TAG) && !this.parent.getElideFeatures()) {
1004                    templ.typeTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(val);
1005                } else if (qName.equals(FEATURE_LOC_TAG) && !this.parent.getElideFeatures()) {
1006                    String tidyLocStr = val.replaceAll("\\s+","");
1007                    templ.location = GenbankLocationParser.parseLocation(ns, accession, tidyLocStr);
1008                    rlistener.startFeature(templ);
1009                    rcrossrefCount = 0;
1010                    // We don't read the hierarchy of tags for location as they
1011                    // should contain the same information.
1012                } else if (qName.equals(FEATUREQUAL_NAME_TAG) && !this.parent.getElideFeatures()) {
1013                    if (currFeatQual!=null) {
1014                        rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(currFeatQual),null);
1015                    }
1016                    currFeatQual = val;
1017                } else if (qName.equals(FEATUREQUAL_VALUE_TAG) && !this.parent.getElideFeatures()) {
1018                    if (currFeatQual.equalsIgnoreCase("db_xref")) {
1019                        Matcher m = dbxp.matcher(val);
1020                        if (m.matches()) {
1021                            String dbname = m.group(1);
1022                            String raccession = m.group(2);
1023                            if (dbname.equalsIgnoreCase("taxon")) {
1024                                // Set the Taxon instead of a dbxref
1025                                tax = (NCBITaxon)RichObjectFactory.getObject(SimpleNCBITaxon.class, new Object[]{Integer.valueOf(raccession)});
1026                                rlistener.setTaxon(tax);
1027                                try {
1028                                    if (organism!=null) tax.addName(NCBITaxon.SCIENTIFIC,organism);
1029                                } catch (ChangeVetoException e) {
1030                                    throw new ParseException(e);
1031                                }
1032                            } else {
1033                                try {
1034                                    CrossRef cr = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{dbname, raccession, new Integer(0)});
1035                                    RankedCrossRef rcr = new SimpleRankedCrossRef(cr, ++rcrossrefCount);
1036                                    rlistener.getCurrentFeature().addRankedCrossRef(rcr);
1037                                } catch (ChangeVetoException e) {
1038                                    throw new ParseException(e);
1039                                }
1040                            }
1041                        } else {
1042                            throw new ParseException("Bad dbxref found: "+val);
1043                        }
1044                    } else if (currFeatQual.equalsIgnoreCase("organism")) {
1045                        try {
1046                            organism = val;
1047                            if (tax!=null) tax.addName(NCBITaxon.SCIENTIFIC,organism);
1048                        } catch (ChangeVetoException e) {
1049                            throw new ParseException(e);
1050                        }
1051                    } else {
1052                        if (currFeatQual.equalsIgnoreCase("translation")) {
1053                            // strip spaces from sequence
1054                            val = val.replaceAll("\\s+","");
1055                        }
1056                        rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(currFeatQual),val);
1057                    }
1058                    currFeatQual = null;
1059                } else if (qName.equals(FEATURE_TAG) && !this.parent.getElideFeatures()) {
1060                    rlistener.endFeature();
1061                }
1062                
1063                
1064                else if (qName.equals(INSDSEQ_TAG))
1065                    rlistener.endSequence();
1066            } catch (ParseException e) {
1067                throw new SAXException(e);
1068            }
1069            
1070            // drop old string
1071            this.m_currentString.setLength(0);
1072        }
1073        
1074        // process text inside tags
1075        public void characters(char[] ch, int start, int length) {
1076            this.m_currentString.append(ch, start, length);
1077        }
1078    }
1079}
1080