001/*  @(#)FastaSequence.java 1.0  September 2009
002 *
003 *  Copyright (c) 2009 Peter Troshin
004 *
005 *        BioJava development code
006 *
007 * This code may be freely distributed and modified under the
008 * terms of the GNU Lesser General Public Licence.  This should
009 * be distributed with the code.  If you do not have a copy,
010 * see:
011 *
012 *      http://www.gnu.org/copyleft/lesser.html
013 *
014 * Copyright for this code is held jointly by the individual
015 * authors.  These should be listed in @author doc comments.
016 *
017 * For more information on the BioJava project and its aims,
018 * or to join the biojava-l mailing list, visit the home page
019 * at:
020 *
021 *      http://www.biojava.org/
022 *
023 */
024
025package org.biojava.nbio.data.sequence;
026
027import javax.xml.bind.annotation.XmlAccessType;
028import javax.xml.bind.annotation.XmlAccessorType;
029import java.util.regex.Matcher;
030import java.util.regex.Pattern;
031
032/**
033 * A FASTA formatted sequence. Please note that this class does not make any
034 * assumptions as to what sequence it store e.g. it could be nucleotide, protein
035 * or even gapped alignment sequence! The only guarantee it makes is that the
036 * sequence does not contain white space characters e.g. spaces, new lines etc
037 *
038 * @author pvtroshin
039 * @version 1.0
040 * @since 3.0.2
041 */
042
043@XmlAccessorType(XmlAccessType.FIELD)
044public final class FastaSequence implements Comparable<FastaSequence>{
045
046        /**
047         * Sequence id
048         */
049        private String id;
050
051        // TODO what about gapped sequence here! should be indicated
052        /**
053         * Returns the string representation of sequence
054         */
055        private String sequence;
056
057        @SuppressWarnings("unused")
058        private FastaSequence() {
059        // Default constructor for JaxB
060        }
061
062        /**
063         * Upon construction the any whitespace characters are removed from the
064         * sequence
065         *
066         * @param id
067         * @param sequence
068         */
069        public FastaSequence(final String id, final String sequence) {
070        this.id = id.trim();
071        this.sequence = SequenceUtil.cleanSequence(sequence);
072        }
073
074        /**
075         * Gets the value of id
076         *
077         * @return the value of id
078         */
079        public String getId() {
080        return id;
081        }
082
083        /**
084         * Gets the value of sequence
085         *
086         * @return the value of sequence
087         */
088        public String getSequence() {
089        return sequence;
090        }
091
092        public static int countMatchesInSequence(final String theString,
093                final String theRegExp) {
094        final Pattern p = Pattern.compile(theRegExp);
095        final Matcher m = p.matcher(theString);
096        int cnt = 0;
097        while (m.find()) {
098                cnt++;
099        }
100        return cnt;
101        }
102
103        public String getFormattedFasta() {
104        return getFormatedSequence(80);
105        }
106
107        /**
108         *
109         * @return one line name, next line sequence, no matter what the sequence
110         *         length is
111         */
112        public String getOnelineFasta() {
113        String fasta = ">" + getId() + "\n";
114        fasta += getSequence() + "\n";
115        return fasta;
116        }
117
118        /**
119         * Format sequence per width letter in one string. Without spaces.
120         *
121         * @return multiple line formated sequence, one line width letters length
122         *
123         */
124        public String getFormatedSequence(final int width) {
125        if (sequence == null) {
126                return "";
127        }
128
129        assert width >= 0 : "Wrong width parameter ";
130
131        final StringBuilder sb = new StringBuilder(sequence);
132        int nchunks = sequence.length() / width;
133        // add up inserted new line chars
134        nchunks = (nchunks + sequence.length()) / width;
135        int nlineCharcounter = 0;
136        for (int i = 1; i <= nchunks; i++) {
137                final int insPos = width * i + nlineCharcounter;
138                // to prevent inserting new line in the very end of a sequence then
139                // it would have failed.
140                // Also covers the case when the sequences shorter than width
141                if (sb.length() <= insPos) {
142                break;
143                }
144                sb.insert(insPos, "\n");
145                nlineCharcounter++;
146        }
147        return sb.toString();
148        }
149
150        /**
151         *
152         * @return sequence length
153         */
154        public int getLength() {
155        return sequence.length();
156        }
157
158        /**
159         * Same as oneLineFasta
160         */
161        @Override
162        public String toString() {
163        return this.getOnelineFasta();
164        }
165
166        @Override
167        public int hashCode() {
168        final int prime = 31;
169        int result = 1;
170        result = prime * result + ((id == null) ? 0 : id.hashCode());
171        result = prime * result
172                + ((sequence == null) ? 0 : sequence.hashCode());
173        return result;
174        }
175
176        @Override
177        public boolean equals(final Object obj) {
178        if (this == obj) {
179                return true;
180        }
181        if (obj == null) {
182                return false;
183        }
184        if (getClass() != obj.getClass()) {
185                return false;
186        }
187        final FastaSequence other = (FastaSequence) obj;
188        if (id == null) {
189                if (other.id != null) {
190                return false;
191                }
192        } else if (!id.equals(other.id)) {
193                return false;
194        }
195        if (sequence == null) {
196                if (other.sequence != null) {
197                return false;
198                }
199        } else if (!sequence.equals(other.sequence)) {
200                return false;
201        }
202        return true;
203        }
204
205        @Override
206        public int compareTo(FastaSequence o) {
207                if(o==null || o.id==null)
208                        return 1;
209
210                return this.getId().compareTo(o.id);
211        }
212
213}