001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.bio.program.fastq;
022
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
025
/**
 * FASTQ sequence format variant.
 *
 * <p>
 * Each variant defines the range of valid quality scores, the offset used to encode a
 * quality score as a single ASCII character, and the conversions between quality scores
 * and error probabilities.
 * </p>
 *
 * @since 1.7.1
 */
public enum FastqVariant
{
    /** Sanger FASTQ sequence format variant: quality scores 0 to 93, ASCII offset 33. */
    FASTQ_SANGER("Original or Sanger format")
    {
        @Override
        public int minimumQualityScore()
        {
            return 0;
        }

        @Override
        public int maximumQualityScore()
        {
            return 93;
        }

        @Override
        public int qualityScore(final char c)
        {
            // quality characters are encoded with an ASCII offset of 33
            return ((int) c) - 33;
        }

        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2
            int phredQ = constrain(-10.0d * Math.log10(errorProbability));
            return phredQ;
        }

        @Override
        public char quality(final int qualityScore)
        {
            if (qualityScore < minimumQualityScore())
            {
                throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()");
            }
            if (qualityScore > maximumQualityScore())
            {
                throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()");
            }
            return (char) (qualityScore + 33);
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            // inverse of eq. 2:  p = 10^(-Q / 10)
            return Math.pow(10.0d, ((double) qualityScore) / -10.0d);
        }
    },

    /** Solexa FASTQ sequence format variant: quality scores -5 to 62, ASCII offset 64. */
    FASTQ_SOLEXA("Solexa and early Illumina format")
    {
        @Override
        public int minimumQualityScore()
        {
            return -5;
        }

        @Override
        public int maximumQualityScore()
        {
            return 62;
        }

        @Override
        public int qualityScore(final char c)
        {
            // quality characters are encoded with an ASCII offset of 64
            return ((int) c) - 64;
        }

        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2
            double phredQ = -10.0d * Math.log10(errorProbability);
            // eq. 4
            int solexaQ = constrain(10.0d * Math.log10(Math.pow(10.0d, (phredQ/10.0d)) - 1.0d));

            return solexaQ;
        }

        @Override
        public char quality(final int qualityScore)
        {
            if (qualityScore < minimumQualityScore())
            {
                throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()");
            }
            if (qualityScore > maximumQualityScore())
            {
                throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()");
            }
            return (char) (qualityScore + 64);
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            // q is the odds 10^(-Q / 10); the probability is odds / (1 + odds)
            double q = Math.pow(10.0d, ((double) qualityScore) / -10.0d);
            return q / (1.0d + q);
        }
    },

    /** Illumina FASTQ sequence format variant: quality scores 0 to 62, ASCII offset 64. */
    FASTQ_ILLUMINA("Illumina 1.3+ format")
    {
        @Override
        public int minimumQualityScore()
        {
            return 0;
        }

        @Override
        public int maximumQualityScore()
        {
            return 62;
        }

        @Override
        public int qualityScore(final char c)
        {
            // quality characters are encoded with an ASCII offset of 64
            return ((int) c) - 64;
        }

        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2
            int phredQ = constrain(-10.0d * Math.log10(errorProbability));
            return phredQ;
        }

        @Override
        public char quality(final int qualityScore)
        {
            if (qualityScore < minimumQualityScore())
            {
                throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()");
            }
            if (qualityScore > maximumQualityScore())
            {
                throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()");
            }
            return (char) (qualityScore + 64);
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            // inverse of eq. 2:  p = 10^(-Q / 10)
            return Math.pow(10.0d, ((double) qualityScore) / -10.0d);
        }
    };


    /** Map of FASTQ sequence format variants keyed by name and lowercase-with-dashes name. */
    private static final Map<String, FastqVariant> FASTQ_VARIANTS = new HashMap<String, FastqVariant>(6);

    static
    {
        // register every variant under both spellings accepted by parseFastqVariant(String)
        for (FastqVariant fastqVariant : values())
        {
            FASTQ_VARIANTS.put(fastqVariant.name(), fastqVariant);
            FASTQ_VARIANTS.put(fastqVariant.lowercaseName(), fastqVariant);
        }
    }

    /** Description of this FASTQ sequence format variant. */
    private final String description;


    /**
     * Create a new FASTQ sequence format variant with the specified description.
     *
     * @param description description of this FASTQ sequence format variant, must not be null
     * @throws IllegalArgumentException if <code>description</code> is null
     */
    private FastqVariant(final String description)
    {
        if (description == null)
        {
            throw new IllegalArgumentException("description must not be null");
        }
        this.description = description;
    }


    /**
     * Return the description of this FASTQ sequence format variant.
     * The description will not be null.
     *
     * @return the description of this FASTQ sequence format variant
     */
    public String getDescription()
    {
        return description;
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}
     */
    public boolean isSanger()
    {
        return (this == FASTQ_SANGER);
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}
     */
    public boolean isSolexa()
    {
        return (this == FASTQ_SOLEXA);
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}
     */
    public boolean isIllumina()
    {
        return (this == FASTQ_ILLUMINA);
    }

    /**
     * Return the minimum quality score for this FASTQ sequence format variant.
     *
     * @since 1.8.2
     * @return the minimum quality score for this FASTQ sequence format variant.
     */
    public abstract int minimumQualityScore();

    /**
     * Return the maximum quality score for this FASTQ sequence format variant.
     *
     * @since 1.8.2
     * @return the maximum quality score for this FASTQ sequence format variant.
     */
    public abstract int maximumQualityScore();

    /**
     * Convert the specified quality in ASCII format to a quality score.
     * The result is not checked against the minimum and maximum quality scores.
     *
     * @since 1.8.2
     * @param c quality in ASCII format
     * @return the specified quality in ASCII format converted to a quality score
     */
    public abstract int qualityScore(char c);

    /**
     * Convert the specified error probability to a quality score.  The result is
     * rounded to the nearest integer and constrained to the range
     * <code>minimumQualityScore()</code> to <code>maximumQualityScore()</code>, inclusive.
     *
     * @since 1.9.3
     * @param errorProbability error probability
     * @return the specified error probability converted to a quality score
     */
    public abstract int qualityScore(double errorProbability);

    /**
     * Convert the specified quality score to a quality in ASCII format.
     *
     * @since 1.8.3
     * @param qualityScore quality score, must be <code>&gt;= minimumQualityScore()</code>
     *    and <code>&lt;= maximumQualityScore()</code>
     * @return the quality in ASCII format converted from the specified quality score
     * @throws IllegalArgumentException if <code>qualityScore</code> is less than
     *    <code>minimumQualityScore()</code> or greater than <code>maximumQualityScore()</code>
     */
    public abstract char quality(int qualityScore);

    /**
     * Convert the specified quality in ASCII format to an error probability.
     *
     * @since 1.8.2
     * @param c quality in ASCII format
     * @return the specified quality in ASCII format converted to an error probability
     */
    public double errorProbability(char c)
    {
        return errorProbability(qualityScore(c));
    }

    /**
     * Calculate the error probability given the specified quality score.
     *
     * @since 1.8.2
     * @param qualityScore quality score
     * @return the error probability given the specified quality score
     */
    public abstract double errorProbability(int qualityScore);

    /**
     * Return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style.
     * The conversion does not depend on the default locale.
     *
     * @return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style
     */
    public String lowercaseName()
    {
        // Locale.ROOT keeps the result stable; the default-locale overload would map 'I' to a
        // dotless 'i' under e.g. a Turkish locale and break lookups by lowercase name
        return name().toLowerCase(Locale.ROOT).replace('_', '-');
    }


    /**
     * Constrain the specified quality score in double precision to the minimum and maximum quality
     * scores in int precision.  The score is rounded to the nearest integer first; infinite values
     * are constrained to the respective bound.
     *
     * @since 1.9.3
     * @param qualityScore quality score in double precision
     * @return the specified quality score in double precision constrained to the minimum and maximum quality
     *    scores in int precision
     */
    protected int constrain(final double qualityScore)
    {
        // Round in double precision; narrowing to float first can push a value just below x.5
        // up to x.5 and round it the wrong way.  Math.round(double) returns a long that saturates
        // for infinities (and is 0 for NaN), so clamp in long arithmetic before narrowing to int.
        final long rounded = Math.round(qualityScore);
        final long lowerBounded = Math.max((long) minimumQualityScore(), rounded);
        return (int) Math.min((long) maximumQualityScore(), lowerBounded);
    }

    /**
     * Return the FASTQ sequence format variant with the specified name, if any.  The name may
     * be specified in either <code>UPPERCASE_WITH_UNDERSCORES</code>
     * or <code>lowercase-with-dashes</code> style.
     *
     * @param name name
     * @return the FASTQ sequence format variant with the specified name, or <code>null</code>
     *    if no such FASTQ sequence format variant exists
     */
    public static FastqVariant parseFastqVariant(final String name)
    {
        return FASTQ_VARIANTS.get(name);
    }
}