001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.genome.io.fastq;
022
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
025
/**
 * FASTQ sequence format variant.
 *
 * <p>
 * Each variant defines the range of quality scores it supports, the ASCII offset used to
 * encode a quality score as a single character, and the relationship between a quality
 * score and an error probability.
 * </p>
 *
 * @since 3.0.3
 */
public enum FastqVariant
{
    /** Sanger FASTQ sequence format variant: scores 0 to 93, ASCII offset 33. */
    FASTQ_SANGER("Original or Sanger format")
    {
        @Override
        public int minimumQualityScore()
        {
            return 0;
        }

        @Override
        public int maximumQualityScore()
        {
            return 93;
        }

        @Override
        public int qualityScore(final char c)
        {
            // ASCII offset of 33
            return c - 33;
        }

        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2: Q = -10 log10(p)
            return constrain(-10.0d * Math.log10(errorProbability));
        }

        @Override
        public char quality(final int qualityScore)
        {
            return toQualityChar(qualityScore, minimumQualityScore(), maximumQualityScore(), 33);
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            // p = 10^(-Q / 10)
            return Math.pow(10.0d, qualityScore / -10.0d);
        }
    },

    /** Solexa FASTQ sequence format variant: scores -5 to 62, ASCII offset 64. */
    FASTQ_SOLEXA("Solexa and early Illumina format")
    {
        @Override
        public int minimumQualityScore()
        {
            return -5;
        }

        @Override
        public int maximumQualityScore()
        {
            return 62;
        }

        @Override
        public int qualityScore(final char c)
        {
            // ASCII offset of 64
            return c - 64;
        }

        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2: Q_phred = -10 log10(p)
            final double phredQ = -10.0d * Math.log10(errorProbability);
            // eq. 4: Q_solexa = 10 log10(10^(Q_phred / 10) - 1)
            return constrain(10.0d * Math.log10(Math.pow(10.0d, phredQ / 10.0d) - 1.0d));
        }

        @Override
        public char quality(final int qualityScore)
        {
            return toQualityChar(qualityScore, minimumQualityScore(), maximumQualityScore(), 64);
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            // Solexa scores encode odds rather than a probability: p = q / (1 + q), q = 10^(-Q / 10)
            final double odds = Math.pow(10.0d, qualityScore / -10.0d);
            return odds / (1.0d + odds);
        }
    },

    /** Illumina FASTQ sequence format variant: scores 0 to 62, ASCII offset 64. */
    FASTQ_ILLUMINA("Illumina 1.3+ format")
    {
        @Override
        public int minimumQualityScore()
        {
            return 0;
        }

        @Override
        public int maximumQualityScore()
        {
            return 62;
        }

        @Override
        public int qualityScore(final char c)
        {
            // ASCII offset of 64
            return c - 64;
        }

        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2: Q = -10 log10(p)
            return constrain(-10.0d * Math.log10(errorProbability));
        }

        @Override
        public char quality(final int qualityScore)
        {
            return toQualityChar(qualityScore, minimumQualityScore(), maximumQualityScore(), 64);
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            // p = 10^(-Q / 10)
            return Math.pow(10.0d, qualityScore / -10.0d);
        }
    };


    /** Map of FASTQ sequence format variants keyed by name and lowercase-with-dashes name. */
    private static final Map<String, FastqVariant> FASTQ_VARIANTS = new HashMap<>(6);

    static
    {
        for (FastqVariant fastqVariant : values())
        {
            FASTQ_VARIANTS.put(fastqVariant.name(), fastqVariant);
            FASTQ_VARIANTS.put(fastqVariant.lowercaseName(), fastqVariant);
        }
    }

    /** Description of this FASTQ sequence format variant. */
    private final String description;


    /**
     * Create a new FASTQ sequence format variant with the specified description.
     *
     * @param description description of this FASTQ sequence format variant, must not be null
     * @throws IllegalArgumentException if <code>description</code> is null
     */
    private FastqVariant(final String description)
    {
        if (description == null)
        {
            throw new IllegalArgumentException("description must not be null");
        }
        this.description = description;
    }


    /**
     * Return the description of this FASTQ sequence format variant.
     * The description will not be null.
     *
     * @return the description of this FASTQ sequence format variant
     */
    public String getDescription()
    {
        return description;
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}
     */
    public boolean isSanger()
    {
        return (this == FASTQ_SANGER);
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}
     */
    public boolean isSolexa()
    {
        return (this == FASTQ_SOLEXA);
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}
     */
    public boolean isIllumina()
    {
        return (this == FASTQ_ILLUMINA);
    }

    /**
     * Return the minimum quality score for this FASTQ sequence format variant.
     *
     * @return the minimum quality score for this FASTQ sequence format variant.
     */
    public abstract int minimumQualityScore();

    /**
     * Return the maximum quality score for this FASTQ sequence format variant.
     *
     * @return the maximum quality score for this FASTQ sequence format variant.
     */
    public abstract int maximumQualityScore();

    /**
     * Convert the specified quality in ASCII format to a quality score.  The character
     * is not range checked; the variant's ASCII offset is simply subtracted from it.
     *
     * @param c quality in ASCII format
     * @return the specified quality in ASCII format converted to a quality score
     */
    public abstract int qualityScore(char c);

    /**
     * Convert the specified error probability to a quality score.  The result is rounded
     * to the nearest integer and constrained to the range
     * <code>minimumQualityScore()</code> to <code>maximumQualityScore()</code>, inclusive.
     * The error probability itself is not validated.
     *
     * @since 4.2
     * @param errorProbability error probability
     * @return the specified error probability converted to a quality score
     */
    public abstract int qualityScore(double errorProbability);

    /**
     * Convert the specified quality score to a quality in ASCII format.
     *
     * @since 3.0.6
     * @param qualityScore quality score, must be <code>&gt;= minimumQualityScore()</code>
     *    and <code>&lt;= maximumQualityScore()</code>
     * @return the quality in ASCII format converted from the specified quality score
     * @throws IllegalArgumentException if <code>qualityScore</code> is less than
     *    <code>minimumQualityScore()</code> or greater than <code>maximumQualityScore()</code>
     */
    public abstract char quality(int qualityScore);

    /**
     * Convert the specified quality in ASCII format to an error probability.
     *
     * @param c quality in ASCII format
     * @return the specified quality in ASCII format converted to an error probability
     */
    public double errorProbability(char c)
    {
        return errorProbability(qualityScore(c));
    }

    /**
     * Calculate the error probability given the specified quality score.
     *
     * @param qualityScore quality score
     * @return the error probability given the specified quality score
     */
    public abstract double errorProbability(int qualityScore);

    /**
     * Return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style.
     * The conversion does not depend on the default locale.
     *
     * @return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style
     */
    public String lowercaseName()
    {
        // Locale.ROOT keeps the mapping stable; e.g. a Turkish default locale would
        // otherwise lowercase 'I' to a dotless i and break lookups by lowercase name
        return name().toLowerCase(Locale.ROOT).replace('_', '-');
    }

    /**
     * Constrain the specified quality score in double precision to the minimum and maximum quality
     * scores in int precision.  The score is rounded to the nearest integer in double precision
     * before it is constrained; positive and negative infinity are constrained to the maximum and
     * minimum quality score respectively, and NaN is treated as zero.
     *
     * @since 4.2
     * @param qualityScore quality score in double precision
     * @return the specified quality score in double precision constrained to the minimum and maximum quality
     *    scores in int precision
     */
    protected int constrain(final double qualityScore)
    {
        // round as a double (not via float) so values just below a half-way point are not
        // pushed onto it by the narrowing conversion; clamp as long before narrowing to int
        final long rounded = Math.round(qualityScore);
        return (int) Math.min(maximumQualityScore(), Math.max(minimumQualityScore(), rounded));
    }

    /**
     * Return the FASTQ sequence format variant with the specified name, if any.  The name may
     * be specified in either <code>UPPERCASE_WITH_UNDERSCORES</code>
     * or <code>lowercase-with-dashes</code> style.
     *
     * @param name name
     * @return the FASTQ sequence format variant with the specified name, or <code>null</code>
     *    if no such FASTQ sequence format variant exists
     */
    public static FastqVariant parseFastqVariant(final String name)
    {
        return FASTQ_VARIANTS.get(name);
    }

    /**
     * Range check the specified quality score and encode it as an ASCII character by adding
     * the specified offset.  Shared by the variant-specific implementations of {@link #quality(int)}.
     *
     * @param qualityScore quality score to encode
     * @param minimum minimum allowed quality score, inclusive
     * @param maximum maximum allowed quality score, inclusive
     * @param offset ASCII offset to add to the quality score
     * @return the quality score encoded as an ASCII character
     * @throws IllegalArgumentException if <code>qualityScore</code> is outside of the allowed range
     */
    private static char toQualityChar(final int qualityScore, final int minimum, final int maximum, final int offset)
    {
        if (qualityScore < minimum)
        {
            throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()");
        }
        if (qualityScore > maximum)
        {
            throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()");
        }
        return (char) (qualityScore + offset);
    }
}