/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * FASTQ sequence format variant.
 *
 * <p>Each variant is defined by its inclusive quality score range and by the offset added to a
 * quality score to obtain its ASCII quality character. Sanger and Illumina variants convert between
 * error probabilities and quality scores with the same formula; the Solexa variant overrides both
 * conversions with its own.</p>
 *
 * @since 3.0.3
 */
public enum FastqVariant
{
    /** Sanger FASTQ sequence format variant. */
    FASTQ_SANGER("Original or Sanger format", 0, 93, 33),

    /** Solexa FASTQ sequence format variant. */
    FASTQ_SOLEXA("Solexa and early Illumina format", -5, 62, 64)
    {
        @Override
        public int qualityScore(final double errorProbability)
        {
            // eq. 2
            final double phredQ = -10.0d * Math.log10(errorProbability);
            // eq. 4
            return constrain(10.0d * Math.log10(Math.pow(10.0d, phredQ / 10.0d) - 1.0d));
        }

        @Override
        public double errorProbability(final int qualityScore)
        {
            final double q = Math.pow(10.0d, qualityScore / -10.0d);
            return q / (1.0d + q);
        }
    },

    /** Illumina FASTQ sequence format variant. */
    FASTQ_ILLUMINA("Illumina 1.3+ format", 0, 62, 64);


    /** Map of FASTQ sequence format variants keyed by name and lowercase-with-dashes name. */
    private static final Map<String, FastqVariant> FASTQ_VARIANTS = new HashMap<String, FastqVariant>(6);

    static
    {
        for (FastqVariant fastqVariant : values())
        {
            FASTQ_VARIANTS.put(fastqVariant.name(), fastqVariant);
            FASTQ_VARIANTS.put(fastqVariant.lowercaseName(), fastqVariant);
        }
    }

    /** Description of this FASTQ sequence format variant. */
    private final String description;

    /** Minimum quality score, inclusive. */
    private final int minimum;

    /** Maximum quality score, inclusive. */
    private final int maximum;

    /** Offset added to a quality score to obtain its ASCII quality character. */
    private final int asciiOffset;


    /**
     * Create a new FASTQ sequence format variant.
     *
     * @param description description of this FASTQ sequence format variant, must not be null
     * @param minimum minimum quality score, inclusive
     * @param maximum maximum quality score, inclusive
     * @param asciiOffset offset between a quality score and its ASCII quality character
     * @throws IllegalArgumentException if <code>description</code> is null
     */
    private FastqVariant(final String description, final int minimum, final int maximum, final int asciiOffset)
    {
        if (description == null)
        {
            throw new IllegalArgumentException("description must not be null");
        }
        this.description = description;
        this.minimum = minimum;
        this.maximum = maximum;
        this.asciiOffset = asciiOffset;
    }


    /**
     * Return the description of this FASTQ sequence format variant.
     * The description will not be null.
     *
     * @return the description of this FASTQ sequence format variant
     */
    public String getDescription()
    {
        return description;
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}
     */
    public boolean isSanger()
    {
        return this == FASTQ_SANGER;
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}
     */
    public boolean isSolexa()
    {
        return this == FASTQ_SOLEXA;
    }

    /**
     * Return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}.
     *
     * @return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}
     */
    public boolean isIllumina()
    {
        return this == FASTQ_ILLUMINA;
    }

    /**
     * Return the minimum quality score for this FASTQ sequence format variant.
     *
     * @return the minimum quality score for this FASTQ sequence format variant.
     */
    public int minimumQualityScore()
    {
        return minimum;
    }

    /**
     * Return the maximum quality score for this FASTQ sequence format variant.
     *
     * @return the maximum quality score for this FASTQ sequence format variant.
     */
    public int maximumQualityScore()
    {
        return maximum;
    }

    /**
     * Convert the specified quality in ASCII format to a quality score.
     * The result is not range checked against the minimum and maximum quality scores.
     *
     * @param c quality in ASCII format
     * @return the specified quality in ASCII format converted to a quality score
     */
    public int qualityScore(final char c)
    {
        return c - asciiOffset;
    }

    /**
     * Convert the specified error probability to a quality score. The result is
     * rounded to the nearest int and constrained to the minimum and maximum quality scores.
     *
     * @since 4.2
     * @param errorProbability error probability
     * @return the specified error probability converted to a quality score
     */
    public int qualityScore(final double errorProbability)
    {
        // eq. 2
        return constrain(-10.0d * Math.log10(errorProbability));
    }

    /**
     * Convert the specified quality score to a quality in ASCII format.
     *
     * @since 3.0.6
     * @param qualityScore quality score, must be {@code >= minimumQualityScore()}
     *    and {@code <= maximumQualityScore()}
     * @return the quality in ASCII format converted from the specified quality score
     * @throws IllegalArgumentException if <code>qualityScore</code> is outside the range
     *    of quality scores for this FASTQ sequence format variant
     */
    public char quality(final int qualityScore)
    {
        if (qualityScore < minimumQualityScore())
        {
            throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()");
        }
        if (qualityScore > maximumQualityScore())
        {
            throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()");
        }
        return (char) (qualityScore + asciiOffset);
    }

    /**
     * Convert the specified quality in ASCII format to an error probability.
     *
     * @param c quality in ASCII format
     * @return the specified quality in ASCII format converted to an error probability
     */
    public double errorProbability(final char c)
    {
        return errorProbability(qualityScore(c));
    }

    /**
     * Calculate the error probability given the specified quality score.
     *
     * @param qualityScore quality score
     * @return the error probability given the specified quality score
     */
    public double errorProbability(final int qualityScore)
    {
        return Math.pow(10.0d, qualityScore / -10.0d);
    }

    /**
     * Return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style.
     *
     * @return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style
     */
    public String lowercaseName()
    {
        // Locale.ROOT keeps the result independent of the default locale; with a Turkish
        // default locale a plain toLowerCase() maps 'I' to dotless 'ı' and breaks name lookup.
        return name().toLowerCase(Locale.ROOT).replace('_', '-');
    }

    /**
     * Constrain the specified quality score in double precision to the minimum and maximum quality
     * scores in int precision.
     *
     * @since 4.2
     * @param qualityScore quality score in double precision
     * @return the specified quality score in double precision constrained to the minimum and maximum quality
     *    scores in int precision
     */
    protected int constrain(final double qualityScore)
    {
        // Round in double precision; narrowing to float first can push a value just below
        // a half up onto the half and round it the wrong way. Math.round(double) returns 0
        // for NaN and saturates for infinities, so clamping as long before narrowing is safe.
        final long rounded = Math.round(qualityScore);
        return (int) Math.min(maximumQualityScore(), Math.max(minimumQualityScore(), rounded));
    }

    /**
     * Return the FASTQ sequence format variant with the specified name, if any.  The name may
     * be specified in either <code>UPPERCASE_WITH_UNDERSCORES</code>
     * or <code>lowercase-with-dashes</code> style.
     *
     * @param name name
     * @return the FASTQ sequence format variant with the specified name, or <code>null</code>
     *    if no such FASTQ sequence format variant exists
     */
    public static FastqVariant parseFastqVariant(final String name)
    {
        return FASTQ_VARIANTS.get(name);
    }
}