001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.bio.program.fastq; 022 023import java.util.HashMap; 024import java.util.Map; 025 026/** 027 * FASTQ sequence format variant. 028 * 029 * @since 1.7.1 030 */ 031public enum FastqVariant 032{ 033 /** Sanger FASTQ sequence format variant. */ 034 FASTQ_SANGER("Original or Sanger format") 035 { 036 @Override 037 public int minimumQualityScore() 038 { 039 return 0; 040 } 041 042 @Override 043 public int maximumQualityScore() 044 { 045 return 93; 046 } 047 048 @Override 049 public int qualityScore(final char c) 050 { 051 return ((int) c) - 33; 052 } 053 054 @Override 055 public int qualityScore(final double errorProbability) 056 { 057 // eq. 2 058 int phredQ = constrain(-10.0d * Math.log10(errorProbability)); 059 return phredQ; 060 } 061 062 @Override 063 public char quality(final int qualityScore) 064 { 065 if (qualityScore < minimumQualityScore()) 066 { 067 throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()"); 068 } 069 if (qualityScore > maximumQualityScore()) 070 { 071 throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()"); 072 } 073 return (char) (qualityScore + 33); 074 } 075 076 @Override 077 public double errorProbability(final int qualityScore) 078 { 079 return Math.pow(10.0d, ((double) qualityScore) / -10.0d); 080 } 081 }, 082 083 /** Solexa FASTQ sequence format variant. */ 084 FASTQ_SOLEXA("Solexa and early Illumina format") 085 { 086 @Override 087 public int minimumQualityScore() 088 { 089 return -5; 090 } 091 092 @Override 093 public int maximumQualityScore() 094 { 095 return 62; 096 } 097 098 @Override 099 public int qualityScore(final char c) 100 { 101 return ((int) c) - 64; 102 } 103 104 @Override 105 public int qualityScore(final double errorProbability) 106 { 107 // eq. 2 108 double phredQ = -10.0d * Math.log10(errorProbability); 109 // eq. 4 110 int solexaQ = constrain(10.0d * Math.log10(Math.pow(10.0d, (phredQ/10.0d)) - 1.0d)); 111 112 return solexaQ; 113 } 114 115 @Override 116 public char quality(final int qualityScore) 117 { 118 if (qualityScore < minimumQualityScore()) 119 { 120 throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()"); 121 } 122 if (qualityScore > maximumQualityScore()) 123 { 124 throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()"); 125 } 126 return (char) (qualityScore + 64); 127 } 128 129 @Override 130 public double errorProbability(final int qualityScore) 131 { 132 double q = Math.pow(10.0d, ((double) qualityScore) / -10.0d); 133 return q / (1.0d + q); 134 } 135 }, 136 137 /** Illumina FASTQ sequence format variant. */ 138 FASTQ_ILLUMINA("Illumina 1.3+ format") 139 { 140 @Override 141 public int minimumQualityScore() 142 { 143 return 0; 144 } 145 146 @Override 147 public int maximumQualityScore() 148 { 149 return 62; 150 } 151 152 @Override 153 public int qualityScore(final char c) 154 { 155 return ((int) c) - 64; 156 } 157 158 @Override 159 public int qualityScore(final double errorProbability) 160 { 161 // eq. 2 162 int phredQ = constrain(-10.0d * Math.log10(errorProbability)); 163 return phredQ; 164 } 165 166 @Override 167 public char quality(final int qualityScore) 168 { 169 if (qualityScore < minimumQualityScore()) 170 { 171 throw new IllegalArgumentException("qualityScore must be greater than or equal to minimumQualityScore()"); 172 } 173 if (qualityScore > maximumQualityScore()) 174 { 175 throw new IllegalArgumentException("qualityScore must be less than or equal to maximumQualityScore()"); 176 } 177 return (char) (qualityScore + 64); 178 } 179 180 @Override 181 public double errorProbability(final int qualityScore) 182 { 183 return Math.pow(10.0d, ((double) qualityScore) / -10.0d); 184 } 185 }; 186 187 188 /** Map of FASTQ sequence format variants keyed by name and lowercase-with-dashes name. */ 189 private static final Map<String, FastqVariant> FASTQ_VARIANTS = new HashMap<String, FastqVariant>(6); 190 191 static 192 { 193 for (FastqVariant fastqVariant : values()) 194 { 195 FASTQ_VARIANTS.put(fastqVariant.name(), fastqVariant); 196 FASTQ_VARIANTS.put(fastqVariant.lowercaseName(), fastqVariant); 197 } 198 } 199 200 /** Description of this FASTQ sequence format variant. */ 201 private final String description; 202 203 204 /** 205 * Create a new FASTQ sequence format variant with the specified description. 206 * 207 * @param description description of this FASTQ sequence format variant, must not be null 208 */ 209 private FastqVariant(final String description) 210 { 211 if (description == null) 212 { 213 throw new IllegalArgumentException("description must not be null"); 214 } 215 this.description = description; 216 } 217 218 219 /** 220 * Return the description of this FASTQ sequence format variant. 221 * The description will not be null. 222 * 223 * @return the description of this FASTQ sequence format variant 224 */ 225 public String getDescription() 226 { 227 return description; 228 } 229 230 /** 231 * Return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER}. 232 * 233 * @return true if this FASTQ sequence format variant is {@link #FASTQ_SANGER} 234 */ 235 public boolean isSanger() 236 { 237 return (this == FASTQ_SANGER); 238 } 239 240 /** 241 * Return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA}. 242 * 243 * @return true if this FASTQ sequence format variant is {@link #FASTQ_SOLEXA} 244 */ 245 public boolean isSolexa() 246 { 247 return (this == FASTQ_SOLEXA); 248 } 249 250 /** 251 * Return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA}. 252 * 253 * @return true if this FASTQ sequence format variant is {@link #FASTQ_ILLUMINA} 254 */ 255 public boolean isIllumina() 256 { 257 return (this == FASTQ_ILLUMINA); 258 } 259 260 /** 261 * Return the minimum quality score for this FASTQ sequence format variant. 262 * 263 * @since 1.8.2 264 * @return the minimum quality score for this FASTQ sequence format variant. 265 */ 266 public abstract int minimumQualityScore(); 267 268 /** 269 * Return the maximum quality score for this FASTQ sequence format variant. 270 * 271 * @since 1.8.2 272 * @return the maximum quality score for this FASTQ sequence format variant. 273 */ 274 public abstract int maximumQualityScore(); 275 276 /** 277 * Convert the specified quality in ASCII format to a quality score. 278 * 279 * @since 1.8.2 280 * @param c quality in ASCII format 281 * @return the specified quality in ASCII format converted to a quality score 282 */ 283 public abstract int qualityScore(char c); 284 285 /** 286 * Convert the specified error probability to a quality score. 287 * 288 * @since 1.9.3 289 * @param errorProbability error probability 290 * @return the specified error probability converted to a quality score 291 */ 292 public abstract int qualityScore(double errorProbability); 293 294 /** 295 * Convert the specified quality score to a quality in ASCII format. 296 * 297 * @since 1.8.3 298 * @param qualityScore quality score, must be <code>>= minimumQualityScore()</code> 299 * and <code><= maximumQualityScore()</code> 300 * @return the quality in ASCII format converted from the specified quality score 301 */ 302 public abstract char quality(int qualityScore); 303 304 /** 305 * Convert the specified quality in ASCII format to an error probability. 306 * 307 * @since 1.8.2 308 * @param c quality in ASCII format 309 * @return the specified quality in ASCII format converted to an error probability 310 */ 311 public double errorProbability(char c) 312 { 313 return errorProbability(qualityScore(c)); 314 } 315 316 /** 317 * Calculate the error probability given the specified quality score. 318 * 319 * @since 1.8.2 320 * @param qualityScore quality score 321 * @return the error probability given the specified quality score 322 */ 323 public abstract double errorProbability(int qualityScore); 324 325 /** 326 * Return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style. 327 * 328 * @return the name of this FASTQ sequence format variant in <code>lowercase-with-dashes</code> style 329 */ 330 public String lowercaseName() 331 { 332 return name().toLowerCase().replace('_', '-'); 333 } 334 335 336 /** 337 * Constrain the specified quality score in double precision to the minimum and maximum quality 338 * scores in int precision. 339 * 340 * @since 1.9.3 341 * @param qualityScore quality score in double precision 342 * @return the specified quality score in double precision constrained to the minimum and maximum quality 343 * scores in int precision 344 */ 345 protected int constrain(final double qualityScore) 346 { 347 // ick. 348 return Math.min(maximumQualityScore(), Math.max(minimumQualityScore(), Math.round((float) qualityScore))); 349 } 350 351 /** 352 * Return the FASTQ sequence format variant with the specified name, if any. The name may 353 * be specified in either <code>UPPERCASE_WITH_UNDERSCORES</code> 354 * or <code>lowercase-with-dashes</code> style. 355 * 356 * @param name name 357 * @return the FASTQ sequence format variant with the specified name, or <code>null</code> 358 * if no such FASTQ sequence format variant exists 359 */ 360 public static FastqVariant parseFastqVariant(final String name) 361 { 362 return FASTQ_VARIANTS.get(name); 363 } 364}