001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.bio.program.abi; 022 023import java.util.ArrayList; 024import java.util.Iterator; 025import java.util.List; 026 027import org.biojava.bio.BioError; 028import org.biojava.bio.alignment.Alignment; 029import org.biojava.bio.seq.DNATools; 030import org.biojava.bio.seq.io.CharacterTokenization; 031import org.biojava.bio.seq.io.SymbolTokenization; 032import org.biojava.bio.symbol.AbstractAlphabet; 033import org.biojava.bio.symbol.AlphabetManager; 034import org.biojava.bio.symbol.AtomicSymbol; 035import org.biojava.bio.symbol.FiniteAlphabet; 036import org.biojava.bio.symbol.IllegalAlphabetException; 037import org.biojava.bio.symbol.IntegerAlphabet; 038import org.biojava.bio.symbol.Symbol; 039import org.biojava.bio.symbol.SymbolList; 040import org.biojava.bio.symbol.SymbolListViews; 041import org.biojava.utils.ListTools; 042 043/** 044 * Useful functionality for working with fasta files where the quality of the 045 * DNA is encoded as upper and lower case DNA characters. 046 * 047 * @author Matthew Pocock 048 */ 049public class ABITools { 050 /** 051 * The quality alphabet. This is equivalent to DNA x [0,1] where 0 represents 052 * poorly supported (lower case) and 1 represents strongly supported (upper 053 * case). 054 */ 055 public static final FiniteAlphabet QUALITY; 056 057 /** 058 * The poorly supported symbol. 059 */ 060 public static final AtomicSymbol _0; 061 062 /** 063 * The well supported symbol. 064 */ 065 public static final AtomicSymbol _1; 066 067 /** 068 * Alignment label for the DNA sequence row. 069 */ 070 public static final Object SEQUENCE = "SEQUENCE"; 071 072 /** 073 * Alignment label for the support row. 074 */ 075 public static final Object SUPPORT = "SUPPORT"; 076 077 static { 078 try { 079 IntegerAlphabet.SubIntegerAlphabet _01 080 = IntegerAlphabet.getSubAlphabet(0, 1); 081 _0 = _01.getSymbol(0); 082 _1 = _01.getSymbol(1); 083 084 List alphas = new ArrayList(); 085 alphas.add(DNATools.getDNA()); 086 alphas.add(_01); 087 088 // naughty here - we know because we are insiders that the result of this 089 // call will be an AbstractAlphabet impl 090 AbstractAlphabet quality = (AbstractAlphabet) AlphabetManager.getCrossProductAlphabet(alphas); 091 CharacterTokenization tok = new CharacterTokenization(quality, true); 092 093 // all lower case characters go to sym,0 094 // all upper case characters go to sym,1 095 SymbolList sl = DNATools.createDNA("agctrymkswhbvdn"); 096 ListTools.Doublet pair = new ListTools.Doublet(); 097 SymbolTokenization dnaTok = DNATools.getDNA().getTokenization("token"); 098 for(Iterator i = sl.iterator(); i.hasNext(); ) { 099 pair.setA((Symbol) i.next()); 100 String c = dnaTok.tokenizeSymbol((Symbol) pair.getA()); 101 102 pair.setB(_1); 103 tok.bindSymbol(quality.getSymbol(pair), c.toUpperCase().charAt(0)); 104 105 pair.setB(_0); 106 tok.bindSymbol(quality.getSymbol(pair), c.toLowerCase().charAt(0)); 107 } 108 109 quality.putTokenization("token", tok); 110 QUALITY = quality; 111 } catch (Exception e) { 112 throw new BioError("Could not initialize ABI quality alphabet",e); 113 } 114 } 115 116 /** 117 * <p> 118 * View a symbol list over the QUALITY alphabet as an alignment. 119 * </p> 120 * 121 * <p> 122 * The alignment will have labels of SEQUENCE and SUPPORT that retrieve the 123 * DNA sequence and the binary support values respectively. 124 * </p> 125 * 126 * @param abiSeq the SymbolList over the QUALITY alphabet to view 127 * @return an Alignment view of abiSeq 128 * @throws IllegalAlphabetException if abiSeq is not over QUALITY 129 */ 130 public static Alignment getAlignment(SymbolList abiSeq) 131 throws IllegalAlphabetException { 132 return SymbolListViews.alignment( 133 new ListTools.Doublet(SEQUENCE, SUPPORT), 134 abiSeq 135 ); 136 } 137}