001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.sequence.template; 022 023import org.biojava.nbio.core.sequence.compound.NucleotideCompound; 024 025import java.util.*; 026 027/** 028 * 029 * @author Andy Yates 030 * @param <C> Type of compound this set will contain but must extend 031 * NucleotideCompound 032 */ 033public abstract class AbstractNucleotideCompoundSet<C extends NucleotideCompound> 034 extends AbstractCompoundSet<C> { 035 036 protected void addNucleotideCompound(String base, String complement, String... equivalents) { 037 038 String[] upperEquivalents = new String[equivalents.length]; 039 String[] lowerEquivalents = new String[equivalents.length]; 040 for(int i=0; i<equivalents.length; i++) { 041 upperEquivalents[i] = equivalents[i].toUpperCase(); 042 lowerEquivalents[i] = equivalents[i].toLowerCase(); 043 } 044 045 C upper = newNucleotideCompound(base.toUpperCase(), complement.toUpperCase(), upperEquivalents); 046 C lower = newNucleotideCompound(base.toLowerCase(), complement.toLowerCase(), lowerEquivalents); 047 048 List<C> equivalentCompounds = new ArrayList<C>(); 049 050 for(int i=0; i<equivalents.length; i++) { 051 equivalentCompounds.add(getCompoundForString(upperEquivalents[i])); 052 equivalentCompounds.add(getCompoundForString(lowerEquivalents[i])); 053 } 054 055 addCompound(upper, lower, equivalentCompounds); 056 } 057 058 protected abstract C newNucleotideCompound(String base, String complement, String... equivalents); 059 060 /** 061 * Loops through all known nucleotides and attempts to find which are 062 * equivalent to each other. Also takes into account lower casing 063 * nucleotides as well as upper-cased ones. 064 */ 065 @SuppressWarnings("unchecked") 066 protected void calculateIndirectAmbiguities() { 067 Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap = new HashMap<NucleotideCompound, List<NucleotideCompound>>(); 068 069 List<NucleotideCompound> ambiguousCompounds = new ArrayList<NucleotideCompound>(); 070 for(NucleotideCompound compound: getAllCompounds()) { 071 if (!compound.isAmbiguous()) { 072 continue; 073 } 074 ambiguousCompounds.add(compound); 075 } 076 077 for(NucleotideCompound sourceCompound: ambiguousCompounds) { 078 Set<NucleotideCompound> compoundConstituents = sourceCompound.getConstituents(); 079 for(NucleotideCompound targetCompound: ambiguousCompounds) { 080 Set<NucleotideCompound> targetConstituents = targetCompound.getConstituents(); 081 if(targetConstituents.containsAll(compoundConstituents)) { 082 NucleotideCompound lcSourceCompound = toLowerCase(sourceCompound); 083 NucleotideCompound lcTargetCompound = toLowerCase(targetCompound); 084 085 //equivalentsMap.put(sourceCompound, targetCompound); 086 // equivalentsMap.put(sourceCompound, lcTargetCompound); 087 088 089 checkAdd(equivalentsMap, sourceCompound, targetCompound); 090 checkAdd(equivalentsMap, sourceCompound, lcTargetCompound); 091 092 checkAdd(equivalentsMap,targetCompound,sourceCompound); 093 checkAdd(equivalentsMap, lcTargetCompound, sourceCompound); 094 095 checkAdd(equivalentsMap, lcSourceCompound, targetCompound); 096 checkAdd(equivalentsMap, lcSourceCompound, lcTargetCompound); 097 098 } 099 } 100 } 101 102 //And once it's all done start adding them to the equivalents map 103 104 for ( NucleotideCompound key: equivalentsMap.keySet()){ 105 List<NucleotideCompound> vals = equivalentsMap.get(key); 106 for (NucleotideCompound value: vals){ 107 addEquivalent((C)key,(C)value); 108 addEquivalent((C)value,(C)key); 109 } 110 } 111 } 112 113 private void checkAdd( 114 Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap, 115 NucleotideCompound key, 116 NucleotideCompound value) { 117 118 119 List<NucleotideCompound> listS = equivalentsMap.get(key); 120 if ( listS == null){ 121 listS = new ArrayList<NucleotideCompound>(); 122 equivalentsMap.put(key, listS); 123 } 124 listS.add(value); 125 126 127} 128 129private NucleotideCompound toLowerCase(NucleotideCompound compound) { 130 return getCompoundForString(compound.getBase().toLowerCase()); 131 } 132 133 /** 134 * Calculates the best symbol for a collection of compounds. For example 135 * if you gave this method a AC it will return a M which is the ambiguity 136 * symbol for these compounds. 137 * 138 * @param compounds Compounds to calculate ambiguity for 139 * @return The ambiguity symbol which represents this set of nucleotides best 140 */ 141 public NucleotideCompound getAmbiguity(NucleotideCompound... compounds) { 142 Set<NucleotideCompound> settedCompounds = new HashSet<NucleotideCompound>(); 143 for(NucleotideCompound compound: compounds) { 144 for(NucleotideCompound subCompound: compound.getConstituents()) { 145 settedCompounds.add(getCompoundForString(subCompound.getBase().toUpperCase())); 146 } 147 } 148 for(NucleotideCompound compound: getAllCompounds()) { 149 if(compound.getConstituents().equals(settedCompounds)) { 150 return compound; 151 } 152 } 153 return null; 154 } 155 156 /** 157 * NucleotideCompounds can always complement 158 */ 159 @Override 160 public boolean isComplementable() { 161 return true; 162 } 163}