/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.core.sequence.template;

import org.biojava.nbio.core.sequence.compound.NucleotideCompound;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Base class for compound sets made of nucleotides. It registers every
 * nucleotide in both an upper-case and a lower-case form and offers helpers
 * for working with ambiguity symbols.
 *
 * <p>All case conversions use {@link Locale#ROOT} so that the registered base
 * strings do not depend on the default locale of the JVM (e.g. the Turkish
 * dotted/dotless {@code i}).
 *
 * @author Andy Yates
 * @param <C> Type of compound this set will contain but must extend
 * NucleotideCompound
 */
public abstract class AbstractNucleotideCompoundSet<C extends NucleotideCompound>
	extends AbstractCompoundSet<C> {

	/**
	 * Registers a nucleotide in its upper-case and lower-case form.
	 *
	 * <p>Two compounds are created through
	 * {@link #newNucleotideCompound(String, String, String...)}: one from the
	 * upper-cased arguments and one from the lower-cased arguments. Both are
	 * then handed to {@code addCompound} together with the compounds looked up
	 * for every upper- and lower-cased equivalent.
	 *
	 * @param base the nucleotide symbol; any case
	 * @param complement the symbol of the complementary nucleotide; any case
	 * @param equivalents symbols of the nucleotides this one is equivalent to;
	 * may be empty
	 */
	protected void addNucleotideCompound(String base, String complement, String... equivalents) {

		String[] upperEquivalents = new String[equivalents.length];
		String[] lowerEquivalents = new String[equivalents.length];
		for (int i = 0; i < equivalents.length; i++) {
			// Locale.ROOT keeps the result independent of the default locale
			upperEquivalents[i] = equivalents[i].toUpperCase(Locale.ROOT);
			lowerEquivalents[i] = equivalents[i].toLowerCase(Locale.ROOT);
		}

		C upper = newNucleotideCompound(
				base.toUpperCase(Locale.ROOT), complement.toUpperCase(Locale.ROOT), upperEquivalents);
		C lower = newNucleotideCompound(
				base.toLowerCase(Locale.ROOT), complement.toLowerCase(Locale.ROOT), lowerEquivalents);

		// Upper- and lower-case lookups are interleaved per equivalent.
		// NOTE(review): the lookup results are passed on unchecked; presumably
		// the equivalents have to be registered before this call - confirm.
		List<C> equivalentCompounds = new ArrayList<>(2 * equivalents.length);
		for (int i = 0; i < equivalents.length; i++) {
			equivalentCompounds.add(getCompoundForString(upperEquivalents[i]));
			equivalentCompounds.add(getCompoundForString(lowerEquivalents[i]));
		}

		addCompound(upper, lower, equivalentCompounds);
	}

	/**
	 * Creates a compound of the concrete type held by this set.
	 *
	 * @param base the nucleotide symbol
	 * @param complement the symbol of the complementary nucleotide
	 * @param equivalents symbols of the nucleotides the new compound is
	 * equivalent to; may be empty
	 * @return the newly created compound
	 */
	protected abstract C newNucleotideCompound(String base, String complement, String... equivalents);

	/**
	 * Loops through all known ambiguous nucleotides and attempts to find which
	 * are equivalent to each other. Also takes into account lower-cased
	 * nucleotides as well as upper-cased ones.
	 *
	 * <p>A source compound is related to a target compound when the
	 * constituents of the target contain all constituents of the source. The
	 * relations are collected first and registered through
	 * {@code addEquivalent} (in both directions) afterwards.
	 */
	@SuppressWarnings("unchecked")
	protected void calculateIndirectAmbiguities() {
		Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap = new HashMap<>();

		List<NucleotideCompound> ambiguousCompounds = getAllCompounds().stream()
				.filter(NucleotideCompound::isAmbiguous)
				.collect(Collectors.toCollection(ArrayList::new));

		for (NucleotideCompound sourceCompound : ambiguousCompounds) {
			Set<NucleotideCompound> compoundConstituents = sourceCompound.getConstituents();
			for (NucleotideCompound targetCompound : ambiguousCompounds) {
				Set<NucleotideCompound> targetConstituents = targetCompound.getConstituents();
				if (!targetConstituents.containsAll(compoundConstituents)) {
					continue;
				}

				NucleotideCompound lcSourceCompound = toLowerCase(sourceCompound);
				NucleotideCompound lcTargetCompound = toLowerCase(targetCompound);

				checkAdd(equivalentsMap, sourceCompound, targetCompound);
				checkAdd(equivalentsMap, sourceCompound, lcTargetCompound);

				checkAdd(equivalentsMap, targetCompound, sourceCompound);
				checkAdd(equivalentsMap, lcTargetCompound, sourceCompound);

				checkAdd(equivalentsMap, lcSourceCompound, targetCompound);
				checkAdd(equivalentsMap, lcSourceCompound, lcTargetCompound);
			}
		}

		// And once it's all done start adding them to the equivalents map.
		// The casts to C are unchecked; every entry was obtained from
		// getAllCompounds() or getCompoundForString() of this very set
		// (presumably both yield C - confirm against AbstractCompoundSet).
		for (Map.Entry<NucleotideCompound, List<NucleotideCompound>> entry : equivalentsMap.entrySet()) {
			NucleotideCompound key = entry.getKey();
			for (NucleotideCompound value : entry.getValue()) {
				addEquivalent((C) key, (C) value);
				addEquivalent((C) value, (C) key);
			}
		}
	}

	/**
	 * Appends {@code value} to the list stored under {@code key}, creating the
	 * list on first use. Duplicate values are not filtered out.
	 *
	 * @param equivalentsMap the multimap-like structure to add to
	 * @param key the compound the value is recorded for
	 * @param value the compound to record
	 */
	private void checkAdd(
			Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap,
			NucleotideCompound key,
			NucleotideCompound value) {

		equivalentsMap.computeIfAbsent(key, unused -> new ArrayList<>()).add(value);
	}

	/**
	 * Looks up the compound registered under the lower-cased base of the given
	 * compound.
	 *
	 * @param compound the compound whose lower-case counterpart is wanted
	 * @return whatever {@code getCompoundForString} yields for the lower-cased
	 * base
	 */
	private NucleotideCompound toLowerCase(NucleotideCompound compound) {
		return getCompoundForString(compound.getBase().toLowerCase(Locale.ROOT));
	}

	/**
	 * Calculates the best symbol for a collection of compounds. For example,
	 * if you give this method A and C it will return M, which is the ambiguity
	 * symbol for these compounds.
	 *
	 * <p>The constituents of all given compounds are mapped to their
	 * upper-case compounds and collected into a set; the first known compound
	 * whose constituents equal that set is returned.
	 *
	 * @param compounds Compounds to calculate ambiguity for
	 * @return The ambiguity symbol which represents this set of nucleotides
	 * best, or {@code null} if no known compound has exactly these
	 * constituents
	 */
	public NucleotideCompound getAmbiguity(NucleotideCompound... compounds) {
		Set<NucleotideCompound> settedCompounds = new HashSet<>();
		for (NucleotideCompound compound : compounds) {
			for (NucleotideCompound subCompound : compound.getConstituents()) {
				settedCompounds.add(getCompoundForString(subCompound.getBase().toUpperCase(Locale.ROOT)));
			}
		}
		for (NucleotideCompound compound : getAllCompounds()) {
			if (compound.getConstituents().equals(settedCompounds)) {
				return compound;
			}
		}
		return null;
	}

	/**
	 * NucleotideCompounds can always complement
	 */
	@Override
	public boolean isComplementable() {
		return true;
	}
}