001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.template;
022
023import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
024
025import java.util.*;
026
027/**
028 *
029 * @author Andy Yates
030 * @param <C> Type of compound this set will contain but must extend
031 * NucleotideCompound
032 */
033public abstract class AbstractNucleotideCompoundSet<C extends NucleotideCompound>
034        extends AbstractCompoundSet<C> {
035
036        protected void addNucleotideCompound(String base, String complement, String... equivalents) {
037
038                String[] upperEquivalents = new String[equivalents.length];
039                String[] lowerEquivalents = new String[equivalents.length];
040                for(int i=0; i<equivalents.length; i++) {
041                        upperEquivalents[i] = equivalents[i].toUpperCase();
042                        lowerEquivalents[i] = equivalents[i].toLowerCase();
043                }
044
045                C upper = newNucleotideCompound(base.toUpperCase(), complement.toUpperCase(), upperEquivalents);
046                C lower = newNucleotideCompound(base.toLowerCase(), complement.toLowerCase(), lowerEquivalents);
047
048                List<C> equivalentCompounds = new ArrayList<C>();
049
050                for(int i=0; i<equivalents.length; i++) {
051                        equivalentCompounds.add(getCompoundForString(upperEquivalents[i]));
052                        equivalentCompounds.add(getCompoundForString(lowerEquivalents[i]));
053                }
054
055                addCompound(upper, lower, equivalentCompounds);
056        }
057
058        protected abstract C newNucleotideCompound(String base, String complement, String... equivalents);
059
060        /**
061         * Loops through all known nucleotides and attempts to find which are
062         * equivalent to each other. Also takes into account lower casing
063         * nucleotides as well as upper-cased ones.
064         */
065        @SuppressWarnings("unchecked")
066        protected void calculateIndirectAmbiguities() {
067                Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap = new HashMap<NucleotideCompound, List<NucleotideCompound>>();
068
069                List<NucleotideCompound> ambiguousCompounds = new ArrayList<NucleotideCompound>();
070                for(NucleotideCompound compound: getAllCompounds()) {
071                        if (!compound.isAmbiguous()) {
072                                continue;
073                        }
074                        ambiguousCompounds.add(compound);
075                }
076
077                for(NucleotideCompound sourceCompound: ambiguousCompounds) {
078                        Set<NucleotideCompound> compoundConstituents = sourceCompound.getConstituents();
079                        for(NucleotideCompound targetCompound: ambiguousCompounds) {
080                                Set<NucleotideCompound> targetConstituents = targetCompound.getConstituents();
081                                if(targetConstituents.containsAll(compoundConstituents)) {
082                                        NucleotideCompound lcSourceCompound = toLowerCase(sourceCompound);
083                                        NucleotideCompound lcTargetCompound = toLowerCase(targetCompound);
084
085                                //equivalentsMap.put(sourceCompound, targetCompound);
086                        //      equivalentsMap.put(sourceCompound, lcTargetCompound);
087
088
089                                        checkAdd(equivalentsMap, sourceCompound, targetCompound);
090                                        checkAdd(equivalentsMap, sourceCompound, lcTargetCompound);
091
092                                        checkAdd(equivalentsMap,targetCompound,sourceCompound);
093                                        checkAdd(equivalentsMap, lcTargetCompound, sourceCompound);
094
095                                        checkAdd(equivalentsMap, lcSourceCompound, targetCompound);
096                                        checkAdd(equivalentsMap, lcSourceCompound, lcTargetCompound);
097
098                                }
099                        }
100                }
101
102                //And once it's all done start adding them to the equivalents map
103
104                for ( NucleotideCompound key: equivalentsMap.keySet()){
105                        List<NucleotideCompound> vals = equivalentsMap.get(key);
106                        for (NucleotideCompound value: vals){
107                                addEquivalent((C)key,(C)value);
108                                addEquivalent((C)value,(C)key);
109                        }
110                }
111        }
112
113        private void checkAdd(
114                Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap,
115                NucleotideCompound key,
116                NucleotideCompound value) {
117
118
119                        List<NucleotideCompound> listS = equivalentsMap.get(key);
120                        if ( listS == null){
121                                listS = new ArrayList<NucleotideCompound>();
122                                equivalentsMap.put(key, listS);
123                        }
124                        listS.add(value);
125
126
127}
128
129private NucleotideCompound toLowerCase(NucleotideCompound compound) {
130                return getCompoundForString(compound.getBase().toLowerCase());
131        }
132
133        /**
134         * Calculates the best symbol for a collection of compounds. For example
135         * if you gave this method a AC it will return a M which is the ambiguity
136         * symbol for these compounds.
137         *
138         * @param compounds Compounds to calculate ambiguity for
139         * @return The ambiguity symbol which represents this set of nucleotides best
140         */
141        public NucleotideCompound getAmbiguity(NucleotideCompound... compounds) {
142                Set<NucleotideCompound> settedCompounds = new HashSet<NucleotideCompound>();
143                for(NucleotideCompound compound: compounds) {
144                        for(NucleotideCompound subCompound: compound.getConstituents()) {
145                                settedCompounds.add(getCompoundForString(subCompound.getBase().toUpperCase()));
146                        }
147                }
148                for(NucleotideCompound compound: getAllCompounds()) {
149                        if(compound.getConstituents().equals(settedCompounds)) {
150                                return compound;
151                        }
152                }
153                return null;
154        }
155
156                /**
157                 * NucleotideCompounds can always complement
158                 */
159                @Override
160                public boolean isComplementable() {
161                                return true;
162                }
163}