Source code

001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.template;
022
023import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
024
025import java.util.*;
026import java.util.stream.Collectors;
027
028/**
029 *
030 * @author Andy Yates
031 * @param <C> Type of compound this set will contain but must extend
032 * NucleotideCompound
033 */
034public abstract class AbstractNucleotideCompoundSet<C extends NucleotideCompound>
035        extends AbstractCompoundSet<C> {
036
037        protected void addNucleotideCompound(String base, String complement, String... equivalents) {
038
039                String[] upperEquivalents = new String[equivalents.length];
040                String[] lowerEquivalents = new String[equivalents.length];
041                for(int i=0; i<equivalents.length; i++) {
042                        upperEquivalents[i] = equivalents[i].toUpperCase();
043                        lowerEquivalents[i] = equivalents[i].toLowerCase();
044                }
045
046                C upper = newNucleotideCompound(base.toUpperCase(), complement.toUpperCase(), upperEquivalents);
047                C lower = newNucleotideCompound(base.toLowerCase(), complement.toLowerCase(), lowerEquivalents);
048
049                List<C> equivalentCompounds = new ArrayList<>();
050
051                for(int i=0; i<equivalents.length; i++) {
052                        equivalentCompounds.add(getCompoundForString(upperEquivalents[i]));
053                        equivalentCompounds.add(getCompoundForString(lowerEquivalents[i]));
054                }
055
056                addCompound(upper, lower, equivalentCompounds);
057        }
058
059        protected abstract C newNucleotideCompound(String base, String complement, String... equivalents);
060
061        /**
062         * Loops through all known nucleotides and attempts to find which are
063         * equivalent to each other. Also takes into account lower casing
064         * nucleotides as well as upper-cased ones.
065         */
066        @SuppressWarnings("unchecked")
067        protected void calculateIndirectAmbiguities() {
068                Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap = new HashMap<>();
069
070                List<NucleotideCompound> ambiguousCompounds = getAllCompounds().stream()                 
071                                                                               .filter(compound -> compound.isAmbiguous())
072                                                                               .collect(Collectors.toCollection(ArrayList::new));
073                        
074
075                for(NucleotideCompound sourceCompound: ambiguousCompounds) {
076                        Set<NucleotideCompound> compoundConstituents = sourceCompound.getConstituents();
077                        for(NucleotideCompound targetCompound: ambiguousCompounds) {
078                                Set<NucleotideCompound> targetConstituents = targetCompound.getConstituents();
079                                if(targetConstituents.containsAll(compoundConstituents)) {
080                                        NucleotideCompound lcSourceCompound = toLowerCase(sourceCompound);
081                                        NucleotideCompound lcTargetCompound = toLowerCase(targetCompound);
082
083                                //equivalentsMap.put(sourceCompound, targetCompound);
084                        //      equivalentsMap.put(sourceCompound, lcTargetCompound);
085
086
087                                        checkAdd(equivalentsMap, sourceCompound, targetCompound);
088                                        checkAdd(equivalentsMap, sourceCompound, lcTargetCompound);
089
090                                        checkAdd(equivalentsMap,targetCompound,sourceCompound);
091                                        checkAdd(equivalentsMap, lcTargetCompound, sourceCompound);
092
093                                        checkAdd(equivalentsMap, lcSourceCompound, targetCompound);
094                                        checkAdd(equivalentsMap, lcSourceCompound, lcTargetCompound);
095
096                                }
097                        }
098                }
099
100                //And once it's all done start adding them to the equivalents map
101
102                for ( NucleotideCompound key: equivalentsMap.keySet()){
103                        List<NucleotideCompound> vals = equivalentsMap.get(key);
104                        for (NucleotideCompound value: vals){
105                                addEquivalent((C)key,(C)value);
106                                addEquivalent((C)value,(C)key);
107                        }
108                }
109        }
110
111        private void checkAdd(
112                Map<NucleotideCompound, List<NucleotideCompound>> equivalentsMap,
113                NucleotideCompound key,
114                NucleotideCompound value) {
115
116
117                        List<NucleotideCompound> listS = equivalentsMap.get(key);
118                        if ( listS == null){
119                                listS = new ArrayList<>();
120                                equivalentsMap.put(key, listS);
121                        }
122                        listS.add(value);
123
124
125}
126
127private NucleotideCompound toLowerCase(NucleotideCompound compound) {
128                return getCompoundForString(compound.getBase().toLowerCase());
129        }
130
131        /**
132         * Calculates the best symbol for a collection of compounds. For example
133         * if you gave this method a AC it will return a M which is the ambiguity
134         * symbol for these compounds.
135         *
136         * @param compounds Compounds to calculate ambiguity for
137         * @return The ambiguity symbol which represents this set of nucleotides best
138         */
139        public NucleotideCompound getAmbiguity(NucleotideCompound... compounds) {
140                Set<NucleotideCompound> settedCompounds = new HashSet<>();
141                for(NucleotideCompound compound: compounds) {
142                        for(NucleotideCompound subCompound: compound.getConstituents()) {
143                                settedCompounds.add(getCompoundForString(subCompound.getBase().toUpperCase()));
144                        }
145                }
146                for(NucleotideCompound compound: getAllCompounds()) {
147                        if(compound.getConstituents().equals(settedCompounds)) {
148                                return compound;
149                        }
150                }
151                return null;
152        }
153
154                /**
155                 * NucleotideCompounds can always complement
156                 */
157                @Override
158                public boolean isComplementable() {
159                                return true;
160                }
161}