001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 */ 022 023package org.biojava.nbio.core.sequence.compound; 024 025import org.biojava.nbio.core.sequence.template.CompoundSet; 026import org.biojava.nbio.core.sequence.template.Sequence; 027 028import java.io.Serializable; 029import java.util.*; 030 031/** 032 * Set of proteinogenic amino acids. Molecular weights are recorded in daltons (Da) as residues of a chain; monomers 033 * outside of a chain would likely have an additional mass of 18.01524 Da contributed by an associated water molecule. 034 * 035 * Currently we have different symbols to handle inserts so not as clean as it should be 036 * 037 * @author Richard Holland 038 * @author Scooter Willis 039 * @author Mark Chapman 040 */ 041public class AminoAcidCompoundSet implements CompoundSet<AminoAcidCompound>, Serializable { 042 043 /** 044 * 045 */ 046 private static final long serialVersionUID = 4000344194364133456L; 047 private final Map<String, AminoAcidCompound> aminoAcidCompoundCache = new HashMap<>(); 048 private final Map<String, AminoAcidCompound> aminoAcidCompoundCache3Letter = new HashMap<>(); 049 050 private final Map<AminoAcidCompound, Set<AminoAcidCompound>> equivalentsCache = 051 new HashMap<>(); 052 053 public AminoAcidCompoundSet() { 054 aminoAcidCompoundCache.put("A", new AminoAcidCompound(this, "A", "Ala", "Alanine", 71.0788f)); 055 aminoAcidCompoundCache.put("R", new AminoAcidCompound(this, "R", "Arg", "Arginine", 156.1875f)); 056 aminoAcidCompoundCache.put("N", new AminoAcidCompound(this, "N", "Asn", "Asparagine", 114.1039f)); 057 aminoAcidCompoundCache.put("D", new AminoAcidCompound(this, "D", "Asp", "Aspartic acid", 115.0886f)); 058 aminoAcidCompoundCache.put("C", new AminoAcidCompound(this, "C", "Cys", "Cysteine", 103.1388f)); 059 aminoAcidCompoundCache.put("E", new AminoAcidCompound(this, "E", "Glu", "Glutamic acid", 129.1155f)); 060 aminoAcidCompoundCache.put("Q", new AminoAcidCompound(this, "Q", "Gln", "Glutamine", 128.1307f)); 061 aminoAcidCompoundCache.put("G", new AminoAcidCompound(this, "G", "Gly", "Glycine", 57.0519f)); 062 aminoAcidCompoundCache.put("H", new AminoAcidCompound(this, "H", "His", "Histidine", 137.1411f)); 063 aminoAcidCompoundCache.put("I", new AminoAcidCompound(this, "I", "Ile", "Isoleucine", 113.1594f)); 064 aminoAcidCompoundCache.put("L", new AminoAcidCompound(this, "L", "Leu", "Leucine", 113.1594f)); 065 aminoAcidCompoundCache.put("K", new AminoAcidCompound(this, "K", "Lys", "Lysine", 128.1741f)); 066 aminoAcidCompoundCache.put("M", new AminoAcidCompound(this, "M", "Met", "Methionine", 131.1986f)); 067 aminoAcidCompoundCache.put("F", new AminoAcidCompound(this, "F", "Phe", "Phenylalanine", 147.1766f)); 068 aminoAcidCompoundCache.put("P", new AminoAcidCompound(this, "P", "Pro", "Proline", 97.1167f)); 069 aminoAcidCompoundCache.put("S", new AminoAcidCompound(this, "S", "Ser", "Serine", 87.0782f)); 070 aminoAcidCompoundCache.put("T", new AminoAcidCompound(this, "T", "Thr", "Threonine", 101.1051f)); 071 aminoAcidCompoundCache.put("W", new AminoAcidCompound(this, "W", "Trp", "Tryptophan", 186.2132f)); 072 aminoAcidCompoundCache.put("Y", new AminoAcidCompound(this, "Y", "Tyr", "Tyrosine", 163.1760f)); 073 aminoAcidCompoundCache.put("V", new AminoAcidCompound(this, "V", "Val", "Valine", 99.1326f)); 074 aminoAcidCompoundCache.put("B", new AminoAcidCompound(this, "B", "Asx", "Asparagine or Aspartic acid", null)); 075 aminoAcidCompoundCache.put("Z", new AminoAcidCompound(this, "Z", "Glx", "Glutamine or Glutamic acid", null)); 076 aminoAcidCompoundCache.put("J", new AminoAcidCompound(this, "J", "Xle", "Leucine or Isoleucine", null)); 077 aminoAcidCompoundCache.put("X", new AminoAcidCompound(this, "X", "Xaa", "Unspecified", null)); 078 aminoAcidCompoundCache.put("-", new AminoAcidCompound(this, "-", "---", "Unspecified", null)); 079 aminoAcidCompoundCache.put(".", new AminoAcidCompound(this, ".", "...", "Unspecified", null)); 080 aminoAcidCompoundCache.put("_", new AminoAcidCompound(this, "_", "___", "Unspecified", null)); 081 aminoAcidCompoundCache.put("*", new AminoAcidCompound(this, "*", "***", "Stop", null)); 082 083 //Selenocysteine - this is encoded by UGA with the presence 084 //of a SECIS element (SElenoCysteine Insertion Sequence) in the mRNA 085 //and is a post-translation modification 086 aminoAcidCompoundCache.put("U", new AminoAcidCompound(this, "U", "Sec", "Selenocysteine", 150.0388f)); 087 088 //Pyrrolysine is encoded by UAG in mRNA (normally Amber stop codon) which is translated to 089 //this amino acid under the presence of pylT which creates an anti-codon CUA & pylS 090 //which then does the actual conversion to Pyl. 091 aminoAcidCompoundCache.put("O", new AminoAcidCompound(this, "O", "Pyl", "Pyrrolysine", 255.3172f)); 092 093 for(String oneLtr : aminoAcidCompoundCache.keySet()) { 094 AminoAcidCompound aa = aminoAcidCompoundCache.get(oneLtr); 095 String threeLtr = aa.getLongName().toUpperCase(); 096 aminoAcidCompoundCache3Letter.put(threeLtr, aa); 097 } 098 } 099 100 @Override 101 public String getStringForCompound(AminoAcidCompound compound) { 102 return compound.toString(); 103 } 104 105 @Override 106 public AminoAcidCompound getCompoundForString(String string) { 107 if (string.length() == 0) { 108 return null; 109 } 110 if (string.length() == 3) { 111 return this.aminoAcidCompoundCache3Letter.get(string.toUpperCase()); 112 } 113 if (string.length() > this.getMaxSingleCompoundStringLength()) { 114 throw new IllegalArgumentException("String supplied ("+string+") is too long. Max is "+getMaxSingleCompoundStringLength()); 115 } 116 return this.aminoAcidCompoundCache.get(string.toUpperCase()); 117 } 118 119 @Override 120 public int getMaxSingleCompoundStringLength() { 121 return 1; 122 } 123 124 125 @Override 126 public boolean isCompoundStringLengthEqual() { 127 return true; 128 } 129 130 private final static AminoAcidCompoundSet aminoAcidCompoundSet = new AminoAcidCompoundSet(); 131 132 public static AminoAcidCompoundSet getAminoAcidCompoundSet() { 133 return aminoAcidCompoundSet; 134 } 135 136 @Override 137 public boolean compoundsEquivalent(AminoAcidCompound compoundOne, AminoAcidCompound compoundTwo) { 138 Set<AminoAcidCompound> equivalents = getEquivalentCompounds(compoundOne); 139 return (equivalents != null) && equivalents.contains(compoundTwo); 140 } 141 142 @Override 143 public Set<AminoAcidCompound> getEquivalentCompounds(AminoAcidCompound compound) { 144 if (equivalentsCache.isEmpty()) { 145 // most compounds are equivalent to themselves alone 146 for (AminoAcidCompound c : aminoAcidCompoundCache.values()) { 147 equivalentsCache.put(c, Collections.singleton(c)); 148 } 149 // ambiguous Asparagine or Aspartic acid 150 addAmbiguousEquivalents("N", "D", "B"); 151 // ambiguous Glutamine or Glutamic acid 152 addAmbiguousEquivalents("E", "Q", "Z"); 153 // ambiguous Leucine or Isoleucine 154 addAmbiguousEquivalents("I", "L", "J"); 155 // ambiguous gaps 156 AminoAcidCompound gap1, gap2, gap3; 157 Set<AminoAcidCompound> gaps = new HashSet<>(); 158 gaps.add(gap1 = aminoAcidCompoundCache.get("-")); 159 gaps.add(gap2 = aminoAcidCompoundCache.get(".")); 160 gaps.add(gap3 = aminoAcidCompoundCache.get("_")); 161 equivalentsCache.put(gap1, gaps); 162 equivalentsCache.put(gap2, gaps); 163 equivalentsCache.put(gap3, gaps); 164 // X is never equivalent, even to itself 165 equivalentsCache.put(aminoAcidCompoundCache.get("X"), new HashSet<AminoAcidCompound>()); 166 } 167 return equivalentsCache.get(compound); 168 } 169 170 // helper method to initialize the equivalent sets for 2 amino acid compounds and their ambiguity compound 171 private void addAmbiguousEquivalents(String one, String two, String either) { 172 Set<AminoAcidCompound> equivalents; 173 AminoAcidCompound cOne, cTwo, cEither; 174 175 equivalents = new HashSet<>(); 176 equivalents.add(cOne = aminoAcidCompoundCache.get(one)); 177 equivalents.add(cTwo = aminoAcidCompoundCache.get(two)); 178 equivalents.add(cEither = aminoAcidCompoundCache.get(either)); 179 equivalentsCache.put(cEither, equivalents); 180 181 equivalents = new HashSet<>(); 182 equivalents.add(cOne); 183 equivalents.add(cEither); 184 equivalentsCache.put(cOne, equivalents); 185 186 equivalents = new HashSet<>(); 187 equivalents.add(cTwo); 188 equivalents.add(cEither); 189 equivalentsCache.put(cTwo, equivalents); 190 } 191 192 @Override 193 public boolean hasCompound(AminoAcidCompound compound) { 194 return aminoAcidCompoundCache.containsValue(compound); 195 } 196 197 @Override 198 public boolean isValidSequence(Sequence<AminoAcidCompound> sequence) { 199 for (AminoAcidCompound compound: sequence) { 200 if (!hasCompound(compound)) { 201 return false; 202 } 203 } 204 return true; 205 } 206 207 @Override 208 public List<AminoAcidCompound> getAllCompounds() { 209 return new ArrayList<>(aminoAcidCompoundCache.values()); 210 } 211 212 213 @Override 214 public boolean isComplementable() { 215 return false; 216 } 217}