001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.core; 022 023import org.biojava.nbio.structure.cluster.SubunitCluster; 024import org.biojava.nbio.structure.cluster.SubunitClustererMethod; 025 026import java.util.*; 027import java.util.function.Function; 028import java.util.stream.Collectors; 029 030/** 031 * A utility object that describes Stoichiometry (composition of a protein assembly), 032 * determined via clustering procedure {@link org.biojava.nbio.structure.cluster.SubunitClusterer}, 033 * and implements human-readable representation using various strategies. 034 * 035 * @author Dmytro Guzenko 036 * @since 5.0.0 037 */ 038 039public class Stoichiometry { 040 041 /** 042 * What to do when the number of {@link SubunitCluster} exceeds the length of the alphabet. 043 */ 044 public enum StringOverflowStrategy { 045 /** 046 * Put '?' symbol for every (alphabet.length+i)-th cluster 047 */ 048 QUESTIONMARK, 049 /** 050 * Cycle through the alphabet (e.g., ...xyzABC...) 051 */ 052 CYCLE, 053 /** 054 * Represent every cluster with two symbols from the alphabet, 055 * this forces us to specify number of subunits for every subunit (e.g., AA1AB1AC1...). 056 * This strategy will not work correctly if there are more than alphabet.length^2 subunit clusters. 057 */ 058 DOUBLE, 059 /** 060 * The strategy is defined via an external function, we do not have to do anything. 061 */ 062 CUSTOM 063 } 064 065 /** 066 * Alphabet (a sequence of characters) used in this stoichiometry to construct human-readable representation. 067 */ 068 private String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; 069 070 /** 071 * Strategy determines how this stoichiometry will construct human-readable representation in case number 072 * of clusters exceeds number of letters in the alphabet. 073 */ 074 private StringOverflowStrategy strategy = StringOverflowStrategy.CYCLE; 075 076 /** 077 * Full customisation of the string generation is supported via an external function 078 */ 079 private Function<List<SubunitCluster>,String> customStringGenerator = null; 080 /** 081 * Subunit clusters that define this stoichiometry. 082 */ 083 private List<SubunitCluster> orderedClusters = new ArrayList<>(); 084 085 /** Prevent instantiation **/ 086 @SuppressWarnings("unused") 087 private Stoichiometry() { 088 } 089 090 /** 091 * Constructor for Stoichiometry. The default strategy is CYCLE, 092 * the letters assigned for each cluster will be reset. 093 * 094 * @param clusters 095 * List of {@link SubunitCluster} that defines assembly composition. 096 */ 097 public Stoichiometry(List<SubunitCluster> clusters) { 098 this(clusters,StringOverflowStrategy.CYCLE,true); 099 } 100 101 /** 102 * Constructor for Stoichiometry. The default strategy is CYCLE. 103 * 104 * @param clusters 105 * List of {@link SubunitCluster} that defines assembly composition. 106 * @param resetAlphas 107 * Whether to keep alphas assigned to {@link SubunitCluster} object (useful for local symmetry detection) 108 * or to generate them anew. 109 */ 110 public Stoichiometry(List<SubunitCluster> clusters, boolean resetAlphas) { 111 this(clusters,StringOverflowStrategy.CYCLE,resetAlphas); 112 } 113 114 /** 115 * Constructor for Stoichiometry. The alphas assigned to {@link SubunitCluster} objects will be reset. 116 * 117 * @param clusters 118 * List of {@link SubunitCluster} that defines assembly composition. 119 * @param strategy 120 * What to do if number of {@link SubunitCluster} exceeds the alphabet length. 121 */ 122 public Stoichiometry(List<SubunitCluster> clusters, StringOverflowStrategy strategy) { 123 this(clusters,strategy,true); 124 } 125 126 /** 127 * Constructor for Stoichiometry. 128 * 129 * @param clusters 130 * List of {@link SubunitCluster} that defines assembly composition. 131 * @param strategy 132 * What to do if number of {@link SubunitCluster} exceeds the alphabet length. 133 * @param resetAlphas 134 * Whether to keep alphas assigned to {@link SubunitCluster} object (useful for local symmetry detection) 135 * or to generate them anew. 136 */ 137 public Stoichiometry(List<SubunitCluster> clusters, StringOverflowStrategy strategy, boolean resetAlphas) { 138 this.strategy = strategy; 139 this.orderedClusters = 140 clusters.stream(). 141 sorted(Comparator. 142 comparing(SubunitCluster::size). 143 reversed()). 144 collect(Collectors.toList()); 145 if (resetAlphas) { 146 doResetAlphas(); 147 } 148 } 149 150 /** 151 * Constructor for Stoichiometry. 152 * 153 * @param clusters 154 * List of {@link SubunitCluster} that defines assembly composition. 155 * @param customStringGenerator 156 * A function which produces a string for a composition (list of subunit clusters). 157 */ 158 public Stoichiometry(List<SubunitCluster> clusters, Function<List<SubunitCluster>,String> customStringGenerator) { 159 this(clusters,StringOverflowStrategy.CUSTOM,false); 160 this.customStringGenerator = customStringGenerator; 161 } 162 163 164 /** 165 * Reassign alpha-strings for each cluster according to the current strategy. 166 * Has no effect if custom string generator is used. 167 */ 168 public void resetAlphas() { 169 doResetAlphas(); 170 } 171 172 private void doResetAlphas() { 173 if(strategy == StringOverflowStrategy.CUSTOM) { 174 return; 175 } 176 for (int i = 0; i < this.orderedClusters.size(); i++) { 177 this.orderedClusters.get(i).setAlpha(generateAlpha(i)); 178 } 179 } 180 181 /** 182 * Produce a string ("alpha") that describes each component depending on the current strategy. 183 * @param clusterInd 184 * component index 185 * @return alphanumeric string. 186 */ 187 private String generateAlpha(int clusterInd) { 188 String key; 189 int alphabetInd; 190 switch (strategy) { 191 case CYCLE: 192 alphabetInd = clusterInd % alphabet.length(); 193 key = alphabet.substring(alphabetInd, alphabetInd + 1); 194 break; 195 196 case DOUBLE: 197 if (orderedClusters.size()>alphabet.length()) { 198 int alphabetInd1 = clusterInd / alphabet.length(); 199 int alphabetInd2 = clusterInd % alphabet.length(); 200 key = alphabet.substring(alphabetInd1, alphabetInd1 + 1); 201 key+=alphabet.substring(alphabetInd2, alphabetInd2 + 1); 202 } else { 203 key = alphabet.substring(clusterInd, clusterInd + 1); 204 } 205 break; 206 207 case QUESTIONMARK: 208 key = "?"; 209 if(clusterInd<alphabet.length()) { 210 key = alphabet.substring(clusterInd, clusterInd + 1); 211 } 212 break; 213 214 case CUSTOM: 215 throw new IllegalStateException("Alphas should be handled by the custom generator function."); 216 217 default: 218 key = "?"; 219 if(clusterInd<alphabet.length()) { 220 key = alphabet.substring(clusterInd, clusterInd + 1); 221 } 222 break; 223 } 224 return key; 225 } 226 /** 227 * @return list of {@link SubunitCluster}, ordered by the number of subunits (decreasing). 228 */ 229 public List<SubunitCluster> getClusters() { 230 return orderedClusters; 231 } 232 233 /** 234 * @return Number of distinct components in this stoichiometry. 235 */ 236 public int numberOfComponents() { 237 return orderedClusters.size(); 238 } 239 240 /** 241 * Make a combined Stoichiometry object of <i>this</> and the <i>other</>. 242 * The combined list of clusters will be ordered by the number of subunits. 243 * @return new {@link Stoichiometry} object. 244 */ 245 public Stoichiometry combineWith(Stoichiometry other) { 246 Set<SubunitCluster> combinedClusters = new LinkedHashSet<>(); 247 combinedClusters.addAll(this.orderedClusters); 248 combinedClusters.addAll(other.orderedClusters); 249 250 Stoichiometry combinedStoichiometry; 251 if (this.strategy == StringOverflowStrategy.CUSTOM) { 252 combinedStoichiometry = new Stoichiometry(new ArrayList<>(combinedClusters),this.customStringGenerator); 253 } else { 254 combinedStoichiometry = new Stoichiometry(new ArrayList<>(combinedClusters),this.strategy,false); 255 } 256 return combinedStoichiometry; 257 } 258 259 /** 260 * Make a Stoichiometry object that corresponds to a single component. 261 * @param i component index 262 * @return new {@link Stoichiometry} object. 263 */ 264 public Stoichiometry getComponent(int i) { 265 return new Stoichiometry(Collections.singletonList(orderedClusters.get(i)),this.strategy,false); 266 } 267 268 /** 269 * @return {@link StringOverflowStrategy} used in this stoichiometry 270 * to construct human-readable representation in case number 271 * of clusters exceeds number of letters in the alphabet. 272 */ 273 public StringOverflowStrategy getStrategy() { 274 return strategy; 275 } 276 277 /** 278 * Change string representation of a stoichiometry in case number of clusters exceeds number of letters in the alphabet. 279 * This action may invalidate alphas already assigned to the clusters. 280 * @param strategy 281 * {@link StringOverflowStrategy} used in this stoichiometry 282 * to construct human-readable representation in case number 283 * of clusters exceeds number of letters in the alphabet. 284 */ 285 public void setStrategy(StringOverflowStrategy strategy) { 286 if(strategy==StringOverflowStrategy.CUSTOM) { 287 throw new IllegalArgumentException("Set this strategy by providing a function of the type Function<List<SubunitCluster>,String>."); 288 } 289 290 if(this.strategy != strategy) { 291 this.strategy = strategy; 292 if(orderedClusters.size()>alphabet.length()) 293 doResetAlphas(); 294 } 295 } 296 297 298 /** 299 * Let a user-defined function handle the entire string representation of a stoichiometry. 300 * @param customStringGenerator 301 * A function which accepts a list of subunit clusters and returns a string. 302 */ 303 public void setCustomStringGenerator(Function<List<SubunitCluster>,String> customStringGenerator) { 304 this.strategy = StringOverflowStrategy.CUSTOM; 305 this.customStringGenerator = customStringGenerator; 306 } 307 308 /** 309 * @return Alphabet (a sequence of characters) used in this stoichiometry to construct human-readable representation. 310 */ 311 public String getAlphabet() { 312 return alphabet; 313 } 314 315 /** 316 * Change alphabet used for string representation of a stoichiometry. 317 * This action invalidates alphas already assigned to the clusters. 318 * @param alphabet 319 * a sequence of characters used in this stoichiometry to construct human-readable representation. 320 */ 321 public void setAlphabet(String alphabet) { 322 this.alphabet = alphabet; 323 doResetAlphas(); 324 } 325 326 /** 327 * @return Human-readable representation of this stoichiometry. 328 */ 329 @Override 330 public String toString() { 331 332 if(strategy == StringOverflowStrategy.CUSTOM) { 333 if(customStringGenerator == null) { 334 throw new IllegalStateException("The strategy is CUSTOM, yet the string generator function is not defined."); 335 } 336 return customStringGenerator.apply(orderedClusters); 337 } 338 339 StringBuilder formula = new StringBuilder(); 340 341 orderedClusters.forEach((SubunitCluster r) -> { 342 formula.append(r.getAlpha()); 343 if(r.getAlpha().length()>1 || r.size()>1) 344 formula.append(r.size()); 345 }); 346 347 return formula.toString(); 348 } 349 350 /** 351 * A pseudostoichiometric {@link SubunitCluster} was obtained using the 352 * {@link SubunitClustererMethod#STRUCTURE} similarity, 353 * or {@link SubunitClustererMethod#SEQUENCE} similarity with low scores. 354 * 355 * @return true if any of the clusters is pseudostoichiometric, false 356 * otherwise 357 */ 358 public boolean isPseudoStoichiometric() { 359 for (SubunitCluster c : orderedClusters) { 360 if(c.isPseudoStoichiometric()) 361 return true; 362 } 363 return false; 364 } 365 366}