001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.cluster; 022 023import java.util.ArrayList; 024import java.util.List; 025 026import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 027import org.biojava.nbio.structure.Structure; 028import org.biojava.nbio.structure.StructureException; 029import org.biojava.nbio.structure.symmetry.core.Stoichiometry; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033/** 034 * The SubunitClusterer takes as input a collection of {@link Subunit} and 035 * returns a collection of {@link SubunitCluster}. 036 * 037 * @author Aleix Lafita 038 * @since 5.0.0 039 * 040 */ 041public class SubunitClusterer { 042 043 private static final Logger logger = LoggerFactory 044 .getLogger(SubunitClusterer.class); 045 046 /** Prevent instantiation **/ 047 private SubunitClusterer() { 048 } 049 050 public static Stoichiometry cluster(Structure structure, 051 SubunitClustererParameters params) { 052 List<Subunit> subunits = SubunitExtractor.extractSubunits(structure, 053 params.getAbsoluteMinimumSequenceLength(), 054 params.getMinimumSequenceLengthFraction(), 055 params.getMinimumSequenceLength()); 056 return cluster(subunits, params); 057 } 058 059 public static Stoichiometry cluster(List<Subunit> subunits, SubunitClustererParameters params) { 060 List<SubunitCluster> clusters = new ArrayList<>(); 061 if (subunits.size() == 0) 062 return new Stoichiometry(clusters); 063 064 // First generate a new cluster for each Subunit 065 for (Subunit s : subunits) 066 clusters.add(new SubunitCluster(s)); 067 068 if (params.getClustererMethod() == SubunitClustererMethod.SEQUENCE || 069 params.getClustererMethod() == SubunitClustererMethod.SEQUENCE_STRUCTURE) { 070 // Now merge clusters by SEQUENCE 071 for (int c1 = 0; c1 < clusters.size(); c1++) { 072 for (int c2 = clusters.size() - 1; c2 > c1; c2--) { 073 try { 074 if (params.isUseEntityIdForSeqIdentityDetermination() && 075 clusters.get(c1).mergeIdenticalByEntityId(clusters.get(c2))) { 076 // This we will only do if the switch is for entity id comparison is on. 077 // In some cases it can save enormous amounts of time, e.g. for clustering full 078 // chains of deposited PDB entries. For instance for 6NHJ: with pure alignments it 079 // takes ~ 6 hours, with entity id comparisons it takes 2 minutes. 080 clusters.remove(c2); 081 } else if (clusters.get(c1).mergeSequence(clusters.get(c2), params)) { 082 clusters.remove(c2); 083 } 084 085 } catch (CompoundNotFoundException e) { 086 logger.warn("Could not merge by Sequence. {}", 087 e.getMessage()); 088 } 089 } 090 } 091 } 092 093 if (params.getClustererMethod() == SubunitClustererMethod.STRUCTURE || 094 params.getClustererMethod() == SubunitClustererMethod.SEQUENCE_STRUCTURE) { 095 // Now merge clusters by STRUCTURE 096 for (int c1 = 0; c1 < clusters.size(); c1++) { 097 for (int c2 = clusters.size() - 1; c2 > c1; c2--) { 098 try { 099 if (clusters.get(c1).mergeStructure(clusters.get(c2), params)) { 100 clusters.remove(c2); 101 } 102 } catch (StructureException e) { 103 logger.warn("Could not merge by Structure. {}", e.getMessage()); 104 } 105 } 106 } 107 } 108 109 if (params.isInternalSymmetry()) { 110 // Now divide clusters by their INTERNAL SYMMETRY 111 for (int c = 0; c < clusters.size(); c++) { 112 try { 113 clusters.get(c).divideInternally(params); 114 } catch (StructureException e) { 115 logger.warn("Error analyzing internal symmetry. {}", 116 e.getMessage()); 117 } 118 } 119 120 // After internal symmetry merge again by structural similarity 121 // Use case: C8 propeller with 3 chains with 3+3+2 repeats each 122 for (int c1 = 0; c1 < clusters.size(); c1++) { 123 for (int c2 = clusters.size() - 1; c2 > c1; c2--) { 124 try { 125 if (clusters.get(c1).mergeStructure(clusters.get(c2), params)) 126 clusters.remove(c2); 127 } catch (StructureException e) { 128 logger.warn("Could not merge by Structure. {}", 129 e.getMessage()); 130 } 131 } 132 } 133 } 134 135 return new Stoichiometry(clusters); 136 } 137}