001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.symmetry.core;
022
023import org.biojava.nbio.structure.Atom;
024import org.biojava.nbio.structure.StructureException;
025import org.biojava.nbio.structure.align.StructureAlignment;
026import org.biojava.nbio.structure.align.StructureAlignmentFactory;
027import org.biojava.nbio.structure.align.ce.CeMain;
028import org.biojava.nbio.structure.align.ce.CeParameters;
029import org.biojava.nbio.structure.align.model.AFPChain;
030import org.biojava.nbio.structure.align.seq.SmithWaterman3Daligner;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import java.util.*;
035
036/**
037 * Represents a cluster of equivalent sequences
038 *
039 */
040public class SequenceAlignmentCluster implements Cloneable {
041
042        private static final Logger logger = LoggerFactory.getLogger(SequenceAlignmentCluster.class);
043
044        private QuatSymmetryParameters parameters = null;
045        private List<UniqueSequenceList> uniqueSequenceList = new ArrayList<UniqueSequenceList>();
046        private List<Atom[]> alignedCAlphaAtoms = null;
047
048        private int alignmentLength = 0;
049        private double minSequenceIdentity = 1.0;
050        private double maxSequenceIdentity = 0.0;
051
052        private boolean modified = true;
053
054        public SequenceAlignmentCluster (QuatSymmetryParameters parameters) {
055                this.parameters = parameters;
056        }
057
058        public boolean isPseudoStoichiometric() {
059                return minSequenceIdentity < parameters.getSequencePseudoSymmetryThreshold();
060        }
061
062        public double getMinSequenceIdentity() {
063                if (! isPseudoStoichiometric()) {
064                        return 1.0;
065                }
066                return minSequenceIdentity;
067        }
068
069        public void setMinSequenceIdentity(double minSequenceIdentity) {
070                this.minSequenceIdentity = minSequenceIdentity;
071        }
072
073        public double getMaxSequenceIdentity() {
074                if (! isPseudoStoichiometric()) {
075                        return 1.0;
076                }
077                return maxSequenceIdentity;
078        }
079
080        public void setMaxSequenceIdentity(double maxSequenceIdentity) {
081                this.maxSequenceIdentity = maxSequenceIdentity;
082        }
083
084        public void addUniqueSequenceList(UniqueSequenceList sequenceList) {
085                uniqueSequenceList.add(sequenceList);
086                modified = true;
087        }
088
089        /**
090         * @return the number of sequences which have been added to this cluster
091         */
092        public int getSequenceCount() {
093                return uniqueSequenceList.size();
094        }
095
096        public int getSequenceAlignmentLength() {
097                run();
098                return alignmentLength;
099        }
100
101        public List<UniqueSequenceList> getUniqueSequenceList() {
102                return uniqueSequenceList;
103        }
104
105        public List<String> getChainIds() {
106                List<String> ids = new ArrayList<String>();
107                for (UniqueSequenceList list: uniqueSequenceList) {
108                        ids.add(list.getChainId());
109                }
110                return ids;
111        }
112
113        public List<Integer> getModelNumbers() {
114                List<Integer> numbers = new ArrayList<Integer>();
115                for (UniqueSequenceList list: uniqueSequenceList) {
116                        numbers.add(list.getModelNumber());
117                }
118                return numbers;
119        }
120
121        public List<Integer> getStructureIds() {
122                List<Integer> numbers = new ArrayList<Integer>();
123                for (UniqueSequenceList list: uniqueSequenceList) {
124                        numbers.add(list.getStructureId());
125                }
126                return numbers;
127        }
128
129        public List<Atom[]> getAlignedCalphaAtoms() {
130                run();
131                return alignedCAlphaAtoms;
132        }
133
134        /**
135         * Match a sequence to this cluster at 100% identity.
136         *
137         * If the given sequence matches the cluster seed (100%), then performs an
138         * alignment to the seed and adds it to the {@link #getUniqueSequenceList()
139         * unique sequence list}.
140         *
141         * @param cAlphaAtoms
142         * @param chainId
143         * @param modelNumber
144         * @param structureId
145         * @param sequence
146         * @return
147         */
148        public boolean identityMatch(Atom[] cAlphaAtoms, String chainId, int modelNumber, int structureId, String sequence) {
149                UniqueSequenceList u = uniqueSequenceList.get(0);
150
151                // check for 100% identity match of reference sequence
152                String refSequence = u.getSeqResSequence();
153                boolean seqMatch = refSequence.equals(sequence);
154
155                // if reference (SEQRES) sequences match 100%,
156                // find alignment of atom sequences by Smith-Waterman alignment
157                if (seqMatch) {
158                        List<Integer> alig1 = new ArrayList<Integer>();
159                        List<Integer> alig2 = new ArrayList<Integer>();
160                        Atom[] referenceAtoms = u.getCalphaAtoms();
161                        int inCommon = 0;
162                        try {
163                                inCommon = alignIdenticalSequence(referenceAtoms, cAlphaAtoms, alig1, alig2);
164                        } catch (StructureException e) {
165                                // this happens in some cases like all X sequences, e.g. 1s1o or 1s4a
166                                logger.warn("Could not align identical sequences {}: {} and {}: {}. Chains won't be clustered together.",
167                                                u.getChainId(),refSequence,chainId,sequence);
168                        }
169
170                        if (inCommon > 0) {
171                                UniqueSequenceList seqList = new UniqueSequenceList(cAlphaAtoms, chainId, modelNumber, structureId, sequence);
172                                seqList.setAlignment1(alig1);
173                                seqList.setAlignment2(alig2);
174                                //                      System.out.println(alig1);
175                                //                      System.out.println(alig2);
176                                addUniqueSequenceList(seqList);
177                                return true;
178                        }
179                }
180
181                return false;
182        }
183
184        public PairwiseAlignment getPairwiseAlignment(SequenceAlignmentCluster other) {
185                PairwiseAlignment alignment = new PairwiseAlignment(this, other);
186
187                Atom[] referenceAtoms1 = this.getUniqueSequenceList().get(0).getCalphaAtoms();
188                Atom[] referenceAtoms2 = other.getUniqueSequenceList().get(0).getCalphaAtoms();
189
190                double alignmentLengthFraction = (double)Math.min(referenceAtoms1.length, referenceAtoms2.length) /
191                                Math.max(referenceAtoms1.length, referenceAtoms2.length);
192
193                if (alignmentLengthFraction < parameters.getAlignmentFractionThreshold()) {
194                        return null;
195                }
196
197                AFPChain afp = alignPairByStructure(referenceAtoms1, referenceAtoms2,parameters.isVerbose());
198                if (afp == null) {
199                        return null;
200                }
201
202                if (! afp.isSignificantResult()) {
203                        return null;
204
205                        // alternative: tmSCore:
206                        // double tmScore = AFPChainScorer.getTMScore(afpChain, ca1, ca2);
207                        // if ( tmScore < 0.35) {
208                        // return null ...
209                }
210
211                int[][][] align = afp.getOptAln();
212                if (align == null) {
213                        return null;
214                }
215
216                alignmentLengthFraction = (double)afp.getOptLength()/Math.max(referenceAtoms1.length, referenceAtoms2.length);
217                alignment.setAlignmentLengthFraction(alignmentLengthFraction);
218                alignment.setRmsd(afp.getChainRmsd());
219                alignment.setSequenceIdentity(afp.getIdentity());
220                alignment.setAlignment(afp.getOptAln());
221
222                return alignment;
223        }
224
225        @Override
226        public Object clone() {
227                SequenceAlignmentCluster copy = null;
228                try {
229                        copy = (SequenceAlignmentCluster) super.clone();
230                } catch (CloneNotSupportedException e) {
231                        logger.error("CloneNotSupportedException caught",e);
232                }
233                // deep copy sequences
234                copy.uniqueSequenceList = new ArrayList<UniqueSequenceList>();
235                for (UniqueSequenceList seq: this.getUniqueSequenceList()) {
236                        copy.addUniqueSequenceList((UniqueSequenceList) seq.clone());
237                }
238                return copy;
239        }
240
241        @Override
242        public String toString() {
243                StringBuilder builder = new StringBuilder();
244                for (UniqueSequenceList u: uniqueSequenceList) {
245                        builder.append(u.toString());
246                        builder.append("\n");
247                }
248                return builder.toString();
249        }
250
251        private void run() {
252                if (modified) {
253                        alignedCAlphaAtoms = null;
254                        createAlignedCAlphaAtoms();
255                        modified = false;
256                }
257        }
258
259        private static AFPChain alignPairBySequence(Atom[] ca1Seq, Atom[] ca2Seq) throws StructureException {
260                SmithWaterman3Daligner aligner = new SmithWaterman3Daligner();
261                return aligner.align(ca1Seq, ca2Seq);
262        }
263
264        private static AFPChain alignPairByStructure(Atom[] ca1Seq, Atom[] ca2Seq, boolean verbose) {
265           CeParameters params = new CeParameters();
266
267                AFPChain afp = null;
268                try {
269                        StructureAlignment algorithm  = StructureAlignmentFactory.getAlgorithm(CeMain.algorithmName);
270                        afp = algorithm.align(ca1Seq,ca2Seq,params);
271                        if (verbose) {
272                                System.out.println(afp.toFatcat(ca1Seq, ca2Seq));
273                        }
274                } catch (StructureException e) {
275                        logger.error("StructureException caught",e);
276                }
277                return afp;
278        }
279
280
281        private static int alignIdenticalSequence(Atom[] ca1Seq, Atom[] ca2Seq, List<Integer> align1, List<Integer> align2) throws StructureException {
282                AFPChain afp = alignPairBySequence(ca1Seq, ca2Seq);
283                int[][][] align = afp.getOptAln();
284                if (align == null) {
285                        return 0;
286                }
287                int len =  afp.getOptLength();
288
289                List<Integer> delta = new ArrayList<Integer>();
290                Set<Integer> unique = new HashSet<Integer>();
291
292                for (int i = 0; i < len; i++) {
293                        Atom a1 = ca1Seq[align[0][0][i]];
294                        String residueName1 = a1.getGroup().getPDBName();
295                        Atom a2 = ca2Seq[align[0][1][i]];
296                        String residueName2 = a2.getGroup().getPDBName();
297                        if (residueName1.equals(residueName2)) {
298                                int n1 = a1.getGroup().getResidueNumber().getSeqNum();
299                                int n2 = a2.getGroup().getResidueNumber().getSeqNum();
300                                delta.add(n2-n1);
301                                unique.add(n2-n1);
302                        }
303                }
304
305                int offset = 0;
306                int frequency = 0;
307                for (Integer i: unique) {
308                        int freq = Collections.frequency(delta, i);
309                        if (freq > frequency) {
310                                offset = i;
311                                frequency = freq;
312                        }
313                }
314
315                for (int i = 0; i < len; i++) {
316                        Atom a1 = ca1Seq[align[0][0][i]];
317                        int n1 = a1.getGroup().getResidueNumber().getSeqNum();
318                        Atom a2 = ca2Seq[align[0][1][i]];
319                        int n2 = a2.getGroup().getResidueNumber().getSeqNum();
320                        if (n2 - offset == n1) {
321                                align1.add(align[0][0][i]);
322                                align2.add(align[0][1][i]);
323                        }
324                }
325//        System.out.println("PDB alignment: ");
326//        System.out.println(align1);
327//        System.out.println(align2);
328                return align1.size();
329        }
330
331        private void createAlignedCAlphaAtoms() {
332                List<Integer> indices = getReferenceResidueIndices();
333                alignmentLength = indices.size();
334                alignedCAlphaAtoms = new ArrayList<Atom[]>();
335                for (UniqueSequenceList u: uniqueSequenceList) {
336                        List<Integer> alignment1 = u.getAlignment1();
337                        List<Integer> alignment2 = u.getAlignment2();
338                        List<Integer> alignmentIndices = new ArrayList<Integer>();
339                        for (int i = 0; i < alignment1.size(); i++) {
340                                int a1 = alignment1.get(i);
341                                if (indices.contains(a1)) {
342                                        alignmentIndices.add(alignment2.get(i));
343                                }
344                        }
345                        Atom[] unalignedAtoms = u.getCalphaAtoms();
346                        Atom[] alignedAtoms = new Atom[alignmentIndices.size()];
347                        for (int j = 0; j < alignedAtoms.length; j++) {
348                                alignedAtoms[j] = unalignedAtoms[alignmentIndices.get(j)];
349                        }
350                        alignedCAlphaAtoms.add(alignedAtoms);
351                }
352        }
353
354        private List<Integer> getReferenceResidueIndices() {
355                List<Integer> indices = new ArrayList<Integer>(uniqueSequenceList.get(0).getAlignment1());
356                for (UniqueSequenceList u: uniqueSequenceList) {
357                   indices.retainAll(u.getAlignment1());
358                }
359                return indices;
360        }
361}