001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on DATE
021 *
022 */
023
024package org.biojava.nbio.core.sequence;
025
026import org.biojava.nbio.core.sequence.io.util.IOUtils;
027import org.biojava.nbio.core.sequence.template.Compound;
028import org.biojava.nbio.core.sequence.template.CompoundSet;
029import org.biojava.nbio.core.sequence.template.LightweightProfile;
030import org.biojava.nbio.core.sequence.template.Sequence;
031
032import java.util.ArrayList;
033import java.util.Collections;
034import java.util.List;
035
036/**
037 * Implements a minimal data structure for reading and writing a sequence alignment.  The full {@code Profile} data
038 * structure in the alignment module provides additional functionality.
039 *
040 * @author Scooter Willis
041 * @author Mark Chapman
042 */
043public class MultipleSequenceAlignment<S extends Sequence<C>, C extends Compound> implements LightweightProfile<S, C> {
044
045        private List<S> sequences = new ArrayList<>();
046        private Integer length = null;
047
048        /**
049         * A sequence that has been aligned to other sequences will have inserts.
050         * @param sequence
051         */
052        public void addAlignedSequence(S sequence){
053                if(length == null){
054                        length = sequence.getLength();
055                }
056                if(sequence.getLength() != length){
057                        throw new IllegalArgumentException(sequence.getAccession() + " length = " + sequence.getLength() +
058                                        " not equal to MSA length = " + length);
059                }
060                sequences.add(sequence);
061        }
062
063        /**
064         * Remove a sequence
065         * @param sequence
066         * @return flag
067         */
068        public boolean removeAlignedSequence(S sequence){
069                return sequences.remove(sequence);
070        }
071//methods for LightweightProfile
072
073        /**
074         * Uses bioIndex starting at 1 instead of 0
075         * @param listIndex
076         * @return sequence
077         */
078
079
080        @Override
081        public S getAlignedSequence(int listIndex) {
082                return sequences.get(listIndex - 1);
083        }
084
085        /**
086         * Get the list of sequences
087         * @return list of sequences
088         */
089        @Override
090        public List<S> getAlignedSequences() {
091                return Collections.unmodifiableList(sequences);
092        }
093
094        /**
095         * Get a list of compounds at a sequence position
096         * @param alignmentIndex
097         * @return compounds
098         */
099        @Override
100        public List<C> getCompoundsAt(int alignmentIndex) {
101                List<C> column = new ArrayList<>();
102                for (S s : sequences) {
103                        column.add(s.getCompoundAt(alignmentIndex));
104                }
105                return Collections.unmodifiableList(column);
106        }
107
108        /**
109         * Get the Compounds defined in the first sequence
110         * @return get compound set
111         */
112        @Override
113        public CompoundSet<C> getCompoundSet() {
114                return sequences.get(0).getCompoundSet();
115        }
116
117        /**
118         * Get the length of the MSA where it is assumed that
119         * all sequence position
120         * @return length of MSA
121         */
122        @Override
123        public int getLength() {
124                return length;
125        }
126
127        /**
128         * Get the number of sequences in the MSA
129         * @return nr of sequences
130         */
131        @Override
132        public int getSize() {
133                return sequences.size();
134        }
135
136        /**
137         * Get a string representation of the MSA with a fixed width
138         * @param width
139         * @return String
140         */
141        @Override
142        public String toString(int width) {
143                return toString(width, null, IOUtils.getIDFormat(sequences), true, true, true, false);
144        }
145
146        /**
147         * Support for different MSA formats
148         * @param format
149         * @return String in one of the supported file formats.
150         */
151        @Override
152        public String toString(StringFormat format) {
153                switch (format) {
154                case ALN:
155                case CLUSTALW:
156                default:
157                        return toString(60, String.format("CLUSTAL W MSA from BioJava%n%n"), IOUtils.getIDFormat(sequences) +
158                                        "   ", true, false, true, false);
159                case FASTA:
160                        return toString(60, null, ">%s%n", false, false, false, false);
161                case GCG:
162                case MSF:
163                        return toString(50, IOUtils.getGCGHeader(sequences), IOUtils.getIDFormat(sequences), true, false, false,
164                                        false);
165                case PDBWEB:
166                        return toString(60, null, "%s", true, false, true, true);
167                }
168        }
169
170        /**
171         * String representation of the MSA
172         * @return String
173         */
174
175        @Override
176        public String toString() {
177                return toString(getLength(), null, null, false, false, false, false);
178        }
179
180        // helper methods
181
182        /**
183         * Helper method that does all the formatting work
184         * @param width
185         * @param header
186         * @param idFormat
187         * @param interlaced
188         * @param aligIndices
189         * @param aligConservation
190         * @param webDisplay
191         * @return String
192         */
193        // creates formatted String
194        private String toString(int width, String header, String idFormat, boolean interlaced, boolean aligIndices,
195                        boolean aligConservation, boolean webDisplay) {
196
197                // TODO handle circular alignments
198                StringBuilder s = (header == null) ? new StringBuilder() : new StringBuilder(header);
199
200                if (webDisplay && sequences.size() == 2) {
201                        s.append("<div><pre>");
202                }
203
204                width = Math.max(1, width);
205                if (interlaced) {
206                        String aligIndFormat = "%-" + Math.max(1, width / 2) + "d %" + Math.max(1, width - (width / 2) - 1) +
207                                        "d%n";
208                        for (int i = 0; i < getLength(); i += width) {
209                                int start = i + 1, end = Math.min(getLength(), i + width);
210                                if (i > 0) {
211                                        s.append(String.format("%n"));
212                                }
213                                if (aligIndices) {
214                                        if (end < i + width) {
215                                                int line = end - start + 1;
216                                                aligIndFormat = "%-" + Math.max(1, line / 2) + "d %" + Math.max(1, line - (line / 2) - 1) +
217                                                                "d%n";
218                                        }
219                                        if (idFormat != null) {
220                                                s.append(String.format(idFormat, ""));
221                                        }
222                                        s.append(String.format(aligIndFormat, start, end));
223                                }
224                                int counter = 0;
225                                for (S as : sequences) {
226                                        counter++;
227                                        if (webDisplay && sequences.size() == 2) {
228                                                printSequenceAlignmentWeb(s, counter, idFormat, start, end);
229                                        } else {
230                                                if (idFormat != null) {
231                                                        s.append(String.format(idFormat, as.getAccession()));
232                                                }
233                                                s.append(as.getSubSequence(start, end).getSequenceAsString());
234                                                s.append(String.format("%n"));
235                                        }
236                                        if (aligConservation && sequences.size() == 2 && counter == 1) {
237                                                printConservation(s, idFormat, start, end, webDisplay);
238                                        }
239                                }
240                        }
241                } else {
242                        for (S as : sequences) {
243                                if (idFormat != null) {
244                                        s.append(String.format(idFormat, as.getAccession()));
245                                }
246                                for (int i = 0; i < getLength(); i += width) {
247                                        int start = i + 1, end = Math.min(getLength(), i + width);
248                                        s.append(as.getSubSequence(start, end).getSequenceAsString());
249                                        s.append(String.format("%n"));
250                                }
251                        }
252                }
253
254                if (webDisplay && aligConservation && sequences.size() == 2) {
255                        s.append(IOUtils.getPDBLegend());
256                }
257                return s.toString();
258        }
259
260        /**
261         *
262         * @param s
263         * @param counter
264         * @param idFormat
265         * @param start
266         * @param end
267         */
268        private void printSequenceAlignmentWeb(StringBuilder s, int counter, String idFormat, int start, int end) {
269                S as = sequences.get(counter - 1), seq1 = sequences.get(0), seq2 = sequences.get(1);
270
271                if (idFormat != null) {
272                        s.append(String.format(idFormat, as.getAccession()));
273                }
274
275                String mySeq = as.getSubSequence(start, end).getSequenceAsString();
276                String s1 = seq1.getSubSequence(start, end).getSequenceAsString();
277                String s2 = seq2.getSubSequence(start, end).getSequenceAsString();
278                CompoundSet<C> cs = getCompoundSet();
279
280                for (int i = 0; i < s1.length(); i++) {
281                        if (i >= s2.length() || i >= mySeq.length())
282                                break;
283                        char c1 = s1.charAt(i);
284                        char c2 = s2.charAt(i);
285                        char c = mySeq.charAt(i);
286                        s.append(IOUtils.getPDBCharacter(true, c1, c2, cs.compoundsEquivalent(seq1.getCompoundAt(i),
287                                        seq2.getCompoundAt(i)), c));
288                }
289
290                s.append(String.format("%n"));
291        }
292
293        /**
294         *
295         * @param s
296         * @param idFormat
297         * @param start
298         * @param end
299         * @param webDisplay
300         */
301        private void printConservation(StringBuilder s, String idFormat, int start, int end, boolean webDisplay) {
302                S seq1 = sequences.get(0), seq2 = sequences.get(1);
303
304                if (idFormat != null) {
305                        AccessionID ac1 = sequences.get(0).getAccession();
306                        String id1 = (ac1 == null) ? "null" : ac1.getID();
307                        id1 = id1.replaceAll("\\.", " ");
308                        s.append(String.format(idFormat, id1));
309                }
310
311                String s1 = seq1.getSubSequence(start, end).getSequenceAsString();
312                String s2 = seq2.getSubSequence(start, end).getSequenceAsString();
313                CompoundSet<C> cs = getCompoundSet();
314
315                for (int i = 0; i < s1.length(); i++) {
316                        if (i >= s2.length())
317                                break;
318                        char c1 = s1.charAt(i);
319                        char c2 = s2.charAt(i);
320                        s.append(IOUtils.getPDBConservation(webDisplay, c1, c2, cs.compoundsEquivalent(seq1.getCompoundAt(i),
321                                        seq2.getCompoundAt(i))));
322                }
323
324                s.append(String.format("%n"));
325        }
326
327}