/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
021package org.biojava.nbio.core.sequence.io;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.ProteinSequence;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
026import org.biojava.nbio.core.sequence.template.AbstractSequence;
027import org.biojava.nbio.core.sequence.template.CompoundSet;
028import org.biojava.nbio.core.sequence.template.ProxySequenceReader;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import java.util.ArrayList;
033import java.util.Collection;
034import java.util.List;
035import java.util.Locale;
036
037/**
038 * A sequence creator which preserves the case of its input string in
039 * the user collection of the returned ProteinSequence.
040 *
041 * <p>The user collection will be the same length as the resulting ProteinSequence.
042 * Each object can be cast to a Boolean. If true, the corresponding position in
043 * the input file was uppercase.
044 *
045 * <h3>Example</h3>
046 * <code><pre>CasePreservingProteinSequenceCreator creator =
047 *    new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
048 *AbstractSequence<AminoAcidCompound> seq = creator.getSequence("aaAA",0);
049 *System.out.println(seq.getSequenceAsString()); //"AAAA"
050 *System.out.println(seq.getUserCollection()); //"[false, false, true, true]"
051 *</code></pre>
052 */
053public class CasePreservingProteinSequenceCreator extends ProteinSequenceCreator {
054
055
056        public CasePreservingProteinSequenceCreator(
057                        CompoundSet<AminoAcidCompound> compoundSet) {
058                super(compoundSet);
059        }
060
061        /**
062         *
063         * @see org.biojava.nbio.core.sequence.io.ProteinSequenceCreator#getSequence(org.biojava.nbio.core.sequence.template.ProxySequenceReader, long)
064         */
065        @Override
066        public AbstractSequence<AminoAcidCompound> getSequence(
067                        ProxySequenceReader<AminoAcidCompound> proxyLoader, long index) {
068                AbstractSequence<AminoAcidCompound> seq = super.getSequence(proxyLoader, index);
069                seq.setUserCollection(getStringCase(proxyLoader.getSequenceAsString()));
070                return seq;
071        }
072
073        /* (non-Javadoc)
074         * @see org.biojava.nbio.core.sequence.io.ProteinSequenceCreator#getSequence(java.lang.String, long)
075         */
076        @Override
077        public AbstractSequence<AminoAcidCompound> getSequence(String sequence,
078                        long index) throws CompoundNotFoundException {
079                AbstractSequence<AminoAcidCompound> seq = super.getSequence(sequence.toUpperCase(Locale.ENGLISH), index);
080                seq.setUserCollection(getStringCase(sequence));
081                return seq;
082        }
083
084
085        /**
086         * Assumes all compounds were uppercase
087         * @see org.biojava.nbio.core.sequence.io.ProteinSequenceCreator#getSequence(java.util.List)
088         */
089        @Override
090        public AbstractSequence<AminoAcidCompound> getSequence(
091                        List<AminoAcidCompound> list) {
092                AbstractSequence<AminoAcidCompound> seq =super.getSequence(list);
093                Collection<Object> strCase = new ArrayList<Object>(seq.getLength());
094                for(int i=0;i<seq.getLength();i++) {
095                        strCase.add(true);
096                }
097                seq.setUserCollection(strCase);
098                return seq;
099        }
100
101        /**
102         * Returns a list of Booleans of the same length as the input, specifying
103         * whether each character was uppercase or not.
104         * @param str A string. Should not contain unicode supplemental characters.
105         * @return a list of Booleans of the same length as the input, specifying
106         * whether each character was uppercase or not.
107         * This list contains only Booleans.
108         */
109        private static List<Object> getStringCase(String str) {
110                List<Object> types = new ArrayList<Object>(str.length());
111                for(int i=0;i<str.length();i++) {
112                        types.add(Character.isUpperCase(str.charAt(i)));
113                }
114                return types;
115        }
116
117
118        /**
119         * Takes a {@link ProteinSequence} which was created by a
120         * {@link CasePreservingProteinSequenceCreator}. Uses the case info
121         * stored in the user collection to modify the output array.
122         *
123         * <p>Sets elements of the output array which correspond to lowercase letters
124         * to null.
125         *
126         * @param seq Input sequence with case stored as the user collection
127         * @param out
128         */
129        public static void setLowercaseToNull( ProteinSequence seq,
130                        Object[] out) {
131                // should have been set by seq creator
132                Collection<Object> userCollection = seq.getUserCollection();
133                if(userCollection == null)
134                        throw new IllegalArgumentException("Sequence doesn't contain valid case info");
135                if(userCollection.size() != out.length)
136                        throw new IllegalArgumentException("Sequence length doesn't math output array length");
137
138                int pos = 0;
139                for(Object isAligned : userCollection) {
140                        assert(isAligned instanceof Boolean);
141                        if(!(Boolean)isAligned) {
142                                out[pos] = null;
143                        }
144                        pos++;
145                }
146        }
147}