001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.io;
022
023import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
024import org.biojava.nbio.core.sequence.ProteinSequence;
025import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
026import org.biojava.nbio.core.sequence.template.AbstractSequence;
027import org.biojava.nbio.core.sequence.template.CompoundSet;
028import org.biojava.nbio.core.sequence.template.ProxySequenceReader;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import java.util.ArrayList;
033import java.util.Collection;
034import java.util.List;
035import java.util.Locale;
036
037/**
038 * A sequence creator which preserves the case of its input string in
039 * the user collection of the returned ProteinSequence.
040 *
041 * <p>The user collection will be the same length as the resulting ProteinSequence.
042 * Each object can be cast to a Boolean. If true, the corresponding position in
043 * the input file was uppercase.
044 * <p>
045 * Example
046 * <code>
047 *    CasePreservingProteinSequenceCreator creator =
048 *    new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
049 *    AbstractSequence&lt;AminoAcidCompound&gt; seq = creator.getSequence("aaAA",0);
050 *    System.out.println(seq.getSequenceAsString()); //"AAAA"
051 *    System.out.println(seq.getUserCollection()); //"[false, false, true, true]"
052 * </code>
053 */
054public class CasePreservingProteinSequenceCreator extends ProteinSequenceCreator {
055
056
057        public CasePreservingProteinSequenceCreator(
058                        CompoundSet<AminoAcidCompound> compoundSet) {
059                super(compoundSet);
060        }
061
062        /**
063         *
064         * @see org.biojava.nbio.core.sequence.io.ProteinSequenceCreator#getSequence(org.biojava.nbio.core.sequence.template.ProxySequenceReader, long)
065         */
066        @Override
067        public AbstractSequence<AminoAcidCompound> getSequence(
068                        ProxySequenceReader<AminoAcidCompound> proxyLoader, long index) {
069                AbstractSequence<AminoAcidCompound> seq = super.getSequence(proxyLoader, index);
070                seq.setUserCollection(getStringCase(proxyLoader.getSequenceAsString()));
071                return seq;
072        }
073
074        /* (non-Javadoc)
075         * @see org.biojava.nbio.core.sequence.io.ProteinSequenceCreator#getSequence(java.lang.String, long)
076         */
077        @Override
078        public AbstractSequence<AminoAcidCompound> getSequence(String sequence,
079                        long index) throws CompoundNotFoundException {
080                AbstractSequence<AminoAcidCompound> seq = super.getSequence(sequence.toUpperCase(Locale.ENGLISH), index);
081                seq.setUserCollection(getStringCase(sequence));
082                return seq;
083        }
084
085
086        /**
087         * Assumes all compounds were uppercase
088         * @see org.biojava.nbio.core.sequence.io.ProteinSequenceCreator#getSequence(java.util.List)
089         */
090        @Override
091        public AbstractSequence<AminoAcidCompound> getSequence(
092                        List<AminoAcidCompound> list) {
093                AbstractSequence<AminoAcidCompound> seq =super.getSequence(list);
094                Collection<Object> strCase = new ArrayList<>(seq.getLength());
095                for(int i=0;i<seq.getLength();i++) {
096                        strCase.add(true);
097                }
098                seq.setUserCollection(strCase);
099                return seq;
100        }
101
102        /**
103         * Returns a list of Booleans of the same length as the input, specifying
104         * whether each character was uppercase or not.
105         * @param str A string. Should not contain unicode supplemental characters.
106         * @return a list of Booleans of the same length as the input, specifying
107         * whether each character was uppercase or not.
108         * This list contains only Booleans.
109         */
110        private static List<Object> getStringCase(String str) {
111                List<Object> types = new ArrayList<>(str.length());
112                for(int i=0;i<str.length();i++) {
113                        types.add(Character.isUpperCase(str.charAt(i)));
114                }
115                return types;
116        }
117
118
119        /**
120         * Takes a {@link ProteinSequence} which was created by a
121         * {@link CasePreservingProteinSequenceCreator}. Uses the case info
122         * stored in the user collection to modify the output array.
123         *
124         * <p>Sets elements of the output array which correspond to lowercase letters
125         * to null.
126         *
127         * @param seq Input sequence with case stored as the user collection
128         * @param out
129         */
130        public static void setLowercaseToNull( ProteinSequence seq,
131                        Object[] out) {
132                // should have been set by seq creator
133                Collection<Object> userCollection = seq.getUserCollection();
134                if(userCollection == null)
135                        throw new IllegalArgumentException("Sequence doesn't contain valid case info");
136                if(userCollection.size() != out.length)
137                        throw new IllegalArgumentException("Sequence length doesn't math output array length");
138
139                int pos = 0;
140                for(Object isAligned : userCollection) {
141                        assert(isAligned instanceof Boolean);
142                        if(!(Boolean)isAligned) {
143                                out[pos] = null;
144                        }
145                        pos++;
146                }
147        }
148}