001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.search.io;
022
023import java.util.ArrayList;
024import java.util.List;
025import org.biojava.nbio.core.alignment.SimpleAlignedSequence;
026import org.biojava.nbio.core.alignment.SimpleSequencePair;
027import org.biojava.nbio.core.alignment.template.AlignedSequence.Step;
028import org.biojava.nbio.core.alignment.template.SequencePair;
029import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
030import org.biojava.nbio.core.sequence.DNASequence;
031import org.biojava.nbio.core.sequence.ProteinSequence;
032import org.biojava.nbio.core.sequence.RNASequence;
033import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
034import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
035import org.biojava.nbio.core.sequence.template.Compound;
036import org.biojava.nbio.core.sequence.template.Sequence;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040/**
041 * This class models a search Hsp.
042 * You will retrieve a list of this using iterator of a Hit
043 * <p>
044 * Designed by Paolo Pavan.
045 * You may want to find my contacts on Github and LinkedIn for code info
046 * or discuss major changes.
047 * https://github.com/paolopavan
048 *
049 * @author Paolo Pavan
050 * @param <C> the compound type
051 * @param <S> the sequence type
052 */
053public abstract class Hsp <S extends Sequence<C>, C extends Compound> {
054        private static final Logger logger = LoggerFactory.getLogger(Hsp.class);
055        private Integer hspNum;
056        private Double hspBitScore;
057        private Integer hspScore;
058        private Double hspEvalue;
059        private Integer hspQueryFrom;
060        private Integer hspQueryTo;
061        private Integer hspHitFrom;
062        private Integer hspHitTo;
063        private Integer hspQueryFrame;
064        private Integer hspHitFrame;
065        private Integer hspIdentity;
066        private Integer hspPositive;
067        private Integer hspGaps;
068        private Integer hspAlignLen;
069        private String hspQseq;
070        private String hspHseq;
071        private String hspIdentityString;
072        private Double percentageIdentity = null;
073        private Integer mismatchCount = null;
074        private SimpleSequencePair<S, C> returnAln;
075
076        @Override
077        public int hashCode() {
078                int hash = 5;
079                hash = 67 * hash + (this.hspQseq != null ? this.hspQseq.hashCode() : 0);
080                hash = 67 * hash + (this.hspHseq != null ? this.hspHseq.hashCode() : 0);
081                hash = 67 * hash + (this.hspIdentityString != null ? this.hspIdentityString.hashCode() : 0);
082                return hash;
083        }
084        /**
085         * Experimental.
086         * Wants to implement conceptual comparisons of search results.
087         * Fields unrelated to search are deliberately not considered.
088         *
089         * In HSP case, alignment representation strings are considered.
090         * @return true if HSP alignments are the same,
091         * false otherwise or if alignment strings are undetermined
092         */
093        @Override
094        public boolean equals(Object obj) {
095                if (obj == null) {
096                        return false;
097                }
098                if (getClass() != obj.getClass()) {
099                        return false;
100                }
101                final Hsp<?, ?> other = (Hsp<?, ?>) obj;
102                if ((this.hspQseq == null) ? (other.hspQseq != null) : !this.hspQseq.equals(other.hspQseq)) {
103                        return false;
104                }
105                if ((this.hspHseq == null) ? (other.hspHseq != null) : !this.hspHseq.equals(other.hspHseq)) {
106                        return false;
107                }
108                if ((this.hspIdentityString == null) ? (other.hspIdentityString != null) : !this.hspIdentityString.equals(other.hspIdentityString)) {
109                        return false;
110                }
111                return true;
112        }
113
114        public SequencePair<S,C> getAlignment(){
115                if (returnAln != null) return returnAln;
116
117                SimpleAlignedSequence<S,C> alignedQuery, alignedHit;
118                // queryFrom e hitTo?
119                int numBefore, numAfter;
120
121                alignedQuery = new SimpleAlignedSequence(getSequence(hspQseq), getAlignmentsSteps(hspQseq));
122                alignedHit = new SimpleAlignedSequence(getSequence(hspHseq), getAlignmentsSteps(hspHseq));
123
124                returnAln = new SimpleSequencePair<>(alignedQuery, alignedHit);
125
126                return returnAln;
127        }
128
129        private Sequence getSequence(String gappedSequenceString){
130                if (gappedSequenceString == null) return null;
131
132                Sequence returnSeq = null;
133                String sequenceString = gappedSequenceString.replace("-", "");
134
135                try {
136                        if (sequenceString.matches("^[ACTG]+$"))
137                                returnSeq = new DNASequence(sequenceString, DNACompoundSet.getDNACompoundSet());
138                        else if (sequenceString.matches("^[ACUG]+$"))
139                                returnSeq = new RNASequence(sequenceString, DNACompoundSet.getDNACompoundSet());
140                        else
141                                returnSeq = new ProteinSequence(sequenceString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
142                } catch (CompoundNotFoundException ex) {
143                        logger.error("Unexpected error, could not find compound when creating Sequence object from Hsp", ex);
144                }
145                return returnSeq;
146        }
147
148        private List<Step> getAlignmentsSteps(String gappedSequenceString){
149                List<Step> returnList = new ArrayList<>();
150
151                for (char c: gappedSequenceString.toCharArray()){
152                        if (c=='-') returnList.add(Step.GAP); else returnList.add(Step.COMPOUND);
153                }
154                return returnList;
155        }
156
157        public int getHspNum() {
158                return hspNum;
159        }
160
161        public double getHspBitScore() {
162                return hspBitScore;
163        }
164
165        public int getHspScore() {
166                return hspScore;
167        }
168
169        public double getHspEvalue() {
170                return hspEvalue;
171        }
172
173        public int getHspQueryFrom() {
174                return hspQueryFrom;
175        }
176
177        public int getHspQueryTo() {
178                return hspQueryTo;
179        }
180
181        public int getHspHitFrom() {
182                return hspHitFrom;
183        }
184
185        public int getHspHitTo() {
186                return hspHitTo;
187        }
188
189        public int getHspQueryFrame() {
190                return hspQueryFrame;
191        }
192
193        public int getHspHitFrame() {
194                return hspHitFrame;
195        }
196
197        public int getHspIdentity() {
198                return hspIdentity;
199        }
200
201        public int getHspPositive() {
202                return hspPositive;
203        }
204
205        public int getHspGaps() {
206                return hspGaps;
207        }
208
209        public int getHspAlignLen() {
210                return hspAlignLen;
211        }
212        /**
213         * HSP aligned query sequence string
214         * @return
215         */
216        public String getHspQseq() {
217                return hspQseq;
218        }
219        /**
220         * HSP aligned hit sequence string
221         * @return
222         */
223        public String getHspHseq() {
224                return hspHseq;
225        }
226        /**
227         * Identity string representing correspondence between aligned residues
228         * @return
229         */
230        public String getHspIdentityString() {
231                return hspIdentityString;
232        }
233
234        public Double getPercentageIdentity() {
235                if (percentageIdentity != null) return percentageIdentity;
236                if (hspIdentity!= null && hspAlignLen != null) return (double)hspIdentity/hspAlignLen;
237                return null;
238        }
239
240        public Integer getMismatchCount() {
241                if (mismatchCount != null) return mismatchCount;
242                if (hspIdentity!= null && hspAlignLen != null) return hspIdentity-hspAlignLen;
243                return null;
244        }
245
246        public Hsp(int hspNum, double hspBitScore, int hspScore, double hspEvalue, int hspQueryFrom, int hspQueryTo, int hspHitFrom, int hspHitTo, int hspQueryFrame, int hspHitFrame, int hspIdentity, int hspPositive, int hspGaps, int hspAlignLen, String hspQseq, String hspHseq, String hspIdentityString, Double percentageIdentity, Integer mismatchCount) {
247                this.hspNum = hspNum;
248                this.hspBitScore = hspBitScore;
249                this.hspScore = hspScore;
250                this.hspEvalue = hspEvalue;
251                this.hspQueryFrom = hspQueryFrom;
252                this.hspQueryTo = hspQueryTo;
253                this.hspHitFrom = hspHitFrom;
254                this.hspHitTo = hspHitTo;
255                this.hspQueryFrame = hspQueryFrame;
256                this.hspHitFrame = hspHitFrame;
257                this.hspIdentity = hspIdentity;
258                this.hspPositive = hspPositive;
259                this.hspGaps = hspGaps;
260                this.hspIdentity = hspAlignLen;
261                this.hspQseq = hspQseq;
262                this.hspHseq = hspHseq;
263                this.hspIdentityString = hspIdentityString;
264                this.percentageIdentity = percentageIdentity;
265                this.mismatchCount = mismatchCount;
266
267                // sanity check
268                if (percentageIdentity != null && (percentageIdentity < 0 || percentageIdentity >1))
269                        throw new IllegalArgumentException("Percentage identity must be between 0 and 1");
270
271        }
272
273}