001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 * 022 * @author Richard Holland 023 * 024 * 025 */ 026package org.biojava.nbio.core.sequence.storage; 027 028import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 029import org.biojava.nbio.core.sequence.AccessionID; 030import org.biojava.nbio.core.sequence.Strand; 031import org.biojava.nbio.core.sequence.template.*; 032import org.biojava.nbio.core.util.Equals; 033import org.biojava.nbio.core.util.Hashcoder; 034 035import java.util.ArrayList; 036import java.util.Iterator; 037import java.util.List; 038 039/** 040 * Stores a Sequence as a collection of compounds in an ArrayList 041 * 042 * @param <C> 043 */ 044public class ArrayListSequenceReader<C extends Compound> implements SequenceReader<C> { 045 046 private CompoundSet<C> compoundSet; 047 private ArrayList<C> parsedCompounds = new ArrayList<C>(); 048 049 private volatile Integer hashcode = null; 050 051 /** 052 * 053 */ 054 public ArrayListSequenceReader() { 055 //Do nothing 056 } 057 058 /** 059 * 060 * @param compounds 061 * @param compoundSet 062 */ 063 public ArrayListSequenceReader(List<C> compounds, CompoundSet<C> compoundSet) { 064 setCompoundSet(compoundSet); 065 setContents(compounds); 066 } 067 068 /** 069 * 070 * @param sequence 071 * @param compoundSet 072 * @throws CompoundNotFoundException 073 */ 074 public ArrayListSequenceReader(String sequence, CompoundSet<C> compoundSet) throws CompoundNotFoundException { 075 setCompoundSet(compoundSet); 076 setContents(sequence); 077 } 078 079 /** 080 * 081 * @return 082 */ 083 @Override 084 public String getSequenceAsString() { 085 return getSequenceAsString(1, getLength(), Strand.POSITIVE); 086 } 087 088 089 /** 090 * 091 * @param begin 092 * @param end 093 * @param strand 094 * @return 095 */ 096 public String getSequenceAsString(Integer begin, Integer end, Strand strand) { 097 // TODO Optimise/cache. 098 SequenceAsStringHelper<C> sequenceAsStringHelper = new SequenceAsStringHelper<C>(); 099 return sequenceAsStringHelper.getSequenceAsString(this.parsedCompounds, compoundSet, begin, end, strand); 100 } 101 102 /** 103 * 104 * @return 105 */ 106 @Override 107 public List<C> getAsList() { 108 return this.parsedCompounds; 109 } 110 111 /** 112 * 113 * @param position 114 * @return 115 */ 116 @Override 117 public C getCompoundAt(int position) { 118 return this.parsedCompounds.get(position - 1); 119 } 120 121 /** 122 * 123 * @param compound 124 * @return 125 */ 126 @Override 127 public int getIndexOf(C compound) { 128 return this.parsedCompounds.indexOf(compound) + 1; 129 } 130 131 /** 132 * 133 * @param compound 134 * @return 135 */ 136 @Override 137 public int getLastIndexOf(C compound) { 138 return this.parsedCompounds.lastIndexOf(compound) + 1; 139 } 140 141 /** 142 * 143 * @return 144 */ 145 @Override 146 public int getLength() { 147 return this.parsedCompounds.size(); 148 } 149 150 /** 151 * 152 * @return 153 */ 154 @Override 155 public Iterator<C> iterator() { 156 return this.parsedCompounds.iterator(); 157 } 158 159 /** 160 * 161 * @param compoundSet 162 */ 163 @Override 164 public void setCompoundSet(CompoundSet<C> compoundSet) { 165 this.compoundSet = compoundSet; 166 } 167 168 /** 169 * 170 * @return 171 */ 172 @Override 173 public CompoundSet<C> getCompoundSet() { 174 return compoundSet; 175 } 176 177 /** 178 * 179 * @param sequence 180 */ 181 @Override 182 public void setContents(String sequence) throws CompoundNotFoundException { 183 // Horrendously inefficient - pretty much the way the old BJ did things. 184 // TODO Should be optimised. 185 this.parsedCompounds.clear(); 186 hashcode = null; 187 int maxCompoundLength = compoundSet.getMaxSingleCompoundStringLength(); 188 boolean maxCompundLengthEqual1 = true; 189 if (maxCompoundLength > 1) { 190 maxCompundLengthEqual1 = false; 191 } 192 int length = sequence.length(); 193 parsedCompounds.ensureCapacity(length); //get the array size correct 194 for (int i = 0; i < length;) { 195 String compoundStr = null; 196 C compound = null; 197 if (maxCompundLengthEqual1) { // trying to save some steps where typically the answer is 1 so avoid complicated for loop 198 compoundStr = sequence.substring(i, i + 1); 199 compound = compoundSet.getCompoundForString(compoundStr); 200 } else { 201 for (int compoundStrLength = 1; compound == null && compoundStrLength <= maxCompoundLength; compoundStrLength++) { 202 compoundStr = sequence.substring(i, i + compoundStrLength); 203 compound = compoundSet.getCompoundForString(compoundStr); 204 } 205 } 206 if (compound == null) { 207 throw new CompoundNotFoundException("Cannot find compound for: " + compoundStr); 208 } else { 209 i += compoundStr.length(); 210 } 211 this.parsedCompounds.add(compound); 212 } 213 parsedCompounds.trimToSize(); // just in case it increases capacity free up extra memory 214 } 215 216 /** 217 * 218 * @param list 219 */ 220 public void setContents(List<C> list) { 221 parsedCompounds.clear(); 222 for (C c : list) { 223 parsedCompounds.add(c); 224 } 225 } 226 227 /** 228 * 229 * @param bioBegin 230 * @param bioEnd 231 * @return 232 */ 233 @Override 234 public SequenceView<C> getSubSequence(final Integer bioBegin, final Integer bioEnd) { 235 return new SequenceProxyView<C>(ArrayListSequenceReader.this, bioBegin, bioEnd); 236 } 237 238 /** 239 * 240 * @return 241 */ 242 @Override 243 public AccessionID getAccession() { 244 throw new UnsupportedOperationException("Not supported yet."); 245 } 246 247 /** 248 * 249 * @param compounds 250 * @return 251 */ 252 @Override 253 public int countCompounds(C... compounds) { 254 return SequenceMixin.countCompounds(this, compounds); 255 } 256 257 /** 258 * 259 * @return 260 */ 261 @Override 262 public SequenceView<C> getInverse() { 263 return SequenceMixin.inverse(this); 264 } 265 266 @Override 267 public int hashCode() { 268 if(hashcode == null) { 269 int s = Hashcoder.SEED; 270 s = Hashcoder.hash(s, parsedCompounds); 271 s = Hashcoder.hash(s, compoundSet); 272 hashcode = s; 273 } 274 return hashcode; 275 } 276 277 @Override 278 @SuppressWarnings("unchecked") 279 public boolean equals(Object o) { 280 if(Equals.classEqual(this, o)) { 281 ArrayListSequenceReader<C> that = (ArrayListSequenceReader<C>)o; 282 return Equals.equal(parsedCompounds, that.parsedCompounds) && 283 Equals.equal(compoundSet, that.compoundSet); 284 } 285 return false; 286 } 287}