/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 *
 * @author Richard Holland
 *
 *
 */
package org.biojava.nbio.core.sequence.storage;

import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.AccessionID;
import org.biojava.nbio.core.sequence.Strand;
import org.biojava.nbio.core.sequence.template.*;
import org.biojava.nbio.core.util.Equals;
import org.biojava.nbio.core.util.Hashcoder;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Stores a Sequence as a collection of compounds in an {@link ArrayList}.
 *
 * <p>Positions exposed by this class are 1-based: {@link #getCompoundAt(int)}
 * subtracts one before indexing the backing list, and the index lookups add
 * one to the list index (so "not found" is reported as {@code 0}).
 *
 * <p>The hash code is computed lazily and cached; every method of this class
 * that changes the contents or the compound set invalidates the cache.
 * Changes made directly to the list returned by {@link #getAsList()} do NOT
 * invalidate it.
 *
 * @param <C> the type of compound held by this reader
 */
public class ArrayListSequenceReader<C extends Compound> implements SequenceReader<C> {

    private CompoundSet<C> compoundSet;
    private ArrayList<C> parsedCompounds = new ArrayList<C>();

    /** Cached hash code; {@code null} means "not computed yet" or "invalidated". */
    private volatile Integer hashcode = null;

    /**
     * Creates an empty reader with no compound set. A compound set must be
     * supplied through {@link #setCompoundSet(CompoundSet)} before
     * {@link #setContents(String)} can be used.
     */
    public ArrayListSequenceReader() {
        //Do nothing
    }

    /**
     * Creates a reader holding a copy of the given compounds.
     *
     * @param compounds the compounds to store, in sequence order
     * @param compoundSet the compound set associated with the compounds
     */
    public ArrayListSequenceReader(List<C> compounds, CompoundSet<C> compoundSet) {
        setCompoundSet(compoundSet);
        setContents(compounds);
    }

    /**
     * Creates a reader by parsing the given string with the given compound set.
     *
     * @param sequence the textual sequence to parse
     * @param compoundSet the compound set used to resolve the text into compounds
     * @throws CompoundNotFoundException if part of the string cannot be resolved
     *         to a compound of {@code compoundSet}
     */
    public ArrayListSequenceReader(String sequence, CompoundSet<C> compoundSet) throws CompoundNotFoundException {
        setCompoundSet(compoundSet);
        setContents(sequence);
    }

    /**
     * Returns the whole sequence as a string, i.e. positions 1 to
     * {@link #getLength()} on the positive strand.
     *
     * @return the string form of the complete sequence
     */
    @Override
    public String getSequenceAsString() {
        return getSequenceAsString(1, getLength(), Strand.POSITIVE);
    }

    /**
     * Returns part of the sequence as a string. The work is delegated to
     * {@link SequenceAsStringHelper}.
     *
     * @param begin first position to include (presumably 1-based and inclusive,
     *        as the no-argument overload passes {@code 1} - confirm against the helper)
     * @param end last position to include (presumably inclusive, as the
     *        no-argument overload passes {@link #getLength()} - confirm against the helper)
     * @param strand the strand to render
     * @return the string produced by the helper for the requested range
     */
    public String getSequenceAsString(Integer begin, Integer end, Strand strand) {
        // TODO Optimise/cache.
        SequenceAsStringHelper<C> sequenceAsStringHelper = new SequenceAsStringHelper<C>();
        return sequenceAsStringHelper.getSequenceAsString(this.parsedCompounds, compoundSet, begin, end, strand);
    }

    /**
     * Returns the backing list itself, not a copy. Modifying the returned list
     * modifies this reader and does not invalidate the cached hash code.
     *
     * @return the live list of compounds held by this reader
     */
    @Override
    public List<C> getAsList() {
        return this.parsedCompounds;
    }

    /**
     * Returns the compound at a 1-based position.
     *
     * @param position 1-based position in the sequence
     * @return the compound stored at that position
     * @throws IndexOutOfBoundsException if {@code position} is outside
     *         {@code 1..getLength()}
     */
    @Override
    public C getCompoundAt(int position) {
        return this.parsedCompounds.get(position - 1);
    }

    /**
     * Finds the first occurrence of a compound.
     *
     * @param compound the compound to look for
     * @return the 1-based position of the first occurrence, or {@code 0} if
     *         the compound does not occur
     */
    @Override
    public int getIndexOf(C compound) {
        return this.parsedCompounds.indexOf(compound) + 1;
    }

    /**
     * Finds the last occurrence of a compound.
     *
     * @param compound the compound to look for
     * @return the 1-based position of the last occurrence, or {@code 0} if
     *         the compound does not occur
     */
    @Override
    public int getLastIndexOf(C compound) {
        return this.parsedCompounds.lastIndexOf(compound) + 1;
    }

    /**
     * Returns the number of compounds held.
     *
     * @return the size of the backing list
     */
    @Override
    public int getLength() {
        return this.parsedCompounds.size();
    }

    /**
     * Iterates over the compounds in sequence order.
     *
     * @return the iterator of the backing list
     */
    @Override
    public Iterator<C> iterator() {
        return this.parsedCompounds.iterator();
    }

    /**
     * Replaces the compound set. Compounds that are already stored are left
     * untouched; they are neither re-parsed nor validated against the new set.
     *
     * @param compoundSet the compound set to use from now on
     */
    @Override
    public void setCompoundSet(CompoundSet<C> compoundSet) {
        this.compoundSet = compoundSet;
        // hashCode() hashes the compound set, so a cached value is now stale.
        hashcode = null;
    }

    /**
     * Returns the compound set currently associated with this reader.
     *
     * @return the compound set, or {@code null} if none has been set
     */
    @Override
    public CompoundSet<C> getCompoundSet() {
        return compoundSet;
    }

    /**
     * Replaces the contents by parsing a string with the current compound set.
     *
     * <p>At each position the shortest prefix (1 character up to the compound
     * set's maximum compound string length, never reaching past the end of the
     * string) that the compound set resolves is taken as the next compound.
     *
     * <p>The previous contents are cleared before parsing starts, so if parsing
     * fails the reader holds only the compounds parsed up to the failure.
     *
     * @param sequence the textual sequence to parse
     * @throws CompoundNotFoundException if no compound can be resolved at some
     *         position of {@code sequence}
     */
    @Override
    public void setContents(String sequence) throws CompoundNotFoundException {
        // Horrendously inefficient - pretty much the way the old BJ did things.
        // TODO Should be optimised.
        this.parsedCompounds.clear();
        hashcode = null;
        final int maxCompoundLength = compoundSet.getMaxSingleCompoundStringLength();
        // Anything that is not "> 1" takes the single-character fast path.
        final boolean singleCharCompounds = maxCompoundLength <= 1;
        final int length = sequence.length();
        parsedCompounds.ensureCapacity(length); //get the array size correct
        for (int i = 0; i < length;) {
            String compoundStr = null;
            C compound = null;
            if (singleCharCompounds) {
                // trying to save some steps where typically the answer is 1 so avoid complicated for loop
                compoundStr = sequence.substring(i, i + 1);
                compound = compoundSet.getCompoundForString(compoundStr);
            } else {
                // Try ever longer candidates, but never read beyond the end of
                // the input: an unparseable tail must surface as
                // CompoundNotFoundException, not StringIndexOutOfBoundsException.
                for (int compoundStrLength = 1;
                        compound == null
                                && compoundStrLength <= maxCompoundLength
                                && compoundStrLength <= length - i;
                        compoundStrLength++) {
                    compoundStr = sequence.substring(i, i + compoundStrLength);
                    compound = compoundSet.getCompoundForString(compoundStr);
                }
            }
            if (compound == null) {
                throw new CompoundNotFoundException("Cannot find compound for: " + compoundStr);
            }
            i += compoundStr.length();
            this.parsedCompounds.add(compound);
        }
        parsedCompounds.trimToSize(); // just in case it increases capacity free up extra memory
    }

    /**
     * Replaces the contents with a copy of the given list.
     *
     * <p>Passing the list obtained from {@link #getAsList()} leaves the
     * contents unchanged.
     *
     * @param list the compounds to store, in sequence order
     */
    public void setContents(List<C> list) {
        if (list == parsedCompounds) {
            // Same object as the backing list: clearing it first would wipe
            // the very elements we are asked to keep.
            return;
        }
        parsedCompounds.clear();
        hashcode = null; // contents changed, cached hash is stale
        parsedCompounds.addAll(list);
    }

    /**
     * Returns a view over part of this sequence, backed by this reader.
     *
     * @param bioBegin start of the view, passed through to {@link SequenceProxyView}
     * @param bioEnd end of the view, passed through to {@link SequenceProxyView}
     * @return a {@link SequenceProxyView} over this reader for the given range
     */
    @Override
    public SequenceView<C> getSubSequence(final Integer bioBegin, final Integer bioEnd) {
        return new SequenceProxyView<C>(this, bioBegin, bioEnd);
    }

    /**
     * Not supported by this reader.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public AccessionID getAccession() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Counts occurrences of the given compounds; delegates to
     * {@link SequenceMixin#countCompounds}.
     *
     * @param compounds the compounds to count
     * @return the count computed by {@code SequenceMixin}
     */
    @Override
    public int countCompounds(C... compounds) {
        return SequenceMixin.countCompounds(this, compounds);
    }

    /**
     * Returns the inverse view of this sequence; delegates to
     * {@link SequenceMixin#inverse}.
     *
     * @return the view produced by {@code SequenceMixin}
     */
    @Override
    public SequenceView<C> getInverse() {
        return SequenceMixin.inverse(this);
    }

    /**
     * Hashes the stored compounds and the compound set. The result is cached
     * until the contents or the compound set are changed through this class.
     *
     * @return the hash code of this reader
     */
    @Override
    public int hashCode() {
        // Read the volatile field once so that a concurrent invalidation
        // between the null check and the return cannot unbox a null.
        Integer cached = hashcode;
        if (cached == null) {
            int s = Hashcoder.SEED;
            s = Hashcoder.hash(s, parsedCompounds);
            s = Hashcoder.hash(s, compoundSet);
            cached = s;
            hashcode = cached;
        }
        return cached;
    }

    /**
     * Two readers are equal when {@code Equals.classEqual} accepts the pair
     * and both their compound lists and their compound sets are equal.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is an equal reader, {@code false} otherwise
     */
    @Override
    public boolean equals(Object o) {
        if (Equals.classEqual(this, o)) {
            // Cast is only reached after Equals.classEqual accepted the pair.
            @SuppressWarnings("unchecked")
            ArrayListSequenceReader<C> that = (ArrayListSequenceReader<C>) o;
            return Equals.equal(parsedCompounds, that.parsedCompounds)
                    && Equals.equal(compoundSet, that.compoundSet);
        }
        return false;
    }
}