001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 2012-11-20 021 * 022 */ 023 024package org.biojava.nbio.structure; 025 026import java.util.*; 027import java.util.Map.Entry; 028import java.util.regex.Matcher; 029import java.util.regex.Pattern; 030 031/** 032 * A chain, a start residue, and an end residue. 033 * 034 * Chain may be null when referencing a single-chain structure; for multi-chain 035 * structures omitting the chain is an error. Start and/or end may also be null, 036 * which is interpreted as the first and last residues in the chain, respectively. 037 * 038 * @author dmyerstu 039 * @see ResidueNumber 040 * @see org.biojava.nbio.structure.ResidueRangeAndLength 041 */ 042public class ResidueRange { 043 044 private final String chain; 045 private final ResidueNumber end; 046 private final ResidueNumber start; 047 048 public static final Pattern RANGE_REGEX = Pattern.compile( 049 "^\\s*([a-zA-Z0-9]+|_)" + //chain ID. Be flexible here, rather than restricting to 4-char IDs 050 "(?:" + //begin range, this is a "non-capturing group" 051 "(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group. 052 "(?:"+ // another non capturing group for the residue range 053 "([-+]?[0-9]+[A-Za-z]?)" + // first residue 054 "(?:" + 055 "\\s*-\\s*" + // - 056 "([-+]?[0-9]+[A-Za-z]?)" + // second residue 057 ")?+"+ 058 ")?+"+ 059 ")?" + //end range 060 "\\s*"); 061 062 public static final Pattern CHAIN_REGEX = Pattern.compile("^\\s*([a-zA-Z0-9]+|_)$"); 063 064 /** 065 * Parse the residue range from a string. Several formats are accepted: 066 * <ul> 067 * <li> chain.start-end 068 * <li> chain.residue 069 * <li> chain_start-end (for better filename compatibility) 070 * </ul> 071 * 072 * <p>Residues can be positive or negative and may include insertion codes. 073 * See {@link ResidueNumber#fromString(String)}. 074 * 075 * <p>Examples: 076 * <ul> 077 * <li><code>A.5-100</code> 078 * <li><code>A_5-100</code> 079 * <li><code>A_-5</code> 080 * <li><code>A.-12I-+12I 081 * 082 * @param s residue string to parse 083 * @return The unique ResidueRange corresponding to {@code s} 084 */ 085 public static ResidueRange parse(String s) { 086 Matcher matcher = RANGE_REGEX.matcher(s); 087 if (matcher.matches()) { 088 ResidueNumber start = null, end = null; 089 String chain = null; 090 try { 091 chain = matcher.group(1); 092 if (matcher.group(2) != null) { 093 start = ResidueNumber.fromString(matcher.group(2)); 094 start.setChainId(chain); 095 if(matcher.group(3) == null) { 096 // single-residue range 097 end = start; 098 } else { 099 end = ResidueNumber.fromString(matcher.group(3)); 100 end.setChainId(chain); 101 } 102 } 103 return new ResidueRange(chain, start, end); 104 } catch (IllegalStateException e) { 105 throw new IllegalArgumentException("Range " + s + " was not valid", e); 106 } 107 } else if (CHAIN_REGEX.matcher(s).matches()) { 108 return new ResidueRange(s, (ResidueNumber)null, null); 109 } 110 throw new IllegalArgumentException("Illegal ResidueRange format:" + s); 111 } 112 113 /** 114 * @param s 115 * A string of the form chain_start-end,chain_start-end, ... For example: 116 * <code>A.5-100,R_110-190,Z_200-250</code>. 117 * @return The unique ResidueRange corresponding to {@code s}. 118 * @see #parse(String) 119 */ 120 public static List<ResidueRange> parseMultiple(String s) { 121 s = s.trim(); 122 // trim parentheses, for backwards compatibility 123 if ( s.startsWith("(")) 124 s = s.substring(1); 125 if ( s.endsWith(")")) { 126 s = s.substring(0,s.length()-1); 127 } 128 129 String[] parts = s.split(","); 130 List<ResidueRange> list = new ArrayList<ResidueRange>(parts.length); 131 for (String part : parts) { 132 list.add(parse(part)); 133 } 134 return list; 135 } 136 137 public ResidueRange(String chain, String start, String end) { 138 this.chain = chain; 139 this.start = ResidueNumber.fromString(start); 140 this.start.setChainId(chain); 141 this.end = ResidueNumber.fromString(end); 142 this.end.setChainId(chain); 143 } 144 145 public ResidueRange(String chain, ResidueNumber start, ResidueNumber end) { 146 this.chain = chain; 147 this.start = start; 148 this.end = end; 149 } 150 151 @Override 152 public boolean equals(Object obj) { 153 if (this == obj) return true; 154 if (obj == null) return false; 155 if (getClass() != obj.getClass()) return false; 156 ResidueRange other = (ResidueRange) obj; 157 if (chain == null) { 158 if (other.chain != null) return false; 159 } else if (!chain.equals(other.chain)) return false; 160 if (end == null) { 161 if (other.end != null) return false; 162 } else if (!end.equals(other.end)) return false; 163 if (start == null) { 164 if (other.start != null) return false; 165 } else if (!start.equals(other.start)) return false; 166 return true; 167 } 168 169 public String getChainId() { 170 return chain; 171 } 172 173 public ResidueNumber getEnd() { 174 return end; 175 } 176 177 public ResidueNumber getStart() { 178 return start; 179 } 180 181 @Override 182 public int hashCode() { 183 final int prime = 31; 184 int result = 1; 185 result = prime * result + (chain == null ? 0 : chain.hashCode()); 186 result = prime * result + (end == null ? 0 : end.hashCode()); 187 result = prime * result + (start == null ? 0 : start.hashCode()); 188 return result; 189 } 190 191 @Override 192 public String toString() { 193 if( start == null && end == null) { 194 // Indicates the full chain 195 return chain; 196 } 197 return chain + "_" + start + "-" + end; 198 } 199 200 /** 201 * Returns the ResidueNumber that is at position {@code positionInRange} in 202 * <em>this</em> ResidueRange. 203 * @return The ResidueNumber, or false if it does not exist or is not within this ResidueRange 204 */ 205 public ResidueNumber getResidue(int positionInRange, AtomPositionMap map) { 206 if (map == null) throw new NullPointerException("The AtomPositionMap must be non-null"); 207 int i = 0; 208 for (Map.Entry<ResidueNumber, Integer> entry : map.getNavMap().entrySet()) { 209 if (i == positionInRange) return entry.getKey(); 210 if (contains(entry.getKey(), map)) { 211 i++; 212 } 213 } 214 return null; 215 } 216 217 /** 218 * @return True if and only if {@code residueNumber} is within this ResidueRange 219 */ 220 public boolean contains(ResidueNumber residueNumber, AtomPositionMap map) { 221 222 if (residueNumber == null) 223 throw new NullPointerException("Can't find the ResidueNumber because it is null"); 224 225 if (map == null) 226 throw new NullPointerException("The AtomPositionMap must be non-null"); 227 228 Integer pos = map.getPosition(residueNumber); 229 if (pos == null) throw new IllegalArgumentException("Couldn't find residue " + residueNumber.printFull()); 230 231 ResidueNumber startResidue = getStart()==null? map.getFirst(getChainId()) : getStart(); 232 Integer startPos = map.getPosition(startResidue); 233 if (startPos == null) throw new IllegalArgumentException("Couldn't find the start position"); 234 235 ResidueNumber endResidue = getEnd()==null? map.getLast(getChainId()) : getEnd(); 236 Integer endPos = map.getPosition(endResidue); 237 if (endPos == null) throw new IllegalArgumentException("Couldn't find the end position"); 238 return pos >= startPos && pos <= endPos; 239 } 240 241 /** 242 * Returns a new Iterator over every {@link ResidueNumber} in this ResidueRange. 243 * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish. 244 */ 245 public Iterator<ResidueNumber> iterator(final AtomPositionMap map) { 246 //Use Entries to guarentee not null 247 final Iterator<Entry<ResidueNumber, Integer>> entryIt = map.getNavMap().entrySet().iterator(); 248 if(! entryIt.hasNext()) { 249 // empty iterator 250 return Arrays.asList(new ResidueNumber[0]).iterator(); 251 } 252 // Peek at upcoming entry 253 254 return new Iterator<ResidueNumber>() { 255 Entry<ResidueNumber,Integer> next = loadNext(); 256 257 private Entry<ResidueNumber,Integer> loadNext() { 258 259 while( entryIt.hasNext() ) { 260 next = entryIt.next(); 261 ResidueNumber nextPos = next.getKey(); 262 if( contains(nextPos, map)) { 263 // loaded a valid next value 264 return next; 265 } 266 } 267 next = null; 268 return next; 269 } 270 271 @Override 272 public boolean hasNext() { 273 return next != null; 274 } 275 276 @Override 277 public ResidueNumber next() { 278 ResidueNumber pos = next.getKey(); 279 loadNext(); 280 return pos; 281 } 282 283 @Override 284 public void remove() { 285 throw new UnsupportedOperationException("Not modifiable"); 286 } 287 }; 288 } 289 290 /** 291 * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges. 292 * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish. 293 */ 294 public static Iterator<ResidueNumber> multiIterator(final AtomPositionMap map, final ResidueRange... rrs) { 295 return new Iterator<ResidueNumber>() { 296 private int r = 0; 297 private Iterator<ResidueNumber> internal; 298 @Override 299 public boolean hasNext() { 300 if (r == rrs.length - 1) { 301 init(); 302 return internal.hasNext(); 303 } 304 return true; 305 } 306 private void init() { 307 if (internal == null) { 308 internal = rrs[r].iterator(map); 309 } 310 } 311 @Override 312 public ResidueNumber next() { 313 if (rrs.length == 0) throw new NoSuchElementException(); 314 init(); 315 if (!hasNext()) throw new NoSuchElementException(); 316 if (!internal.hasNext()) { 317 r++; 318 internal = rrs[r].iterator(map); 319 } 320 return internal.next(); 321 } 322 @Override 323 public void remove() { 324 throw new UnsupportedOperationException("Not modifiable"); 325 } 326 }; 327 } 328 329 /** 330 * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges. 331 * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish. 332 */ 333 public static Iterator<ResidueNumber> multiIterator(AtomPositionMap map, List<? extends ResidueRange> rrs) { 334 ResidueRange[] ranges = new ResidueRange[rrs.size()]; 335 for (int i = 0; i < rrs.size(); i++) { 336 ranges[i] = rrs.get(i); 337 } 338 return multiIterator(map, ranges); 339 } 340 341 public static List<ResidueRange> parseMultiple(List<String> ranges) { 342 List<ResidueRange> rrs = new ArrayList<ResidueRange>(ranges.size()); 343 for (String range : ranges) { 344 ResidueRange rr = ResidueRange.parse(range); 345 if (rr != null) rrs.add(rr); 346 } 347 return rrs; 348 } 349 350 public static List<String> toStrings(List<? extends ResidueRange> ranges) { 351 List<String> list = new ArrayList<String>(ranges.size()); 352 for (ResidueRange range : ranges) { 353 list.add(range.toString()); 354 } 355 return list; 356 } 357 358 public static String toString(List<? extends ResidueRange> ranges) { 359 StringBuilder sb = new StringBuilder(); 360 for (int i = 0; i < ranges.size(); i++) { 361 sb.append(ranges.get(i)); 362 if (i < ranges.size() - 1) sb.append(","); 363 } 364 return sb.toString(); 365 } 366 367}