001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 2012-11-20 021 * 022 */ 023 024package org.biojava.nbio.structure; 025 026import java.util.*; 027import java.util.Map.Entry; 028import java.util.regex.Matcher; 029import java.util.regex.Pattern; 030 031/** 032 * A chainName, a start residue, and an end residue. The chainName is matched 033 * to {@link Chain#getName()}, so for mmCIF files it indicates the authorId 034 * rather than the asymId. 035 * 036 * Chain may be null when referencing a single-chainName structure; for multi-chainName 037 * structures omitting the chainName is an error. Start and/or end may also be null, 038 * which is interpreted as the first and last residues in the chainName, respectively. 039 * 040 * @author dmyerstu 041 * @see ResidueNumber 042 * @see org.biojava.nbio.structure.ResidueRangeAndLength 043 */ 044public class ResidueRange { 045 046 private final String chainName; 047 private final ResidueNumber end; 048 private final ResidueNumber start; 049 050 public static final Pattern RANGE_REGEX = Pattern.compile( 051 "^\\s*([a-zA-Z0-9]+|_)" + //chainName ID. Be flexible here, rather than restricting to 4-char IDs 052 "(?:" + //begin range, this is a "non-capturing group" 053 "(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group. 054 "(?:"+ // another non capturing group for the residue range 055 "([-+]?[0-9]+[A-Za-z]?|\\^)?" + // first residue 056 "(?:" + 057 "\\s*(-)\\s*" + // hyphen indicates a range was intended 058 "([-+]?[0-9]+[A-Za-z]?|\\$)?" + // second residue 059 ")?+"+ 060 ")?+"+ 061 ")?" + //end range 062 "\\s*"); 063 064 public static final Pattern CHAIN_REGEX = Pattern.compile("^\\s*([a-zA-Z0-9]+|_)$"); 065 066 /** 067 * Parse the residue range from a string. Several formats are accepted: 068 * <ul> 069 * <li> chainName.start-end 070 * <li> chainName.residue 071 * <li> chain_start-end (for better filename compatibility) 072 * </ul> 073 * 074 * <p>Residues can be positive or negative and may include insertion codes. 075 * See {@link ResidueNumber#fromString(String)}. 076 * 077 * <p>Examples: 078 * <ul> 079 * <li><code>A:5-100</code> 080 * <li><code>A_5-100</code> 081 * <li><code>A_-5</code> 082 * <li><code>A:-12I-+12I</code> 083 * <li><code>A:^-$</code> 084 * </ul> 085 * 086 * @param s residue string to parse 087 * @return The unique ResidueRange corresponding to {@code s} 088 */ 089 public static ResidueRange parse(String s) { 090 Matcher matcher = RANGE_REGEX.matcher(s); 091 if (matcher.matches()) { 092 ResidueNumber start = null, end = null; 093 String chain = null; 094 try { 095 chain = matcher.group(1); 096 if (matcher.group(2) != null) { 097 // ^ indicates first res (start==null) 098 if(!"^".equals(matcher.group(2)) ) { 099 start = ResidueNumber.fromString(matcher.group(2)); 100 start.setChainName(chain); 101 } 102 } 103 if(matcher.group(3) == null) { 104 // single-residue range 105 end = start; 106 } else 107 // $ indicates last res (end==null) 108 if( matcher.group(4) != null && !"$".equals(matcher.group(4)) ){ 109 end = ResidueNumber.fromString(matcher.group(4)); 110 end.setChainName(chain); 111 } 112 113 return new ResidueRange(chain, start, end); 114 } catch (IllegalStateException e) { 115 throw new IllegalArgumentException("Range " + s + " was not valid", e); 116 } 117 } else if (CHAIN_REGEX.matcher(s).matches()) { 118 return new ResidueRange(s, (ResidueNumber)null, null); 119 } 120 throw new IllegalArgumentException("Illegal ResidueRange format:" + s); 121 } 122 123 /** 124 * @param s 125 * A string of the form chain_start-end,chain_start-end, ... For example: 126 * <code>A.5-100,R_110-190,Z_200-250</code>. 127 * @return The unique ResidueRange corresponding to {@code s}. 128 * @see #parse(String) 129 */ 130 public static List<ResidueRange> parseMultiple(String s) { 131 s = s.trim(); 132 // trim parentheses, for backwards compatibility 133 if ( s.startsWith("(")) 134 s = s.substring(1); 135 if ( s.endsWith(")")) { 136 s = s.substring(0,s.length()-1); 137 } 138 139 String[] parts = s.split(","); 140 List<ResidueRange> list = new ArrayList<>(parts.length); 141 for (String part : parts) { 142 list.add(parse(part)); 143 } 144 return list; 145 } 146 147 public ResidueRange(String chainName, String start, String end) { 148 this.chainName = chainName; 149 this.start = ResidueNumber.fromString(start); 150 this.start.setChainName(chainName); 151 this.end = ResidueNumber.fromString(end); 152 this.end.setChainName(chainName); 153 } 154 155 public ResidueRange(String chainName, ResidueNumber start, ResidueNumber end) { 156 this.chainName = chainName; 157 this.start = start; 158 this.end = end; 159 } 160 161 @Override 162 public boolean equals(Object obj) { 163 if (this == obj) return true; 164 if (obj == null) return false; 165 if (getClass() != obj.getClass()) return false; 166 ResidueRange other = (ResidueRange) obj; 167 if (chainName == null) { 168 if (other.chainName != null) return false; 169 } else if (!chainName.equals(other.chainName)) return false; 170 if (end == null) { 171 if (other.end != null) return false; 172 } else if (!end.equals(other.end)) return false; 173 if (start == null) { 174 if (other.start != null) return false; 175 } else if (!start.equals(other.start)) return false; 176 return true; 177 } 178 179 public String getChainName() { 180 return chainName; 181 } 182 183 public ResidueNumber getEnd() { 184 return end; 185 } 186 187 public ResidueNumber getStart() { 188 return start; 189 } 190 191 @Override 192 public int hashCode() { 193 final int prime = 31; 194 int result = 1; 195 result = prime * result + (chainName == null ? 0 : chainName.hashCode()); 196 result = prime * result + (end == null ? 0 : end.hashCode()); 197 result = prime * result + (start == null ? 0 : start.hashCode()); 198 return result; 199 } 200 201 @Override 202 public String toString() { 203 if( start == null && end == null) { 204 // Indicates the full chainName 205 return chainName; 206 } 207 return chainName + "_" + start + "-" + end; 208 } 209 210 /** 211 * Returns the ResidueNumber that is at position {@code positionInRange} in 212 * <em>this</em> ResidueRange. 213 * @return The ResidueNumber, or false if it does not exist or is not within this ResidueRange 214 */ 215 public ResidueNumber getResidue(int positionInRange, AtomPositionMap map) { 216 if (map == null) throw new NullPointerException("The AtomPositionMap must be non-null"); 217 int i = 0; 218 for (Map.Entry<ResidueNumber, Integer> entry : map.getNavMap().entrySet()) { 219 if (i == positionInRange) return entry.getKey(); 220 if (contains(entry.getKey(), map)) { 221 i++; 222 } 223 } 224 return null; 225 } 226 227 /** 228 * @return True if and only if {@code residueNumber} is within this ResidueRange 229 */ 230 public boolean contains(ResidueNumber residueNumber, AtomPositionMap map) { 231 232 if (residueNumber == null) 233 throw new NullPointerException("Can't find the ResidueNumber because it is null"); 234 235 if (map == null) 236 throw new NullPointerException("The AtomPositionMap must be non-null"); 237 238 Integer pos = map.getPosition(residueNumber); 239 if (pos == null) throw new IllegalArgumentException("Couldn't find residue " + residueNumber.printFull()); 240 241 ResidueNumber startResidue = getStart()==null? map.getFirst(getChainName()) : getStart(); 242 Integer startPos = map.getPosition(startResidue); 243 if (startPos == null) throw new IllegalArgumentException("Couldn't find the start position"); 244 245 ResidueNumber endResidue = getEnd()==null? map.getLast(getChainName()) : getEnd(); 246 Integer endPos = map.getPosition(endResidue); 247 if (endPos == null) throw new IllegalArgumentException("Couldn't find the end position"); 248 return pos >= startPos && pos <= endPos; 249 } 250 251 /** 252 * Returns a new Iterator over every {@link ResidueNumber} in this ResidueRange. 253 * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish. 254 */ 255 public Iterator<ResidueNumber> iterator(final AtomPositionMap map) { 256 //Use Entries to guarentee not null 257 final Iterator<Entry<ResidueNumber, Integer>> entryIt = map.getNavMap().entrySet().iterator(); 258 if(! entryIt.hasNext()) { 259 // empty iterator 260 return Arrays.asList(new ResidueNumber[0]).iterator(); 261 } 262 // Peek at upcoming entry 263 264 return new Iterator<ResidueNumber>() { 265 Entry<ResidueNumber,Integer> next = loadNext(); 266 267 private Entry<ResidueNumber,Integer> loadNext() { 268 269 while( entryIt.hasNext() ) { 270 next = entryIt.next(); 271 ResidueNumber nextPos = next.getKey(); 272 if( contains(nextPos, map)) { 273 // loaded a valid next value 274 return next; 275 } 276 } 277 next = null; 278 return next; 279 } 280 281 @Override 282 public boolean hasNext() { 283 return next != null; 284 } 285 286 @Override 287 public ResidueNumber next() { 288 if(!hasNext()){ 289 throw new NoSuchElementException(); 290 } 291 ResidueNumber pos = next.getKey(); 292 loadNext(); 293 return pos; 294 } 295 296 @Override 297 public void remove() { 298 throw new UnsupportedOperationException("Not modifiable"); 299 } 300 }; 301 } 302 303 /** 304 * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges. 305 * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish. 306 */ 307 public static Iterator<ResidueNumber> multiIterator(final AtomPositionMap map, final ResidueRange... rrs) { 308 return new Iterator<ResidueNumber>() { 309 private int r = 0; 310 private Iterator<ResidueNumber> internal; 311 @Override 312 public boolean hasNext() { 313 if (r == rrs.length - 1) { 314 init(); 315 return internal.hasNext(); 316 } 317 return true; 318 } 319 private void init() { 320 if (internal == null) { 321 internal = rrs[r].iterator(map); 322 } 323 } 324 @Override 325 public ResidueNumber next() { 326 if (rrs.length == 0) throw new NoSuchElementException(); 327 init(); 328 if (!hasNext()) throw new NoSuchElementException(); 329 if (!internal.hasNext()) { 330 r++; 331 internal = rrs[r].iterator(map); 332 } 333 return internal.next(); 334 } 335 @Override 336 public void remove() { 337 throw new UnsupportedOperationException("Not modifiable"); 338 } 339 }; 340 } 341 342 /** 343 * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges. 344 * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish. 345 */ 346 public static Iterator<ResidueNumber> multiIterator(AtomPositionMap map, List<? extends ResidueRange> rrs) { 347 ResidueRange[] ranges = new ResidueRange[rrs.size()]; 348 for (int i = 0; i < rrs.size(); i++) { 349 ranges[i] = rrs.get(i); 350 } 351 return multiIterator(map, ranges); 352 } 353 354 public static List<ResidueRange> parseMultiple(List<String> ranges) { 355 List<ResidueRange> rrs = new ArrayList<>(ranges.size()); 356 for (String range : ranges) { 357 ResidueRange rr = ResidueRange.parse(range); 358 if (rr != null) rrs.add(rr); 359 } 360 return rrs; 361 } 362 363 public static List<String> toStrings(List<? extends ResidueRange> ranges) { 364 List<String> list = new ArrayList<>(ranges.size()); 365 for (ResidueRange range : ranges) { 366 list.add(range.toString()); 367 } 368 return list; 369 } 370 371 public static String toString(List<? extends ResidueRange> ranges) { 372 StringBuilder sb = new StringBuilder(); 373 for (int i = 0; i < ranges.size(); i++) { 374 sb.append(ranges.get(i)); 375 if (i < ranges.size() - 1) sb.append(","); 376 } 377 return sb.toString(); 378 } 379 380}