001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 2012-11-20
021 *
022 */
023
024package org.biojava.nbio.structure;
025
026import java.util.*;
027import java.util.Map.Entry;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031/**
032 * A chainName, a start residue, and an end residue. The chainName is matched
033 * to {@link Chain#getName()}, so for mmCIF files it indicates the authorId
034 * rather than the asymId.
035 *
036 * Chain may be null when referencing a single-chainName structure; for multi-chainName
037 * structures omitting the chainName is an error. Start and/or end may also be null,
038 * which is interpreted as the first and last residues in the chainName, respectively.
039 *
040 * @author dmyerstu
041 * @see ResidueNumber
042 * @see org.biojava.nbio.structure.ResidueRangeAndLength
043 */
044public class ResidueRange {
045
046        private final String chainName;
047        private final ResidueNumber end;
048        private final ResidueNumber start;
049
050        public static final Pattern RANGE_REGEX = Pattern.compile(
051                        "^\\s*([a-zA-Z0-9]+|_)" + //chainName ID. Be flexible here, rather than restricting to 4-char IDs
052                        "(?:" + //begin range, this is a "non-capturing group"
053                                "(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group.
054                                "(?:"+ // another non capturing group for the residue range
055                                        "([-+]?[0-9]+[A-Za-z]?|\\^)?" + // first residue
056                                        "(?:" +
057                                                "\\s*(-)\\s*" + // hyphen indicates a range was intended
058                                                "([-+]?[0-9]+[A-Za-z]?|\\$)?" + // second residue
059                                        ")?+"+
060                                ")?+"+
061                        ")?" + //end range
062                        "\\s*");
063
064        public static final Pattern CHAIN_REGEX = Pattern.compile("^\\s*([a-zA-Z0-9]+|_)$");
065
066        /**
067         * Parse the residue range from a string. Several formats are accepted:
068         * <ul>
069         *   <li> chainName.start-end
070         *   <li> chainName.residue
071         *   <li> chain_start-end (for better filename compatibility)
072         * </ul>
073         *
074         * <p>Residues can be positive or negative and may include insertion codes.
075         * See {@link ResidueNumber#fromString(String)}.
076         *
077         * <p>Examples:
078         * <ul>
079         * <li><code>A:5-100</code>
080         * <li><code>A_5-100</code>
081         * <li><code>A_-5</code>
082         * <li><code>A:-12I-+12I</code>
083         * <li><code>A:^-$</code>
084         * </ul>
085         *
086         * @param s   residue string to parse
087         * @return The unique ResidueRange corresponding to {@code s}
088         */
089        public static ResidueRange parse(String s) {
090                Matcher matcher = RANGE_REGEX.matcher(s);
091                if (matcher.matches()) {
092                        ResidueNumber start = null, end = null;
093                        String chain = null;
094                        try {
095                                chain = matcher.group(1);
096                                if (matcher.group(2) != null) {
097                                        // ^ indicates first res (start==null)
098                                        if(!"^".equals(matcher.group(2)) ) {
099                                                start = ResidueNumber.fromString(matcher.group(2));
100                                                start.setChainName(chain);
101                                        }
102                                }
103                                if(matcher.group(3) == null) {
104                                        // single-residue range
105                                        end = start;
106                                } else
107                                        // $ indicates last res (end==null)
108                                        if( matcher.group(4) != null && !"$".equals(matcher.group(4)) ){
109                                                end = ResidueNumber.fromString(matcher.group(4));
110                                                end.setChainName(chain);
111                                        }
112
113                                return new ResidueRange(chain, start, end);
114                        } catch (IllegalStateException e) {
115                                throw new IllegalArgumentException("Range " + s + " was not valid", e);
116                        }
117                } else if (CHAIN_REGEX.matcher(s).matches()) {
118                        return new ResidueRange(s, (ResidueNumber)null, null);
119                }
120                throw new IllegalArgumentException("Illegal ResidueRange format:" + s);
121        }
122
123        /**
124         * @param s
125         *            A string of the form chain_start-end,chain_start-end, ... For example:
126         *            <code>A.5-100,R_110-190,Z_200-250</code>.
127         * @return The unique ResidueRange corresponding to {@code s}.
128         * @see #parse(String)
129         */
130        public static List<ResidueRange> parseMultiple(String s) {
131                s = s.trim();
132                // trim parentheses, for backwards compatibility
133                if ( s.startsWith("("))
134                        s = s.substring(1);
135                if ( s.endsWith(")")) {
136                        s = s.substring(0,s.length()-1);
137                }
138
139                String[] parts = s.split(",");
140                List<ResidueRange> list = new ArrayList<>(parts.length);
141                for (String part : parts) {
142                        list.add(parse(part));
143                }
144                return list;
145        }
146
147        public ResidueRange(String chainName, String start, String end) {
148                this.chainName = chainName;
149                this.start = ResidueNumber.fromString(start);
150                this.start.setChainName(chainName);
151                this.end = ResidueNumber.fromString(end);
152                this.end.setChainName(chainName);
153        }
154
155        public ResidueRange(String chainName, ResidueNumber start, ResidueNumber end) {
156                this.chainName = chainName;
157                this.start = start;
158                this.end = end;
159        }
160
161        @Override
162        public boolean equals(Object obj) {
163                if (this == obj) return true;
164                if (obj == null) return false;
165                if (getClass() != obj.getClass()) return false;
166                ResidueRange other = (ResidueRange) obj;
167                if (chainName == null) {
168                        if (other.chainName != null) return false;
169                } else if (!chainName.equals(other.chainName)) return false;
170                if (end == null) {
171                        if (other.end != null) return false;
172                } else if (!end.equals(other.end)) return false;
173                if (start == null) {
174                        if (other.start != null) return false;
175                } else if (!start.equals(other.start)) return false;
176                return true;
177        }
178
179        public String getChainName() {
180                return chainName;
181        }
182
183        public ResidueNumber getEnd() {
184                return end;
185        }
186
187        public ResidueNumber getStart() {
188                return start;
189        }
190
191        @Override
192        public int hashCode() {
193                final int prime = 31;
194                int result = 1;
195                result = prime * result + (chainName == null ? 0 : chainName.hashCode());
196                result = prime * result + (end == null ? 0 : end.hashCode());
197                result = prime * result + (start == null ? 0 : start.hashCode());
198                return result;
199        }
200
201        @Override
202        public String toString() {
203                if( start == null && end == null) {
204                        // Indicates the full chainName
205                        return chainName;
206                }
207                return chainName + "_" + start + "-" + end;
208        }
209
210        /**
211         * Returns the ResidueNumber that is at position {@code positionInRange} in
212         * <em>this</em> ResidueRange.
213         * @return The ResidueNumber, or false if it does not exist or is not within this ResidueRange
214         */
215        public ResidueNumber getResidue(int positionInRange, AtomPositionMap map) {
216                if (map == null) throw new NullPointerException("The AtomPositionMap must be non-null");
217                int i = 0;
218                for (Map.Entry<ResidueNumber, Integer> entry : map.getNavMap().entrySet()) {
219                        if (i == positionInRange) return entry.getKey();
220                        if (contains(entry.getKey(), map)) {
221                                i++;
222                        }
223                }
224                return null;
225        }
226
227        /**
228         * @return True if and only if {@code residueNumber} is within this ResidueRange
229         */
230        public boolean contains(ResidueNumber residueNumber, AtomPositionMap map) {
231
232                if (residueNumber == null)
233                        throw new NullPointerException("Can't find the ResidueNumber because it is null");
234
235                if (map == null)
236                        throw new NullPointerException("The AtomPositionMap must be non-null");
237
238                Integer pos = map.getPosition(residueNumber);
239                if (pos == null) throw new IllegalArgumentException("Couldn't find residue " + residueNumber.printFull());
240
241                ResidueNumber startResidue = getStart()==null? map.getFirst(getChainName()) : getStart();
242                Integer startPos = map.getPosition(startResidue);
243                if (startPos == null) throw new IllegalArgumentException("Couldn't find the start position");
244
245                ResidueNumber endResidue = getEnd()==null? map.getLast(getChainName()) : getEnd();
246                Integer endPos = map.getPosition(endResidue);
247                if (endPos == null) throw new IllegalArgumentException("Couldn't find the end position");
248                return pos >= startPos && pos <= endPos;
249        }
250
251        /**
252         * Returns a new Iterator over every {@link ResidueNumber} in this ResidueRange.
253         * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish.
254         */
255        public Iterator<ResidueNumber> iterator(final AtomPositionMap map) {
256                //Use Entries to guarentee not null
257                final Iterator<Entry<ResidueNumber, Integer>> entryIt = map.getNavMap().entrySet().iterator();
258                if(! entryIt.hasNext()) {
259                        // empty iterator
260                        return Arrays.asList(new ResidueNumber[0]).iterator();
261                }
262                // Peek at upcoming entry
263
264                return new Iterator<ResidueNumber>() {
265                        Entry<ResidueNumber,Integer> next = loadNext();
266
267                        private Entry<ResidueNumber,Integer> loadNext() {
268
269                                while( entryIt.hasNext() ) {
270                                        next = entryIt.next();
271                                        ResidueNumber nextPos = next.getKey();
272                                        if( contains(nextPos, map)) {
273                                                // loaded a valid next value
274                                                return next;
275                                        }
276                                }
277                                next = null;
278                                return next;
279                        }
280
281                        @Override
282                        public boolean hasNext() {
283                                return next != null;
284                        }
285
286                        @Override
287                        public ResidueNumber next() {
288                                if(!hasNext()){
289                                        throw new NoSuchElementException();
290                                }
291                                ResidueNumber pos = next.getKey();
292                                loadNext();
293                                return pos;
294                        }
295
296                        @Override
297                        public void remove() {
298                                throw new UnsupportedOperationException("Not modifiable");
299                        }
300                };
301        }
302
303        /**
304         * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges.
305         * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish.
306         */
307        public static Iterator<ResidueNumber> multiIterator(final AtomPositionMap map, final ResidueRange... rrs) {
308                return new Iterator<ResidueNumber>() {
309                        private int r = 0;
310                        private Iterator<ResidueNumber> internal;
311                        @Override
312                        public boolean hasNext() {
313                                if (r == rrs.length - 1) {
314                                        init();
315                                        return internal.hasNext();
316                                }
317                                return true;
318                        }
319                        private void init() {
320                                if (internal == null) {
321                                        internal = rrs[r].iterator(map);
322                                }
323                        }
324                        @Override
325                        public ResidueNumber next() {
326                                if (rrs.length == 0) throw new NoSuchElementException();
327                                init();
328                                if (!hasNext()) throw new NoSuchElementException();
329                                if (!internal.hasNext()) {
330                                        r++;
331                                        internal = rrs[r].iterator(map);
332                                }
333                                return internal.next();
334                        }
335                        @Override
336                        public void remove() {
337                                throw new UnsupportedOperationException("Not modifiable");
338                        }
339                };
340        }
341
342        /**
343         * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges.
344         * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish.
345         */
346        public static Iterator<ResidueNumber> multiIterator(AtomPositionMap map, List<? extends ResidueRange> rrs) {
347                ResidueRange[] ranges = new ResidueRange[rrs.size()];
348                for (int i = 0; i < rrs.size(); i++) {
349                        ranges[i] = rrs.get(i);
350                }
351                return multiIterator(map, ranges);
352        }
353
354        public static List<ResidueRange> parseMultiple(List<String> ranges) {
355                List<ResidueRange> rrs = new ArrayList<>(ranges.size());
356                for (String range : ranges) {
357                        ResidueRange rr = ResidueRange.parse(range);
358                        if (rr != null) rrs.add(rr);
359                }
360                return rrs;
361        }
362
363        public static List<String> toStrings(List<? extends ResidueRange> ranges) {
364                List<String> list = new ArrayList<>(ranges.size());
365                for (ResidueRange range : ranges) {
366                        list.add(range.toString());
367                }
368                return list;
369        }
370
371        public static String toString(List<? extends ResidueRange> ranges) {
372                StringBuilder sb = new StringBuilder();
373                for (int i = 0; i < ranges.size(); i++) {
374                        sb.append(ranges.get(i));
375                        if (i < ranges.size() - 1) sb.append(",");
376                }
377                return sb.toString();
378        }
379
380}