001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 2012-11-20
021 *
022 */
023
024package org.biojava.nbio.structure;
025
026import java.util.*;
027import java.util.Map.Entry;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031/**
032 * A chain, a start residue, and an end residue.
033 *
034 * Chain may be null when referencing a single-chain structure; for multi-chain
035 * structures omitting the chain is an error. Start and/or end may also be null,
036 * which is interpreted as the first and last residues in the chain, respectively.
037 *
038 * @author dmyerstu
039 * @see ResidueNumber
040 * @see org.biojava.nbio.structure.ResidueRangeAndLength
041 */
042public class ResidueRange {
043
044        private final String chain;
045        private final ResidueNumber end;
046        private final ResidueNumber start;
047
048        public static final Pattern RANGE_REGEX = Pattern.compile(
049                        "^\\s*([a-zA-Z0-9]+|_)" + //chain ID. Be flexible here, rather than restricting to 4-char IDs
050                        "(?:" + //begin range, this is a "non-capturing group"
051                                "(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group.
052                                "(?:"+ // another non capturing group for the residue range
053                                        "([-+]?[0-9]+[A-Za-z]?)" + // first residue
054                                        "(?:" +
055                                                "\\s*-\\s*" + // -
056                                                "([-+]?[0-9]+[A-Za-z]?)" + // second residue
057                                        ")?+"+
058                                ")?+"+
059                        ")?" + //end range
060                        "\\s*");
061
062        public static final Pattern CHAIN_REGEX = Pattern.compile("^\\s*([a-zA-Z0-9]+|_)$");
063
064        /**
065         * Parse the residue range from a string. Several formats are accepted:
066         * <ul>
067         *   <li> chain.start-end
068         *   <li> chain.residue
069         *   <li> chain_start-end (for better filename compatibility)
070         * </ul>
071         *
072         * <p>Residues can be positive or negative and may include insertion codes.
073         * See {@link ResidueNumber#fromString(String)}.
074         *
075         * <p>Examples:
076         * <ul>
077         * <li><code>A.5-100</code>
078         * <li><code>A_5-100</code>
079         * <li><code>A_-5</code>
080         * <li><code>A.-12I-+12I
081         *
082         * @param s   residue string to parse
083         * @return The unique ResidueRange corresponding to {@code s}
084         */
085        public static ResidueRange parse(String s) {
086                Matcher matcher = RANGE_REGEX.matcher(s);
087                if (matcher.matches()) {
088                        ResidueNumber start = null, end = null;
089                        String chain = null;
090                        try {
091                                chain = matcher.group(1);
092                                if (matcher.group(2) != null) {
093                                        start = ResidueNumber.fromString(matcher.group(2));
094                                        start.setChainId(chain);
095                                        if(matcher.group(3) == null) {
096                                                // single-residue range
097                                                end = start;
098                                        } else {
099                                                end = ResidueNumber.fromString(matcher.group(3));
100                                                end.setChainId(chain);
101                                        }
102                                }
103                                return new ResidueRange(chain, start, end);
104                        } catch (IllegalStateException e) {
105                                throw new IllegalArgumentException("Range " + s + " was not valid", e);
106                        }
107                } else if (CHAIN_REGEX.matcher(s).matches()) {
108                        return new ResidueRange(s, (ResidueNumber)null, null);
109                }
110                throw new IllegalArgumentException("Illegal ResidueRange format:" + s);
111        }
112
113        /**
114         * @param s
115         *            A string of the form chain_start-end,chain_start-end, ... For example:
116         *            <code>A.5-100,R_110-190,Z_200-250</code>.
117         * @return The unique ResidueRange corresponding to {@code s}.
118         * @see #parse(String)
119         */
120        public static List<ResidueRange> parseMultiple(String s) {
121                s = s.trim();
122                // trim parentheses, for backwards compatibility
123                if ( s.startsWith("("))
124                        s = s.substring(1);
125                if ( s.endsWith(")")) {
126                        s = s.substring(0,s.length()-1);
127                }
128
129                String[] parts = s.split(",");
130                List<ResidueRange> list = new ArrayList<ResidueRange>(parts.length);
131                for (String part : parts) {
132                        list.add(parse(part));
133                }
134                return list;
135        }
136
137        public ResidueRange(String chain, String start, String end) {
138                this.chain = chain;
139                this.start = ResidueNumber.fromString(start);
140                this.start.setChainId(chain);
141                this.end = ResidueNumber.fromString(end);
142                this.end.setChainId(chain);
143        }
144
145        public ResidueRange(String chain, ResidueNumber start, ResidueNumber end) {
146                this.chain = chain;
147                this.start = start;
148                this.end = end;
149        }
150
151        @Override
152        public boolean equals(Object obj) {
153                if (this == obj) return true;
154                if (obj == null) return false;
155                if (getClass() != obj.getClass()) return false;
156                ResidueRange other = (ResidueRange) obj;
157                if (chain == null) {
158                        if (other.chain != null) return false;
159                } else if (!chain.equals(other.chain)) return false;
160                if (end == null) {
161                        if (other.end != null) return false;
162                } else if (!end.equals(other.end)) return false;
163                if (start == null) {
164                        if (other.start != null) return false;
165                } else if (!start.equals(other.start)) return false;
166                return true;
167        }
168
169        public String getChainId() {
170                return chain;
171        }
172
173        public ResidueNumber getEnd() {
174                return end;
175        }
176
177        public ResidueNumber getStart() {
178                return start;
179        }
180
181        @Override
182        public int hashCode() {
183                final int prime = 31;
184                int result = 1;
185                result = prime * result + (chain == null ? 0 : chain.hashCode());
186                result = prime * result + (end == null ? 0 : end.hashCode());
187                result = prime * result + (start == null ? 0 : start.hashCode());
188                return result;
189        }
190
191        @Override
192        public String toString() {
193                if( start == null && end == null) {
194                        // Indicates the full chain
195                        return chain;
196                }
197                return chain + "_" + start + "-" + end;
198        }
199
200        /**
201         * Returns the ResidueNumber that is at position {@code positionInRange} in
202         * <em>this</em> ResidueRange.
203         * @return The ResidueNumber, or false if it does not exist or is not within this ResidueRange
204         */
205        public ResidueNumber getResidue(int positionInRange, AtomPositionMap map) {
206                if (map == null) throw new NullPointerException("The AtomPositionMap must be non-null");
207                int i = 0;
208                for (Map.Entry<ResidueNumber, Integer> entry : map.getNavMap().entrySet()) {
209                        if (i == positionInRange) return entry.getKey();
210                        if (contains(entry.getKey(), map)) {
211                                i++;
212                        }
213                }
214                return null;
215        }
216
217        /**
218         * @return True if and only if {@code residueNumber} is within this ResidueRange
219         */
220        public boolean contains(ResidueNumber residueNumber, AtomPositionMap map) {
221
222                if (residueNumber == null)
223                        throw new NullPointerException("Can't find the ResidueNumber because it is null");
224
225                if (map == null)
226                        throw new NullPointerException("The AtomPositionMap must be non-null");
227
228                Integer pos = map.getPosition(residueNumber);
229                if (pos == null) throw new IllegalArgumentException("Couldn't find residue " + residueNumber.printFull());
230
231                ResidueNumber startResidue = getStart()==null? map.getFirst(getChainId()) : getStart();
232                Integer startPos = map.getPosition(startResidue);
233                if (startPos == null) throw new IllegalArgumentException("Couldn't find the start position");
234
235                ResidueNumber endResidue = getEnd()==null? map.getLast(getChainId()) : getEnd();
236                Integer endPos = map.getPosition(endResidue);
237                if (endPos == null) throw new IllegalArgumentException("Couldn't find the end position");
238                return pos >= startPos && pos <= endPos;
239        }
240
241        /**
242         * Returns a new Iterator over every {@link ResidueNumber} in this ResidueRange.
243         * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish.
244         */
245        public Iterator<ResidueNumber> iterator(final AtomPositionMap map) {
246                //Use Entries to guarentee not null
247                final Iterator<Entry<ResidueNumber, Integer>> entryIt = map.getNavMap().entrySet().iterator();
248                if(! entryIt.hasNext()) {
249                        // empty iterator
250                        return Arrays.asList(new ResidueNumber[0]).iterator();
251                }
252                // Peek at upcoming entry
253
254                return new Iterator<ResidueNumber>() {
255                        Entry<ResidueNumber,Integer> next = loadNext();
256
257                        private Entry<ResidueNumber,Integer> loadNext() {
258
259                                while( entryIt.hasNext() ) {
260                                        next = entryIt.next();
261                                        ResidueNumber nextPos = next.getKey();
262                                        if( contains(nextPos, map)) {
263                                                // loaded a valid next value
264                                                return next;
265                                        }
266                                }
267                                next = null;
268                                return next;
269                        }
270
271                        @Override
272                        public boolean hasNext() {
273                                return next != null;
274                        }
275
276                        @Override
277                        public ResidueNumber next() {
278                                ResidueNumber pos = next.getKey();
279                                loadNext();
280                                return pos;
281                        }
282
283                        @Override
284                        public void remove() {
285                                throw new UnsupportedOperationException("Not modifiable");
286                        }
287                };
288        }
289
290        /**
291         * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges.
292         * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish.
293         */
294        public static Iterator<ResidueNumber> multiIterator(final AtomPositionMap map, final ResidueRange... rrs) {
295                return new Iterator<ResidueNumber>() {
296                        private int r = 0;
297                        private Iterator<ResidueNumber> internal;
298                        @Override
299                        public boolean hasNext() {
300                                if (r == rrs.length - 1) {
301                                        init();
302                                        return internal.hasNext();
303                                }
304                                return true;
305                        }
306                        private void init() {
307                                if (internal == null) {
308                                        internal = rrs[r].iterator(map);
309                                }
310                        }
311                        @Override
312                        public ResidueNumber next() {
313                                if (rrs.length == 0) throw new NoSuchElementException();
314                                init();
315                                if (!hasNext()) throw new NoSuchElementException();
316                                if (!internal.hasNext()) {
317                                        r++;
318                                        internal = rrs[r].iterator(map);
319                                }
320                                return internal.next();
321                        }
322                        @Override
323                        public void remove() {
324                                throw new UnsupportedOperationException("Not modifiable");
325                        }
326                };
327        }
328
329        /**
330         * Returns a new Iterator over every {@link ResidueNumber} in the list of ResidueRanges.
331         * Stores the contents of {@code map} until the iterator is finished, so calling code should set the iterator to {@code null} if it did not finish.
332         */
333        public static Iterator<ResidueNumber> multiIterator(AtomPositionMap map, List<? extends ResidueRange> rrs) {
334                ResidueRange[] ranges = new ResidueRange[rrs.size()];
335                for (int i = 0; i < rrs.size(); i++) {
336                        ranges[i] = rrs.get(i);
337                }
338                return multiIterator(map, ranges);
339        }
340
341        public static List<ResidueRange> parseMultiple(List<String> ranges) {
342                List<ResidueRange> rrs = new ArrayList<ResidueRange>(ranges.size());
343                for (String range : ranges) {
344                        ResidueRange rr = ResidueRange.parse(range);
345                        if (rr != null) rrs.add(rr);
346                }
347                return rrs;
348        }
349
350        public static List<String> toStrings(List<? extends ResidueRange> ranges) {
351                List<String> list = new ArrayList<String>(ranges.size());
352                for (ResidueRange range : ranges) {
353                        list.add(range.toString());
354                }
355                return list;
356        }
357
358        public static String toString(List<? extends ResidueRange> ranges) {
359                StringBuilder sb = new StringBuilder();
360                for (int i = 0; i < ranges.size(); i++) {
361                        sb.append(ranges.get(i));
362                        if (i < ranges.size() - 1) sb.append(",");
363                }
364                return sb.toString();
365        }
366
367}