001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 2012-12-01
021 *
022 */
023
024package org.biojava.nbio.structure;
025
026import java.util.ArrayList;
027import java.util.Comparator;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.NavigableMap;
032import java.util.TreeMap;
033
034import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
035import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
036import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040/**
041 * A map from {@link ResidueNumber ResidueNumbers} to ATOM record positions in a PDB file.
042 *
043 * <p>To use:
044 *
045 * <pre>
046 * Atom[] atoms = new AtomCache().getAtoms("1w0p");
047 * AtomPositionMap map = new AtomPositionMap(atoms);
048 * ResidueNumber start = new ResidueNumber("A", 100, null);
049 * ResidueNumber end = map.getEnd("A");
050 * int pos = map.getPosition(start);
051 * int length = map.calcLength(start, end);
052 * </pre>
053 *
054 * <p>Note: The getLength() methods were introduced in BioJava 4.0.0 to replace
055 * the calcLength methods. The new method returns the number of residues between
056 * two residues, inclusive, whereas the previous method returned 1 less than that.
057 * @author dmyerstu
058 */
059public class AtomPositionMap {
060
061        private static final Logger logger = LoggerFactory.getLogger(AtomPositionMap.class);
062
063        private HashMap<ResidueNumber, Integer> hashMap;
064        private TreeMap<ResidueNumber, Integer> treeMap;
065
066
067        /**
068         * Used as a Predicate to indicate whether a particular Atom should be mapped
069         */
070        public static interface GroupMatcher {
071                boolean matches(Group group);
072        }
073
074        /**
075         * Matches CA atoms of protein groups
076         */
077        public static final GroupMatcher AMINO_ACID_MATCHER = new GroupMatcher() {
078                @Override
079                public boolean matches(Group group) {
080                        if( group == null )
081                                return false;
082                        ChemComp chem = group.getChemComp();
083                        if(chem == null)
084                                return false;
085                        // Get polymer type
086                        PolymerType polyType = chem.getPolymerType();
087                        if( polyType == null) {
088                                ResidueType type = chem.getResidueType();
089                                if(type != null ) {
090                                        polyType = type.getPolymerType();
091                                }
092                        }
093                        if( polyType == null ) {
094                                return false;
095                        }
096
097                        return PolymerType.PROTEIN_ONLY.contains(polyType)
098                                        && group.hasAtom(StructureTools.CA_ATOM_NAME);
099                }
100        };
101
102        /**
103         * Matches all atoms
104         */
105        public static final GroupMatcher ANYTHING_MATCHER = new GroupMatcher() {
106                @Override
107                public boolean matches(Group group) {
108                        return true;
109                }
110        };
111
112        /**
113         * A map that is sorted by its values. Used for the treemap
114         *
115         * @author dmyerstu
116         *
117         * @param <T>
118         *            The key type
119         * @param <V>
120         *            The value type
121         */
122        private static class ValueComparator<T, V extends Comparable<V>> implements Comparator<T> {
123
124                private Map<T, V> map;
125
126                public ValueComparator(Map<T, V> map) {
127                        this.map = map;
128                }
129
130                @Override
131                public int compare(T o1, T o2) {
132                        return map.get(o1).compareTo(map.get(o2));
133                }
134
135        }
136
137        /**
138         * Creates a new AtomPositionMap containing peptide alpha-carbon atoms
139         *
140         * @param atoms
141         */
142        public AtomPositionMap(Atom[] atoms) {
143                this(atoms, AMINO_ACID_MATCHER);
144        }
145
146        /**
147         * Creates a new AtomPositionMap containing only atoms matched by {@code matcher}.
148         *
149         * If multiple atoms are present from a group, the first atom encountered will
150         * be used.
151         * @param atoms
152         */
153        public AtomPositionMap(Atom[] atoms, GroupMatcher matcher) {
154                hashMap = new HashMap<ResidueNumber, Integer>();
155                for (int i = 0; i < atoms.length; i++) {
156                        Group group = atoms[i].getGroup();
157                        ResidueNumber rn = group.getResidueNumber();
158                        if (matcher.matches(group)) {
159                                if (!hashMap.containsKey(rn)) {
160                                        hashMap.put(rn, i);
161                                }
162                        }
163                }
164                Comparator<ResidueNumber> vc = new ValueComparator<ResidueNumber, Integer>(hashMap);
165                treeMap = new TreeMap<ResidueNumber, Integer>(vc);
166                treeMap.putAll(hashMap);
167        }
168
169        /**
170         * Creates a new AtomPositionMap containing representative atoms
171         * from a structure.
172         * @param s
173         */
174        public AtomPositionMap(Structure s) {
175                this(StructureTools.getRepresentativeAtomArray(s));
176        }
177
178        /**
179         * Calculates the number of residues of the specified chain in a given range, inclusive.
180         * @param positionA index of the first atom to count
181         * @param positionB index of the last atom to count
182         * @param startingChain Case-sensitive chain
183         * @return The number of atoms between A and B inclusive belonging to the given chain
184         */
185        public int getLength(int positionA, int positionB, String startingChain) {
186
187                int positionStart, positionEnd;
188                if (positionA <= positionB) {
189                        positionStart = positionA;
190                        positionEnd = positionB;
191                } else {
192                        positionStart = positionB;
193                        positionEnd = positionA;
194                }
195
196                int count = 0;
197                // Inefficient search
198                for (Map.Entry<ResidueNumber, Integer> entry : treeMap.entrySet()) {
199                        if (entry.getKey().getChainId().equals(startingChain)
200                                        && positionStart <= entry.getValue()
201                                        && entry.getValue() <= positionEnd)
202                        {
203                                count++;
204                        }
205                }
206                return count;
207        }
208
209
210        /**
211         * Calculates the number of residues of the specified chain in a given range.
212         * Will return a negative value if the start is past the end.
213         * @param positionStart index of the first atom to count
214         * @param positionEnd index of the last atom to count
215         * @param startingChain Case-sensitive chain
216         * @return The number of atoms from A to B inclusive belonging to the given chain
217         */
218        public int getLengthDirectional(int positionStart, int positionEnd, String startingChain) {
219                int count = getLength(positionStart,positionEnd,startingChain);
220                if(positionStart <= positionEnd) {
221                        return count;
222                } else {
223                        return -count;
224                }
225        }
226
227        /**
228         * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues
229         * must belong to the same chain.
230         * @param start First residue
231         * @param end Last residue
232         * @return The number of atoms from A to B inclusive
233         * @throws IllegalArgumentException if start and end are on different chains,
234         *  or if either of the residues doesn't exist
235         */
236        public int getLength(ResidueNumber start, ResidueNumber end) {
237                if( ! start.getChainId().equals(end.getChainId())) {
238                        throw new IllegalArgumentException(String.format(
239                                        "Chains differ between %s and %s. Unable to calculate length.",
240                                        start,end));
241                }
242                Integer startPos = getPosition(start);
243                Integer endPos = getPosition(end);
244                if(startPos == null) {
245                        throw new IllegalArgumentException("Residue "+start+" was not found.");
246                }
247                if(endPos == null) {
248                        throw new IllegalArgumentException("Residue "+start+" was not found.");
249                }
250                return getLength(startPos, endPos, start.getChainId());
251        }
252
253        /**
254         * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues
255         * must belong to the same chain.
256         * Will return a negative value if the start is past the end.
257         * @param start First residue
258         * @param end Last residue
259         * @return The number of atoms from A to B inclusive
260         * @throws IllegalArgumentException if start and end are on different chains,
261         *  or if either of the residues doesn't exist
262         */
263        public int getLengthDirectional(ResidueNumber start, ResidueNumber end) {
264                if( ! start.getChainId().equals(end.getChainId())) {
265                        throw new IllegalArgumentException(String.format(
266                                        "Chains differ between %s and %s. Unable to calculate length.",
267                                        start,end));
268                }
269                Integer startPos = getPosition(start);
270                Integer endPos = getPosition(end);
271                if(startPos == null) {
272                        throw new IllegalArgumentException("Residue "+start+" was not found.");
273                }
274                if(endPos == null) {
275                        throw new IllegalArgumentException("Residue "+start+" was not found.");
276                }
277                return getLengthDirectional(startPos, endPos, start.getChainId());
278        }
279
280
281        public NavigableMap<ResidueNumber, Integer> getNavMap() {
282                return treeMap;
283        }
284
285        /**
286         * Gets the 0-based index of residueNumber to the matched atoms
287         * @param residueNumber
288         * @return The position of the ATOM record in the PDB file corresponding to
289         * the {@code residueNumber}, or null if the residueNumber was not found
290         */
291        public Integer getPosition(ResidueNumber residueNumber) {
292                return hashMap.get(residueNumber);
293        }
294
295        /**
296         * @param chainId
297         * @return The first {@link ResidueNumber} of the specified chain (the one highest down in the PDB file)
298         */
299        public ResidueNumber getFirst(String chainId) {
300                Map.Entry<ResidueNumber,Integer> entry = treeMap.firstEntry();
301                while (true) {
302                        if (entry.getKey().getChainId().equals(chainId)) return entry.getKey();
303                        entry = treeMap.higherEntry(entry.getKey());
304                        if (entry == null) return null;
305                }
306        }
307
308        /**
309         * @param chainId
310         * @return The last {@link ResidueNumber} of the specified chain (the one farthest down in the PDB file)
311         */
312        public ResidueNumber getLast(String chainId) {
313                Map.Entry<ResidueNumber,Integer> entry = treeMap.lastEntry();
314                while (true) {
315                        if (entry.getKey().getChainId().equals(chainId)) return entry.getKey();
316                        entry = treeMap.lowerEntry(entry.getKey());
317                        if (entry == null) return null;
318                }
319        }
320
321        /**
322         * @return The first {@link ResidueNumber} of any chain (the one farthest down in the PDB file)
323         */
324        public ResidueNumber getFirst() {
325                return treeMap.firstKey();
326        }
327
328        /**
329         * @return The last {@link ResidueNumber} of any chain (the one farthest down in the PDB file)
330         */
331        public ResidueNumber getLast() {
332                return treeMap.lastKey();
333        }
334
335        /**
336         * Returns a list of {@link ResidueRange ResidueRanges} corresponding to this entire AtomPositionMap.
337         */
338        public List<ResidueRangeAndLength> getRanges() {
339                String currentChain = "";
340                ResidueNumber first = null;
341                ResidueNumber prev = null;
342                List<ResidueRangeAndLength> ranges = new ArrayList<ResidueRangeAndLength>();
343                for (ResidueNumber rn : treeMap.keySet()) {
344                        if (!rn.getChainId().equals(currentChain)) {
345                                if (first != null) {
346                                        ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev));
347                                        ranges.add(newRange);
348                                }
349                                first = rn;
350                        }
351                        prev = rn;
352                        currentChain = rn.getChainId();
353                }
354                ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev));
355                ranges.add(newRange);
356                return ranges;
357        }
358
359        /**
360         * Trims a residue range so that both endpoints are contained in this map.
361         * @param rr
362         * @param map
363         * @return
364         */
365        public ResidueRangeAndLength trimToValidResidues(ResidueRange rr) {
366                ResidueNumber start = rr.getStart();
367                ResidueNumber end = rr.getEnd();
368                String chain = rr.getChainId();
369                // Add chainId
370                if(start.getChainId() == null) {
371                        start = new ResidueNumber(chain,start.getSeqNum(),start.getInsCode());
372                }
373                if(end.getChainId() == null) {
374                        end = new ResidueNumber(chain,end.getSeqNum(),end.getInsCode());
375                }
376                // Check that start and end are present in the map.
377                // If not, try to find the next valid residue
378                // (terminal residues sometimes lack CA atoms, so they don't appear)
379                Integer startIndex = getPosition(start);
380                if( startIndex == null) {
381                        // Assume that the residue numbers are sequential
382                        // Find startIndex such that the SeqNum is bigger than start's seqNum
383                        for(ResidueNumber key :  treeMap.keySet()) {
384                                if( !key.getChainId().equals(chain) )
385                                        continue;
386                                if( start.getSeqNum() <= key.getSeqNum() ) {
387                                        start = key;
388                                        startIndex = getPosition(key);
389                                        break;
390                                }
391                        }
392                        if( startIndex == null ) {
393                                logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",start);
394                                return null;
395                        } else {
396                                logger.warn("Unable to find Residue {}, so substituting {}.",rr.getStart(),start);
397                        }
398                }
399                Integer endIndex = getPosition(end);
400                if( endIndex == null) {
401                        // Assume that the residue numbers are sequential
402                        // Find startIndex such that the SeqNum is bigger than start's seqNum
403                        for(ResidueNumber key :  treeMap.descendingKeySet()) {
404                                if( !key.getChainId().equals(chain) )
405                                        continue;
406                                Integer value = getPosition(key);
407                                if( value < startIndex ) {
408                                        // start is before the end!
409                                        break;
410                                }
411                                if( end.getSeqNum() >= key.getSeqNum() ) {
412                                        end = key;
413                                        endIndex = value;
414                                        break;
415                                }
416                        }
417                        if( endIndex == null ) {
418                                logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",end);
419                                return null;
420                        } else {
421                                logger.warn("Unable to find Residue {}, so substituting {}.",rr.getEnd(),end);
422                        }
423                }
424
425                // now use those to calculate the length
426                // if start or end is null, will throw NPE
427                int length = getLength(startIndex, endIndex,chain);
428
429                return new ResidueRangeAndLength(chain, start, end, length);
430        }
431}