001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 2012-12-01
021 *
022 */
023
024package org.biojava.nbio.structure;
025
026import java.io.Serializable;
027import java.util.ArrayList;
028import java.util.Comparator;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import java.util.NavigableMap;
033import java.util.TreeMap;
034
035import org.biojava.nbio.structure.chem.ChemComp;
036import org.biojava.nbio.structure.chem.PolymerType;
037import org.biojava.nbio.structure.chem.ResidueType;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041/**
042 * A map from {@link ResidueNumber ResidueNumbers} to ATOM record positions in a PDB file.
043 *
044 * <p>To use:
045 *
046 * <pre>
047 * Atom[] atoms = new AtomCache().getAtoms("1w0p");
048 * AtomPositionMap map = new AtomPositionMap(atoms);
049 * ResidueNumber start = new ResidueNumber("A", 100, null);
050 * ResidueNumber end = map.getEnd("A");
051 * int pos = map.getPosition(start);
052 * int length = map.calcLength(start, end);
053 * </pre>
054 *
055 * <p>Note: The getLength() methods were introduced in BioJava 4.0.0 to replace
056 * the calcLength methods. The new method returns the number of residues between
057 * two residues, inclusive, whereas the previous method returned 1 less than that.
058 * @author Douglas Myers-Turnbull
059 */
060public class AtomPositionMap {
061
062        private static final Logger logger = LoggerFactory.getLogger(AtomPositionMap.class);
063
064        private HashMap<ResidueNumber, Integer> hashMap;
065        private TreeMap<ResidueNumber, Integer> treeMap;
066
067
068        /**
069         * Used as a Predicate to indicate whether a particular Atom should be mapped
070         */
071        public static interface GroupMatcher {
072                boolean matches(Group group);
073        }
074
075        /**
076         * Matches CA atoms of protein groups
077         */
078        public static final GroupMatcher AMINO_ACID_MATCHER = new GroupMatcher() {
079                @Override
080                public boolean matches(Group group) {
081                        if( group == null )
082                                return false;
083                        ChemComp chem = group.getChemComp();
084                        if(chem == null)
085                                return false;
086                        // Get polymer type
087                        PolymerType polyType = chem.getPolymerType();
088                        if( polyType == null) {
089                                ResidueType type = chem.getResidueType();
090                                if(type != null ) {
091                                        polyType = type.getPolymerType();
092                                }
093                        }
094                        if( polyType == null ) {
095                                return false;
096                        }
097
098                        return PolymerType.PROTEIN_ONLY.contains(polyType)
099                                        && group.hasAtom(StructureTools.CA_ATOM_NAME);
100                }
101        };
102
103        /**
104         * Matches all atoms
105         */
106        public static final GroupMatcher ANYTHING_MATCHER = new GroupMatcher() {
107                @Override
108                public boolean matches(Group group) {
109                        return true;
110                }
111        };
112
113        /**
114         * A map that is sorted by its values. Used for the treemap
115         *
116         * @author dmyerstu
117         *
118         * @param <T>
119         *            The key type
120         * @param <V>
121         *            The value type
122         */
123        private static class ValueComparator<T, V extends Comparable<V>> implements Comparator<T>, Serializable {
124        private static final long serialVersionUID = 1;
125
126                private Map<T, V> map;
127
128                public ValueComparator(Map<T, V> map) {
129                        this.map = map;
130                }
131
132                @Override
133                public int compare(T o1, T o2) {
134                        return map.get(o1).compareTo(map.get(o2));
135                }
136
137        }
138
139        /**
140         * Creates a new AtomPositionMap containing peptide alpha-carbon atoms
141         *
142         * @param atoms
143         */
144        public AtomPositionMap(Atom[] atoms) {
145                this(atoms, AMINO_ACID_MATCHER);
146        }
147
148        /**
149         * Creates a new AtomPositionMap containing only atoms matched by {@code matcher}.
150         *
151         * If multiple atoms are present from a group, the first atom encountered will
152         * be used.
153         * @param atoms
154         */
155        public AtomPositionMap(Atom[] atoms, GroupMatcher matcher) {
156                hashMap = new HashMap<ResidueNumber, Integer>();
157                for (int i = 0; i < atoms.length; i++) {
158                        Group group = atoms[i].getGroup();
159                        ResidueNumber rn = group.getResidueNumber();
160                        if (matcher.matches(group)) {
161                                if (!hashMap.containsKey(rn)) {
162                                        hashMap.put(rn, i);
163                                }
164                        }
165                }
166                Comparator<ResidueNumber> vc = new ValueComparator<ResidueNumber, Integer>(hashMap);
167                treeMap = new TreeMap<ResidueNumber, Integer>(vc);
168                treeMap.putAll(hashMap);
169        }
170
171        /**
172         * Creates a new AtomPositionMap containing representative atoms
173         * from a structure.
174         * @param s
175         */
176        public AtomPositionMap(Structure s) {
177                this(StructureTools.getRepresentativeAtomArray(s));
178        }
179
180        /**
181         * Calculates the number of residues of the specified chain in a given range, inclusive.
182         * @param positionA index of the first atom to count
183         * @param positionB index of the last atom to count
184         * @param startingChain Case-sensitive chain
185         * @return The number of atoms between A and B inclusive belonging to the given chain
186         */
187        public int getLength(int positionA, int positionB, String startingChain) {
188
189                int positionStart, positionEnd;
190                if (positionA <= positionB) {
191                        positionStart = positionA;
192                        positionEnd = positionB;
193                } else {
194                        positionStart = positionB;
195                        positionEnd = positionA;
196                }
197
198                int count = 0;
199                // Inefficient search
200                for (Map.Entry<ResidueNumber, Integer> entry : treeMap.entrySet()) {
201                        if (entry.getKey().getChainName().equals(startingChain)
202                                        && positionStart <= entry.getValue()
203                                        && entry.getValue() <= positionEnd)
204                        {
205                                count++;
206                        }
207                }
208                return count;
209        }
210
211
212        /**
213         * Calculates the number of residues of the specified chain in a given range.
214         * Will return a negative value if the start is past the end.
215         * @param positionStart index of the first atom to count
216         * @param positionEnd index of the last atom to count
217         * @param startingChain Case-sensitive chain
218         * @return The number of atoms from A to B inclusive belonging to the given chain
219         */
220        public int getLengthDirectional(int positionStart, int positionEnd, String startingChain) {
221                int count = getLength(positionStart,positionEnd,startingChain);
222                if(positionStart <= positionEnd) {
223                        return count;
224                } else {
225                        return -count;
226                }
227        }
228
229        /**
230         * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues
231         * must belong to the same chain.
232         * @param start First residue
233         * @param end Last residue
234         * @return The number of atoms from A to B inclusive
235         * @throws IllegalArgumentException if start and end are on different chains,
236         *  or if either of the residues doesn't exist
237         */
238        public int getLength(ResidueNumber start, ResidueNumber end) {
239                if( ! start.getChainName().equals(end.getChainName())) {
240                        throw new IllegalArgumentException(String.format(
241                                        "Chains differ between %s and %s. Unable to calculate length.",
242                                        start,end));
243                }
244                Integer startPos = getPosition(start);
245                Integer endPos = getPosition(end);
246                if(startPos == null) {
247                        throw new IllegalArgumentException("Residue "+start+" was not found.");
248                }
249                if(endPos == null) {
250                        throw new IllegalArgumentException("Residue "+start+" was not found.");
251                }
252                return getLength(startPos, endPos, start.getChainName());
253        }
254
255        /**
256         * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues
257         * must belong to the same chain.
258         * Will return a negative value if the start is past the end.
259         * @param start First residue
260         * @param end Last residue
261         * @return The number of atoms from A to B inclusive
262         * @throws IllegalArgumentException if start and end are on different chains,
263         *  or if either of the residues doesn't exist
264         */
265        public int getLengthDirectional(ResidueNumber start, ResidueNumber end) {
266                if( ! start.getChainName().equals(end.getChainName())) {
267                        throw new IllegalArgumentException(String.format(
268                                        "Chains differ between %s and %s. Unable to calculate length.",
269                                        start,end));
270                }
271                Integer startPos = getPosition(start);
272                Integer endPos = getPosition(end);
273                if(startPos == null) {
274                        throw new IllegalArgumentException("Residue "+start+" was not found.");
275                }
276                if(endPos == null) {
277                        throw new IllegalArgumentException("Residue "+start+" was not found.");
278                }
279                return getLengthDirectional(startPos, endPos, start.getChainName());
280        }
281
282
283        public NavigableMap<ResidueNumber, Integer> getNavMap() {
284                return treeMap;
285        }
286
287        /**
288         * Gets the 0-based index of residueNumber to the matched atoms
289         * @param residueNumber
290         * @return The position of the ATOM record in the PDB file corresponding to
291         * the {@code residueNumber}, or null if the residueNumber was not found
292         */
293        public Integer getPosition(ResidueNumber residueNumber) {
294                return hashMap.get(residueNumber);
295        }
296
297        /**
298         * @param chainId
299         * @return The first {@link ResidueNumber} of the specified chain (the one highest down in the PDB file)
300         */
301        public ResidueNumber getFirst(String chainId) {
302                Map.Entry<ResidueNumber,Integer> entry = treeMap.firstEntry();
303                while (true) {
304                        if (entry.getKey().getChainName().equals(chainId)) return entry.getKey();
305                        entry = treeMap.higherEntry(entry.getKey());
306                        if (entry == null) return null;
307                }
308        }
309
310        /**
311         * @param chainId
312         * @return The last {@link ResidueNumber} of the specified chain (the one farthest down in the PDB file)
313         */
314        public ResidueNumber getLast(String chainId) {
315                Map.Entry<ResidueNumber,Integer> entry = treeMap.lastEntry();
316                while (true) {
317                        if (entry.getKey().getChainName().equals(chainId)) return entry.getKey();
318                        entry = treeMap.lowerEntry(entry.getKey());
319                        if (entry == null) return null;
320                }
321        }
322
323        /**
324         * @return The first {@link ResidueNumber} of any chain (the one farthest down in the PDB file)
325         */
326        public ResidueNumber getFirst() {
327                return treeMap.firstKey();
328        }
329
330        /**
331         * @return The last {@link ResidueNumber} of any chain (the one farthest down in the PDB file)
332         */
333        public ResidueNumber getLast() {
334                return treeMap.lastKey();
335        }
336
337        /**
338         * Returns a list of {@link ResidueRange ResidueRanges} corresponding to this entire AtomPositionMap.
339         */
340        public List<ResidueRangeAndLength> getRanges() {
341                String currentChain = "";
342                ResidueNumber first = null;
343                ResidueNumber prev = null;
344                List<ResidueRangeAndLength> ranges = new ArrayList<ResidueRangeAndLength>();
345                for (ResidueNumber rn : treeMap.keySet()) {
346                        if (!rn.getChainName().equals(currentChain)) {
347                                if (first != null) {
348                                        ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev));
349                                        ranges.add(newRange);
350                                }
351                                first = rn;
352                        }
353                        prev = rn;
354                        currentChain = rn.getChainName();
355                }
356                ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev));
357                ranges.add(newRange);
358                return ranges;
359        }
360
361        /**
362         * Trims a residue range so that both endpoints are contained in this map.
363         * @param rr residue range
364         * @return residue range and length
365         */
366        public ResidueRangeAndLength trimToValidResidues(ResidueRange rr) {
367                ResidueNumber start = rr.getStart();
368                ResidueNumber end = rr.getEnd();
369                String chain = rr.getChainName();
370                // Add chainName
371                if(start.getChainName() == null) {
372                        start = new ResidueNumber(chain,start.getSeqNum(),start.getInsCode());
373                }
374                if(end.getChainName() == null) {
375                        end = new ResidueNumber(chain,end.getSeqNum(),end.getInsCode());
376                }
377                // Check that start and end are present in the map.
378                // If not, try to find the next valid residue
379                // (terminal residues sometimes lack CA atoms, so they don't appear)
380                Integer startIndex = getPosition(start);
381                if( startIndex == null) {
382                        // Assume that the residue numbers are sequential
383                        // Find startIndex such that the SeqNum is bigger than start's seqNum
384                        for(ResidueNumber key :  treeMap.keySet()) {
385                                if( !key.getChainName().equals(chain) )
386                                        continue;
387                                if( start.getSeqNum() <= key.getSeqNum() ) {
388                                        start = key;
389                                        startIndex = getPosition(key);
390                                        break;
391                                }
392                        }
393                        if( startIndex == null ) {
394                                logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",start);
395                                return null;
396                        } else {
397                                logger.warn("Unable to find Residue {}, so substituting {}.",rr.getStart(),start);
398                        }
399                }
400                Integer endIndex = getPosition(end);
401                if( endIndex == null) {
402                        // Assume that the residue numbers are sequential
403                        // Find startIndex such that the SeqNum is bigger than start's seqNum
404                        for(ResidueNumber key :  treeMap.descendingKeySet()) {
405                                if( !key.getChainName().equals(chain) )
406                                        continue;
407                                Integer value = getPosition(key);
408                                if( value < startIndex ) {
409                                        // start is before the end!
410                                        break;
411                                }
412                                if( end.getSeqNum() >= key.getSeqNum() ) {
413                                        end = key;
414                                        endIndex = value;
415                                        break;
416                                }
417                        }
418                        if( endIndex == null ) {
419                                logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",end);
420                                return null;
421                        } else {
422                                logger.warn("Unable to find Residue {}, so substituting {}.",rr.getEnd(),end);
423                        }
424                }
425
426                // now use those to calculate the length
427                // if start or end is null, will throw NPE
428                int length = getLength(startIndex, endIndex,chain);
429
430                return new ResidueRangeAndLength(chain, start, end, length);
431        }
432}