001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 2012-12-01 021 * 022 */ 023 024package org.biojava.nbio.structure; 025 026import java.util.ArrayList; 027import java.util.Comparator; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.NavigableMap; 032import java.util.TreeMap; 033 034import org.biojava.nbio.structure.io.mmcif.chem.PolymerType; 035import org.biojava.nbio.structure.io.mmcif.chem.ResidueType; 036import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040/** 041 * A map from {@link ResidueNumber ResidueNumbers} to ATOM record positions in a PDB file. 042 * 043 * <p>To use: 044 * 045 * <pre> 046 * Atom[] atoms = new AtomCache().getAtoms("1w0p"); 047 * AtomPositionMap map = new AtomPositionMap(atoms); 048 * ResidueNumber start = new ResidueNumber("A", 100, null); 049 * ResidueNumber end = map.getEnd("A"); 050 * int pos = map.getPosition(start); 051 * int length = map.calcLength(start, end); 052 * </pre> 053 * 054 * <p>Note: The getLength() methods were introduced in BioJava 4.0.0 to replace 055 * the calcLength methods. The new method returns the number of residues between 056 * two residues, inclusive, whereas the previous method returned 1 less than that. 057 * @author dmyerstu 058 */ 059public class AtomPositionMap { 060 061 private static final Logger logger = LoggerFactory.getLogger(AtomPositionMap.class); 062 063 private HashMap<ResidueNumber, Integer> hashMap; 064 private TreeMap<ResidueNumber, Integer> treeMap; 065 066 067 /** 068 * Used as a Predicate to indicate whether a particular Atom should be mapped 069 */ 070 public static interface GroupMatcher { 071 boolean matches(Group group); 072 } 073 074 /** 075 * Matches CA atoms of protein groups 076 */ 077 public static final GroupMatcher AMINO_ACID_MATCHER = new GroupMatcher() { 078 @Override 079 public boolean matches(Group group) { 080 if( group == null ) 081 return false; 082 ChemComp chem = group.getChemComp(); 083 if(chem == null) 084 return false; 085 // Get polymer type 086 PolymerType polyType = chem.getPolymerType(); 087 if( polyType == null) { 088 ResidueType type = chem.getResidueType(); 089 if(type != null ) { 090 polyType = type.getPolymerType(); 091 } 092 } 093 if( polyType == null ) { 094 return false; 095 } 096 097 return PolymerType.PROTEIN_ONLY.contains(polyType) 098 && group.hasAtom(StructureTools.CA_ATOM_NAME); 099 } 100 }; 101 102 /** 103 * Matches all atoms 104 */ 105 public static final GroupMatcher ANYTHING_MATCHER = new GroupMatcher() { 106 @Override 107 public boolean matches(Group group) { 108 return true; 109 } 110 }; 111 112 /** 113 * A map that is sorted by its values. Used for the treemap 114 * 115 * @author dmyerstu 116 * 117 * @param <T> 118 * The key type 119 * @param <V> 120 * The value type 121 */ 122 private static class ValueComparator<T, V extends Comparable<V>> implements Comparator<T> { 123 124 private Map<T, V> map; 125 126 public ValueComparator(Map<T, V> map) { 127 this.map = map; 128 } 129 130 @Override 131 public int compare(T o1, T o2) { 132 return map.get(o1).compareTo(map.get(o2)); 133 } 134 135 } 136 137 /** 138 * Creates a new AtomPositionMap containing peptide alpha-carbon atoms 139 * 140 * @param atoms 141 */ 142 public AtomPositionMap(Atom[] atoms) { 143 this(atoms, AMINO_ACID_MATCHER); 144 } 145 146 /** 147 * Creates a new AtomPositionMap containing only atoms matched by {@code matcher}. 148 * 149 * If multiple atoms are present from a group, the first atom encountered will 150 * be used. 151 * @param atoms 152 */ 153 public AtomPositionMap(Atom[] atoms, GroupMatcher matcher) { 154 hashMap = new HashMap<ResidueNumber, Integer>(); 155 for (int i = 0; i < atoms.length; i++) { 156 Group group = atoms[i].getGroup(); 157 ResidueNumber rn = group.getResidueNumber(); 158 if (matcher.matches(group)) { 159 if (!hashMap.containsKey(rn)) { 160 hashMap.put(rn, i); 161 } 162 } 163 } 164 Comparator<ResidueNumber> vc = new ValueComparator<ResidueNumber, Integer>(hashMap); 165 treeMap = new TreeMap<ResidueNumber, Integer>(vc); 166 treeMap.putAll(hashMap); 167 } 168 169 /** 170 * Creates a new AtomPositionMap containing representative atoms 171 * from a structure. 172 * @param s 173 */ 174 public AtomPositionMap(Structure s) { 175 this(StructureTools.getRepresentativeAtomArray(s)); 176 } 177 178 /** 179 * Calculates the number of residues of the specified chain in a given range, inclusive. 180 * @param positionA index of the first atom to count 181 * @param positionB index of the last atom to count 182 * @param startingChain Case-sensitive chain 183 * @return The number of atoms between A and B inclusive belonging to the given chain 184 */ 185 public int getLength(int positionA, int positionB, String startingChain) { 186 187 int positionStart, positionEnd; 188 if (positionA <= positionB) { 189 positionStart = positionA; 190 positionEnd = positionB; 191 } else { 192 positionStart = positionB; 193 positionEnd = positionA; 194 } 195 196 int count = 0; 197 // Inefficient search 198 for (Map.Entry<ResidueNumber, Integer> entry : treeMap.entrySet()) { 199 if (entry.getKey().getChainId().equals(startingChain) 200 && positionStart <= entry.getValue() 201 && entry.getValue() <= positionEnd) 202 { 203 count++; 204 } 205 } 206 return count; 207 } 208 209 210 /** 211 * Calculates the number of residues of the specified chain in a given range. 212 * Will return a negative value if the start is past the end. 213 * @param positionStart index of the first atom to count 214 * @param positionEnd index of the last atom to count 215 * @param startingChain Case-sensitive chain 216 * @return The number of atoms from A to B inclusive belonging to the given chain 217 */ 218 public int getLengthDirectional(int positionStart, int positionEnd, String startingChain) { 219 int count = getLength(positionStart,positionEnd,startingChain); 220 if(positionStart <= positionEnd) { 221 return count; 222 } else { 223 return -count; 224 } 225 } 226 227 /** 228 * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues 229 * must belong to the same chain. 230 * @param start First residue 231 * @param end Last residue 232 * @return The number of atoms from A to B inclusive 233 * @throws IllegalArgumentException if start and end are on different chains, 234 * or if either of the residues doesn't exist 235 */ 236 public int getLength(ResidueNumber start, ResidueNumber end) { 237 if( ! start.getChainId().equals(end.getChainId())) { 238 throw new IllegalArgumentException(String.format( 239 "Chains differ between %s and %s. Unable to calculate length.", 240 start,end)); 241 } 242 Integer startPos = getPosition(start); 243 Integer endPos = getPosition(end); 244 if(startPos == null) { 245 throw new IllegalArgumentException("Residue "+start+" was not found."); 246 } 247 if(endPos == null) { 248 throw new IllegalArgumentException("Residue "+start+" was not found."); 249 } 250 return getLength(startPos, endPos, start.getChainId()); 251 } 252 253 /** 254 * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues 255 * must belong to the same chain. 256 * Will return a negative value if the start is past the end. 257 * @param start First residue 258 * @param end Last residue 259 * @return The number of atoms from A to B inclusive 260 * @throws IllegalArgumentException if start and end are on different chains, 261 * or if either of the residues doesn't exist 262 */ 263 public int getLengthDirectional(ResidueNumber start, ResidueNumber end) { 264 if( ! start.getChainId().equals(end.getChainId())) { 265 throw new IllegalArgumentException(String.format( 266 "Chains differ between %s and %s. Unable to calculate length.", 267 start,end)); 268 } 269 Integer startPos = getPosition(start); 270 Integer endPos = getPosition(end); 271 if(startPos == null) { 272 throw new IllegalArgumentException("Residue "+start+" was not found."); 273 } 274 if(endPos == null) { 275 throw new IllegalArgumentException("Residue "+start+" was not found."); 276 } 277 return getLengthDirectional(startPos, endPos, start.getChainId()); 278 } 279 280 281 public NavigableMap<ResidueNumber, Integer> getNavMap() { 282 return treeMap; 283 } 284 285 /** 286 * Gets the 0-based index of residueNumber to the matched atoms 287 * @param residueNumber 288 * @return The position of the ATOM record in the PDB file corresponding to 289 * the {@code residueNumber}, or null if the residueNumber was not found 290 */ 291 public Integer getPosition(ResidueNumber residueNumber) { 292 return hashMap.get(residueNumber); 293 } 294 295 /** 296 * @param chainId 297 * @return The first {@link ResidueNumber} of the specified chain (the one highest down in the PDB file) 298 */ 299 public ResidueNumber getFirst(String chainId) { 300 Map.Entry<ResidueNumber,Integer> entry = treeMap.firstEntry(); 301 while (true) { 302 if (entry.getKey().getChainId().equals(chainId)) return entry.getKey(); 303 entry = treeMap.higherEntry(entry.getKey()); 304 if (entry == null) return null; 305 } 306 } 307 308 /** 309 * @param chainId 310 * @return The last {@link ResidueNumber} of the specified chain (the one farthest down in the PDB file) 311 */ 312 public ResidueNumber getLast(String chainId) { 313 Map.Entry<ResidueNumber,Integer> entry = treeMap.lastEntry(); 314 while (true) { 315 if (entry.getKey().getChainId().equals(chainId)) return entry.getKey(); 316 entry = treeMap.lowerEntry(entry.getKey()); 317 if (entry == null) return null; 318 } 319 } 320 321 /** 322 * @return The first {@link ResidueNumber} of any chain (the one farthest down in the PDB file) 323 */ 324 public ResidueNumber getFirst() { 325 return treeMap.firstKey(); 326 } 327 328 /** 329 * @return The last {@link ResidueNumber} of any chain (the one farthest down in the PDB file) 330 */ 331 public ResidueNumber getLast() { 332 return treeMap.lastKey(); 333 } 334 335 /** 336 * Returns a list of {@link ResidueRange ResidueRanges} corresponding to this entire AtomPositionMap. 337 */ 338 public List<ResidueRangeAndLength> getRanges() { 339 String currentChain = ""; 340 ResidueNumber first = null; 341 ResidueNumber prev = null; 342 List<ResidueRangeAndLength> ranges = new ArrayList<ResidueRangeAndLength>(); 343 for (ResidueNumber rn : treeMap.keySet()) { 344 if (!rn.getChainId().equals(currentChain)) { 345 if (first != null) { 346 ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev)); 347 ranges.add(newRange); 348 } 349 first = rn; 350 } 351 prev = rn; 352 currentChain = rn.getChainId(); 353 } 354 ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev)); 355 ranges.add(newRange); 356 return ranges; 357 } 358 359 /** 360 * Trims a residue range so that both endpoints are contained in this map. 361 * @param rr 362 * @param map 363 * @return 364 */ 365 public ResidueRangeAndLength trimToValidResidues(ResidueRange rr) { 366 ResidueNumber start = rr.getStart(); 367 ResidueNumber end = rr.getEnd(); 368 String chain = rr.getChainId(); 369 // Add chainId 370 if(start.getChainId() == null) { 371 start = new ResidueNumber(chain,start.getSeqNum(),start.getInsCode()); 372 } 373 if(end.getChainId() == null) { 374 end = new ResidueNumber(chain,end.getSeqNum(),end.getInsCode()); 375 } 376 // Check that start and end are present in the map. 377 // If not, try to find the next valid residue 378 // (terminal residues sometimes lack CA atoms, so they don't appear) 379 Integer startIndex = getPosition(start); 380 if( startIndex == null) { 381 // Assume that the residue numbers are sequential 382 // Find startIndex such that the SeqNum is bigger than start's seqNum 383 for(ResidueNumber key : treeMap.keySet()) { 384 if( !key.getChainId().equals(chain) ) 385 continue; 386 if( start.getSeqNum() <= key.getSeqNum() ) { 387 start = key; 388 startIndex = getPosition(key); 389 break; 390 } 391 } 392 if( startIndex == null ) { 393 logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",start); 394 return null; 395 } else { 396 logger.warn("Unable to find Residue {}, so substituting {}.",rr.getStart(),start); 397 } 398 } 399 Integer endIndex = getPosition(end); 400 if( endIndex == null) { 401 // Assume that the residue numbers are sequential 402 // Find startIndex such that the SeqNum is bigger than start's seqNum 403 for(ResidueNumber key : treeMap.descendingKeySet()) { 404 if( !key.getChainId().equals(chain) ) 405 continue; 406 Integer value = getPosition(key); 407 if( value < startIndex ) { 408 // start is before the end! 409 break; 410 } 411 if( end.getSeqNum() >= key.getSeqNum() ) { 412 end = key; 413 endIndex = value; 414 break; 415 } 416 } 417 if( endIndex == null ) { 418 logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",end); 419 return null; 420 } else { 421 logger.warn("Unable to find Residue {}, so substituting {}.",rr.getEnd(),end); 422 } 423 } 424 425 // now use those to calculate the length 426 // if start or end is null, will throw NPE 427 int length = getLength(startIndex, endIndex,chain); 428 429 return new ResidueRangeAndLength(chain, start, end, length); 430 } 431}