001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 2012-12-01 021 * 022 */ 023 024package org.biojava.nbio.structure; 025 026import java.io.Serializable; 027import java.util.ArrayList; 028import java.util.Comparator; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032import java.util.NavigableMap; 033import java.util.TreeMap; 034 035import org.biojava.nbio.structure.io.mmcif.chem.PolymerType; 036import org.biojava.nbio.structure.io.mmcif.chem.ResidueType; 037import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041/** 042 * A map from {@link ResidueNumber ResidueNumbers} to ATOM record positions in a PDB file. 043 * 044 * <p>To use: 045 * 046 * <pre> 047 * Atom[] atoms = new AtomCache().getAtoms("1w0p"); 048 * AtomPositionMap map = new AtomPositionMap(atoms); 049 * ResidueNumber start = new ResidueNumber("A", 100, null); 050 * ResidueNumber end = map.getEnd("A"); 051 * int pos = map.getPosition(start); 052 * int length = map.calcLength(start, end); 053 * </pre> 054 * 055 * <p>Note: The getLength() methods were introduced in BioJava 4.0.0 to replace 056 * the calcLength methods. The new method returns the number of residues between 057 * two residues, inclusive, whereas the previous method returned 1 less than that. 058 * @author Douglas Myers-Turnbull 059 */ 060public class AtomPositionMap { 061 062 private static final Logger logger = LoggerFactory.getLogger(AtomPositionMap.class); 063 064 private HashMap<ResidueNumber, Integer> hashMap; 065 private TreeMap<ResidueNumber, Integer> treeMap; 066 067 068 /** 069 * Used as a Predicate to indicate whether a particular Atom should be mapped 070 */ 071 public static interface GroupMatcher { 072 boolean matches(Group group); 073 } 074 075 /** 076 * Matches CA atoms of protein groups 077 */ 078 public static final GroupMatcher AMINO_ACID_MATCHER = new GroupMatcher() { 079 @Override 080 public boolean matches(Group group) { 081 if( group == null ) 082 return false; 083 ChemComp chem = group.getChemComp(); 084 if(chem == null) 085 return false; 086 // Get polymer type 087 PolymerType polyType = chem.getPolymerType(); 088 if( polyType == null) { 089 ResidueType type = chem.getResidueType(); 090 if(type != null ) { 091 polyType = type.getPolymerType(); 092 } 093 } 094 if( polyType == null ) { 095 return false; 096 } 097 098 return PolymerType.PROTEIN_ONLY.contains(polyType) 099 && group.hasAtom(StructureTools.CA_ATOM_NAME); 100 } 101 }; 102 103 /** 104 * Matches all atoms 105 */ 106 public static final GroupMatcher ANYTHING_MATCHER = new GroupMatcher() { 107 @Override 108 public boolean matches(Group group) { 109 return true; 110 } 111 }; 112 113 /** 114 * A map that is sorted by its values. Used for the treemap 115 * 116 * @author dmyerstu 117 * 118 * @param <T> 119 * The key type 120 * @param <V> 121 * The value type 122 */ 123 private static class ValueComparator<T, V extends Comparable<V>> implements Comparator<T>, Serializable { 124 private static final long serialVersionUID = 1; 125 126 private Map<T, V> map; 127 128 public ValueComparator(Map<T, V> map) { 129 this.map = map; 130 } 131 132 @Override 133 public int compare(T o1, T o2) { 134 return map.get(o1).compareTo(map.get(o2)); 135 } 136 137 } 138 139 /** 140 * Creates a new AtomPositionMap containing peptide alpha-carbon atoms 141 * 142 * @param atoms 143 */ 144 public AtomPositionMap(Atom[] atoms) { 145 this(atoms, AMINO_ACID_MATCHER); 146 } 147 148 /** 149 * Creates a new AtomPositionMap containing only atoms matched by {@code matcher}. 150 * 151 * If multiple atoms are present from a group, the first atom encountered will 152 * be used. 153 * @param atoms 154 */ 155 public AtomPositionMap(Atom[] atoms, GroupMatcher matcher) { 156 hashMap = new HashMap<ResidueNumber, Integer>(); 157 for (int i = 0; i < atoms.length; i++) { 158 Group group = atoms[i].getGroup(); 159 ResidueNumber rn = group.getResidueNumber(); 160 if (matcher.matches(group)) { 161 if (!hashMap.containsKey(rn)) { 162 hashMap.put(rn, i); 163 } 164 } 165 } 166 Comparator<ResidueNumber> vc = new ValueComparator<ResidueNumber, Integer>(hashMap); 167 treeMap = new TreeMap<ResidueNumber, Integer>(vc); 168 treeMap.putAll(hashMap); 169 } 170 171 /** 172 * Creates a new AtomPositionMap containing representative atoms 173 * from a structure. 174 * @param s 175 */ 176 public AtomPositionMap(Structure s) { 177 this(StructureTools.getRepresentativeAtomArray(s)); 178 } 179 180 /** 181 * Calculates the number of residues of the specified chain in a given range, inclusive. 182 * @param positionA index of the first atom to count 183 * @param positionB index of the last atom to count 184 * @param startingChain Case-sensitive chain 185 * @return The number of atoms between A and B inclusive belonging to the given chain 186 */ 187 public int getLength(int positionA, int positionB, String startingChain) { 188 189 int positionStart, positionEnd; 190 if (positionA <= positionB) { 191 positionStart = positionA; 192 positionEnd = positionB; 193 } else { 194 positionStart = positionB; 195 positionEnd = positionA; 196 } 197 198 int count = 0; 199 // Inefficient search 200 for (Map.Entry<ResidueNumber, Integer> entry : treeMap.entrySet()) { 201 if (entry.getKey().getChainName().equals(startingChain) 202 && positionStart <= entry.getValue() 203 && entry.getValue() <= positionEnd) 204 { 205 count++; 206 } 207 } 208 return count; 209 } 210 211 212 /** 213 * Calculates the number of residues of the specified chain in a given range. 214 * Will return a negative value if the start is past the end. 215 * @param positionStart index of the first atom to count 216 * @param positionEnd index of the last atom to count 217 * @param startingChain Case-sensitive chain 218 * @return The number of atoms from A to B inclusive belonging to the given chain 219 */ 220 public int getLengthDirectional(int positionStart, int positionEnd, String startingChain) { 221 int count = getLength(positionStart,positionEnd,startingChain); 222 if(positionStart <= positionEnd) { 223 return count; 224 } else { 225 return -count; 226 } 227 } 228 229 /** 230 * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues 231 * must belong to the same chain. 232 * @param start First residue 233 * @param end Last residue 234 * @return The number of atoms from A to B inclusive 235 * @throws IllegalArgumentException if start and end are on different chains, 236 * or if either of the residues doesn't exist 237 */ 238 public int getLength(ResidueNumber start, ResidueNumber end) { 239 if( ! start.getChainName().equals(end.getChainName())) { 240 throw new IllegalArgumentException(String.format( 241 "Chains differ between %s and %s. Unable to calculate length.", 242 start,end)); 243 } 244 Integer startPos = getPosition(start); 245 Integer endPos = getPosition(end); 246 if(startPos == null) { 247 throw new IllegalArgumentException("Residue "+start+" was not found."); 248 } 249 if(endPos == null) { 250 throw new IllegalArgumentException("Residue "+start+" was not found."); 251 } 252 return getLength(startPos, endPos, start.getChainName()); 253 } 254 255 /** 256 * Calculates the number of atoms between two ResidueNumbers, inclusive. Both residues 257 * must belong to the same chain. 258 * Will return a negative value if the start is past the end. 259 * @param start First residue 260 * @param end Last residue 261 * @return The number of atoms from A to B inclusive 262 * @throws IllegalArgumentException if start and end are on different chains, 263 * or if either of the residues doesn't exist 264 */ 265 public int getLengthDirectional(ResidueNumber start, ResidueNumber end) { 266 if( ! start.getChainName().equals(end.getChainName())) { 267 throw new IllegalArgumentException(String.format( 268 "Chains differ between %s and %s. Unable to calculate length.", 269 start,end)); 270 } 271 Integer startPos = getPosition(start); 272 Integer endPos = getPosition(end); 273 if(startPos == null) { 274 throw new IllegalArgumentException("Residue "+start+" was not found."); 275 } 276 if(endPos == null) { 277 throw new IllegalArgumentException("Residue "+start+" was not found."); 278 } 279 return getLengthDirectional(startPos, endPos, start.getChainName()); 280 } 281 282 283 public NavigableMap<ResidueNumber, Integer> getNavMap() { 284 return treeMap; 285 } 286 287 /** 288 * Gets the 0-based index of residueNumber to the matched atoms 289 * @param residueNumber 290 * @return The position of the ATOM record in the PDB file corresponding to 291 * the {@code residueNumber}, or null if the residueNumber was not found 292 */ 293 public Integer getPosition(ResidueNumber residueNumber) { 294 return hashMap.get(residueNumber); 295 } 296 297 /** 298 * @param chainId 299 * @return The first {@link ResidueNumber} of the specified chain (the one highest down in the PDB file) 300 */ 301 public ResidueNumber getFirst(String chainId) { 302 Map.Entry<ResidueNumber,Integer> entry = treeMap.firstEntry(); 303 while (true) { 304 if (entry.getKey().getChainName().equals(chainId)) return entry.getKey(); 305 entry = treeMap.higherEntry(entry.getKey()); 306 if (entry == null) return null; 307 } 308 } 309 310 /** 311 * @param chainId 312 * @return The last {@link ResidueNumber} of the specified chain (the one farthest down in the PDB file) 313 */ 314 public ResidueNumber getLast(String chainId) { 315 Map.Entry<ResidueNumber,Integer> entry = treeMap.lastEntry(); 316 while (true) { 317 if (entry.getKey().getChainName().equals(chainId)) return entry.getKey(); 318 entry = treeMap.lowerEntry(entry.getKey()); 319 if (entry == null) return null; 320 } 321 } 322 323 /** 324 * @return The first {@link ResidueNumber} of any chain (the one farthest down in the PDB file) 325 */ 326 public ResidueNumber getFirst() { 327 return treeMap.firstKey(); 328 } 329 330 /** 331 * @return The last {@link ResidueNumber} of any chain (the one farthest down in the PDB file) 332 */ 333 public ResidueNumber getLast() { 334 return treeMap.lastKey(); 335 } 336 337 /** 338 * Returns a list of {@link ResidueRange ResidueRanges} corresponding to this entire AtomPositionMap. 339 */ 340 public List<ResidueRangeAndLength> getRanges() { 341 String currentChain = ""; 342 ResidueNumber first = null; 343 ResidueNumber prev = null; 344 List<ResidueRangeAndLength> ranges = new ArrayList<ResidueRangeAndLength>(); 345 for (ResidueNumber rn : treeMap.keySet()) { 346 if (!rn.getChainName().equals(currentChain)) { 347 if (first != null) { 348 ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev)); 349 ranges.add(newRange); 350 } 351 first = rn; 352 } 353 prev = rn; 354 currentChain = rn.getChainName(); 355 } 356 ResidueRangeAndLength newRange = new ResidueRangeAndLength(currentChain, first, prev, this.getLength(first, prev)); 357 ranges.add(newRange); 358 return ranges; 359 } 360 361 /** 362 * Trims a residue range so that both endpoints are contained in this map. 363 * @param rr residue range 364 * @return residue range and length 365 */ 366 public ResidueRangeAndLength trimToValidResidues(ResidueRange rr) { 367 ResidueNumber start = rr.getStart(); 368 ResidueNumber end = rr.getEnd(); 369 String chain = rr.getChainName(); 370 // Add chainName 371 if(start.getChainName() == null) { 372 start = new ResidueNumber(chain,start.getSeqNum(),start.getInsCode()); 373 } 374 if(end.getChainName() == null) { 375 end = new ResidueNumber(chain,end.getSeqNum(),end.getInsCode()); 376 } 377 // Check that start and end are present in the map. 378 // If not, try to find the next valid residue 379 // (terminal residues sometimes lack CA atoms, so they don't appear) 380 Integer startIndex = getPosition(start); 381 if( startIndex == null) { 382 // Assume that the residue numbers are sequential 383 // Find startIndex such that the SeqNum is bigger than start's seqNum 384 for(ResidueNumber key : treeMap.keySet()) { 385 if( !key.getChainName().equals(chain) ) 386 continue; 387 if( start.getSeqNum() <= key.getSeqNum() ) { 388 start = key; 389 startIndex = getPosition(key); 390 break; 391 } 392 } 393 if( startIndex == null ) { 394 logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",start); 395 return null; 396 } else { 397 logger.warn("Unable to find Residue {}, so substituting {}.",rr.getStart(),start); 398 } 399 } 400 Integer endIndex = getPosition(end); 401 if( endIndex == null) { 402 // Assume that the residue numbers are sequential 403 // Find startIndex such that the SeqNum is bigger than start's seqNum 404 for(ResidueNumber key : treeMap.descendingKeySet()) { 405 if( !key.getChainName().equals(chain) ) 406 continue; 407 Integer value = getPosition(key); 408 if( value < startIndex ) { 409 // start is before the end! 410 break; 411 } 412 if( end.getSeqNum() >= key.getSeqNum() ) { 413 end = key; 414 endIndex = value; 415 break; 416 } 417 } 418 if( endIndex == null ) { 419 logger.error("Unable to find Residue {} in AtomPositionMap, and no plausible substitute.",end); 420 return null; 421 } else { 422 logger.warn("Unable to find Residue {}, so substituting {}.",rr.getEnd(),end); 423 } 424 } 425 426 // now use those to calculate the length 427 // if start or end is null, will throw NPE 428 int length = getLength(startIndex, endIndex,chain); 429 430 return new ResidueRangeAndLength(chain, start, end, length); 431 } 432}