001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * 021 */ 022package org.biojava.nbio.structure.basepairs; 023 024import org.biojava.nbio.structure.Structure; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.Group; 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.contact.Pair; 029import org.biojava.nbio.structure.geometry.SuperPosition; 030import org.biojava.nbio.structure.geometry.SuperPositionQCP; 031import org.biojava.nbio.structure.io.PDBFileReader; 032 033import javax.vecmath.Matrix4d; 034import javax.vecmath.Point3d; 035import java.io.ByteArrayInputStream; 036import java.io.IOException; 037import java.io.Serializable; 038import java.util.List; 039import java.util.Locale; 040import java.util.Arrays; 041import java.util.ArrayList; 042import java.util.Map; 043import java.util.HashMap; 044 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048import static java.lang.Math.sin; 049import static java.lang.Math.cos; 050import static java.lang.Math.atan2; 051import static java.lang.Math.acos; 052import static java.lang.Math.PI; 053 054/** 055 * This module calculates the el Hassan-Calladine Base Pairing and Base-pair Step Parameters for any nucleic 056 * acid containing structure that has the information about the core base-pair rings. 057 * Citation: https://www.ncbi.nlm.nih.gov/pubmed/11601858 058 * 059 * The method that is usually overridden is findPairs(), this base implementation is used for a large-scale 060 * analysis of the most proper helical regions in almost 4000 protein-DNA structures, almost 061 * 2000 structures containing only DNA, or almost 1300 structures containing only RNA. (as of 7/2017). 062 * Those who study tertiary structures for RNA folding should use the TertiaryBasePairParameters, 063 * because this base class is only looking for base pairs between separate strands that exactly line up. 064 * To relax the lining up policy and allow for non-canonical base pairs, use the MismatchedBasePairParameters 065 * class, which will not consider intra-strand base pairing. 066 * 067 * @author Luke Czapla 068 * @since 5.0.0 069 * 070 */ 071public class BasePairParameters implements Serializable { 072 073 private static final long serialVersionUID = 6214502385L; 074 private static Logger log = LoggerFactory.getLogger(BasePairParameters.class); 075 076 // See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html 077 // and the paper cited at the top of this class (also as Table 1). 078 // These are hard-coded to avoid problems with resource paths. 079 public static final String[] STANDARD_BASES = new String[] { 080 "SEQRES 1 A 1 A\n" + 081 "ATOM 2 N9 A A 1 -1.291 4.498 0.000\n" + 082 "ATOM 3 C8 A A 1 0.024 4.897 0.000\n" + 083 "ATOM 4 N7 A A 1 0.877 3.902 0.000\n" + 084 "ATOM 5 C5 A A 1 0.071 2.771 0.000\n" + 085 "ATOM 6 C6 A A 1 0.369 1.398 0.000\n" + 086 "ATOM 8 N1 A A 1 -0.668 0.532 0.000\n" + 087 "ATOM 9 C2 A A 1 -1.912 1.023 0.000\n" + 088 "ATOM 10 N3 A A 1 -2.320 2.290 0.000\n" + 089 "ATOM 11 C4 A A 1 -1.267 3.124 0.000\n" + 090 "END", 091 "SEQRES 1 A 1 G\n" + 092 "ATOM 2 N9 G A 1 -1.289 4.551 0.000\n" + 093 "ATOM 3 C8 G A 1 0.023 4.962 0.000\n" + 094 "ATOM 4 N7 G A 1 0.870 3.969 0.000\n" + 095 "ATOM 5 C5 G A 1 0.071 2.833 0.000\n" + 096 "ATOM 6 C6 G A 1 0.424 1.460 0.000\n" + 097 "ATOM 8 N1 G A 1 -0.700 0.641 0.000\n" + 098 "ATOM 9 C2 G A 1 -1.999 1.087 0.000\n" + 099 "ATOM 11 N3 G A 1 -2.342 2.364 0.001\n" + 100 "ATOM 12 C4 G A 1 -1.265 3.177 0.000\n" + 101 "END", 102 "SEQRES 1 A 1 T\n" + 103 "ATOM 2 N1 T A 1 -1.284 4.500 0.000\n" + 104 "ATOM 3 C2 T A 1 -1.462 3.135 0.000\n" + 105 "ATOM 5 N3 T A 1 -0.298 2.407 0.000\n" + 106 "ATOM 6 C4 T A 1 0.994 2.897 0.000\n" + 107 "ATOM 8 C5 T A 1 1.106 4.338 0.000\n" + 108 "ATOM 10 C6 T A 1 -0.024 5.057 0.000\n" + 109 "END", 110 "SEQRES 1 A 1 C\n" + 111 "ATOM 2 N1 C A 1 -1.285 4.542 0.000\n" + 112 "ATOM 3 C2 C A 1 -1.472 3.158 0.000\n" + 113 "ATOM 5 N3 C A 1 -0.391 2.344 0.000\n" + 114 "ATOM 6 C4 C A 1 0.837 2.868 0.000\n" + 115 "ATOM 8 C5 C A 1 1.056 4.275 0.000\n" + 116 "ATOM 9 C6 C A 1 -0.023 5.068 0.000\n" + 117 "END", 118 "SEQRES 1 A 1 U\n" + 119 "ATOM 2 N1 U A 1 -1.284 4.500 0.000\n" + 120 "ATOM 3 C2 U A 1 -1.462 3.131 0.000\n" + 121 "ATOM 5 N3 U A 1 -0.302 2.397 0.000\n" + 122 "ATOM 6 C4 U A 1 0.989 2.884 0.000\n" + 123 "ATOM 8 C5 U A 1 1.089 4.311 0.000\n" + 124 "ATOM 9 C6 U A 1 -0.024 5.053 0.000\n" 125 }; 126 127 // this is also hard-coded data about standard WC base pairs for both DNA and RNA 128 protected static final String[] BASE_LIST_DNA = {"A", "G", "T", "C"}; 129 protected static final String[] BASE_LIST_RNA = {"A", "G", "U", "C"}; 130 protected static final Map<String, Integer> BASE_MAP; 131 // private static List<String> RNAspecific = Arrays.asList("U", "URA"), 132 // DNAspecific = Arrays.asList("DC", "C", "CYT"); 133 protected static final Map<Integer, List<String>> RING_MAP; 134 static { 135 BASE_MAP = new HashMap<>(); 136 BASE_MAP.put("DA", 0); BASE_MAP.put("ADE", 0); BASE_MAP.put("A", 0); 137 BASE_MAP.put("DG", 1); BASE_MAP.put("GUA", 1); BASE_MAP.put("G", 1); 138 BASE_MAP.put("DT", 2); BASE_MAP.put("THY", 2); BASE_MAP.put("T", 2); BASE_MAP.put("U", 2); BASE_MAP.put("URA", 2); 139 BASE_MAP.put("DC", 3); BASE_MAP.put("CYT", 3); BASE_MAP.put("C", 3); 140 141 RING_MAP = new HashMap<>(); 142 RING_MAP.put(0, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9")); 143 RING_MAP.put(1, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9")); 144 RING_MAP.put(2, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1")); 145 RING_MAP.put(3, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1")); 146 } 147 148 protected Structure structure; 149 protected boolean canonical = true; 150 protected boolean useRNA = false; 151 protected boolean nonredundant = false; 152 protected double[] pairParameters; 153 154 // this is the main data that the user wants to get back out from the procedure. 155 protected String pairSequence = ""; 156 protected double[][] pairingParameters; 157 protected double[][] stepParameters; 158 protected List<String> pairingNames = new ArrayList<>(); 159 protected List<Matrix4d> referenceFrames = new ArrayList<>(); 160 161 162 /** 163 * This constructor takes a Structure object, finds base pair and base-pair step parameters 164 * for double-helical regions within the structure. 165 * @param structure The already-loaded structure to analyze. 166 * @param useRNA whether to look for canonical RNA pairs. By default (false) it analyzes DNA. 167 * @param removeDups whether to only look for base-pair parameters for each unique sequence in 168 * the structure (if set to <i>true</i>) 169 * @param canonical Whether to consider only Watson-Crick base pairs 170 */ 171 public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups, boolean canonical) { 172 this.structure = structure; 173 this.useRNA = useRNA; 174 this.canonical = canonical; 175 this.nonredundant = removeDups; 176 177 } 178 179 /** 180 * This constructor takes a Structure object, whether to use RNA, and whether to remove duplicate sequences. 181 * @param structure The already-loaded structure to analyze. 182 * @param useRNA if true, the RNA standard bases will be used. Otherwise, if false, it will work on standard DNA bases. 183 * @param removeDups if true, duplicate sequences will not be considered. This is for the analysis of X-ray structures from 184 * RCSB, where there may be identical or similar units. 185 */ 186 public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) { 187 this(structure, useRNA, removeDups, false); 188 } 189 190 /** 191 * This constructor takes a Structure object, and whether to use the RNA standard bases. 192 * @param structure The already-loaded structure to analyze. 193 * @param useRNA if true, the RNA standard bases will be used. Otherwise, if false, it will work on standard DNA bases. 194 */ 195 public BasePairParameters(Structure structure, boolean useRNA) { 196 this(structure, useRNA, false, false); 197 } 198 199 /** 200 * This constructor takes a Structure object, finds base pair and base-pair step parameters 201 * for double-helical regions within the structure for only canonical DNA pairs. 202 * @param structure The already-loaded structure to analyze. 203 */ 204 public BasePairParameters(Structure structure) { 205 this(structure, false, false, true); 206 } 207 208 209 /** 210 * This method is the main function call to extract all step parameters, pairing parameters, and sequence 211 * information from the Structure object provided to the constructor. 212 * @return This same object with the populated data, convenient for output 213 * (e.g. <i>log.info(new BasePairParameters(structure).analyze());</i>) 214 */ 215 public BasePairParameters analyze() { 216 if (structure == null) { 217 pairingParameters = null; 218 stepParameters = null; 219 return this; 220 } 221 List<Chain> nucleics = this.getNucleicChains(nonredundant); 222 List<Pair<Group>> pairs = this.findPairs(nucleics); 223 this.pairingParameters = new double[pairs.size()][6]; 224 this.stepParameters = new double[pairs.size()][6]; 225 Matrix4d lastStep; 226 Matrix4d currentStep = null; 227 for (int i = 0; i < pairs.size(); i++) { 228 lastStep = currentStep; 229 currentStep = this.basePairReferenceFrame(pairs.get(i)); 230 referenceFrames.add((Matrix4d)currentStep.clone()); 231 for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j]; 232 if (i != 0) { 233 lastStep.invert(); 234 lastStep.mul(currentStep); 235 double[] sparms = calculateTp(lastStep); 236 for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j]; 237 } 238 } 239 return this; 240 } 241 242 243 244 /** 245 * This method returns the total number of base pairs that were found, used after the call to analyze(). 246 * @return An integer value, number of base pairs 247 */ 248 public int getLength() { 249 if (structure == null || pairParameters == null) throw new IllegalArgumentException("This structure is not analyzed or not initialized."); 250 return pairingParameters.length; 251 } 252 253 254 /** 255 * This method reports all the pair parameters, in the order of: 256 * buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å). 257 * @return A double[][] with length equal to number of base pairs for rows, and 6 columns 258 */ 259 public double[][] getPairingParameters() { 260 return pairingParameters; 261 } 262 263 /** 264 * This method reports all the base-pair step parameters, in the order of: 265 * tilt, roll, twist (in degrees), shift, slide, rise (in Å). 266 * @return A double[][] with length equal to number of base pairs (the first row 0 has no step 267 * and therefore is six zeroes), and 6 columns. 268 */ 269 public double[][] getStepParameters() { 270 return stepParameters; 271 } 272 273 274 /** 275 * This method returns the primary strand's sequence where parameters were found. 276 * There are spaces in the string anywhere there was a break in the helix or when 277 * it goes from one helix to another helix in the structure. (the "step" is still returned) 278 * @return String of primary sequence with spaces between gaps and new helices. 279 */ 280 public String getPairSequence() { 281 return pairSequence; 282 } 283 284 285 /** 286 * This method returns the names of the pairs in terms of A, G, T/U, and C for each base pair group in the 287 * list. The first character is the leading strand base and the second character is the complementary base 288 * @return 289 */ 290 public List<String> getPairingNames() { 291 return pairingNames; 292 } 293 294 public List<Matrix4d> getReferenceFrames() { 295 return referenceFrames; 296 } 297 298 /** 299 * This method is an internal test that the base pair specified is within a valid range. If not, it throws an exception 300 * with a message. 301 * @param bp The index of the base pair or base-pair step to return. 302 */ 303 private void checkArgument(int bp) { 304 if (bp < 0 || bp >= getPairingParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 305 } 306 307 /** 308 * This method returns the buckle in degrees for the given base pair 309 * @param bp the number of the base pair (starting with 0) 310 * @return the value as a double (in degrees) 311 */ 312 public Double getBuckle(int bp) { 313 checkArgument(bp); 314 return pairingParameters[bp][0]; 315 } 316 317 /** 318 * This method returns the propeller ("propeller-twist") in degrees for the given base pair 319 * @param bp the number of the base pair (starting with 0) 320 * @return the value as a double (in degrees) 321 */ 322 public Double getPropeller(int bp) { 323 checkArgument(bp); 324 return pairingParameters[bp][1]; 325 } 326 327 /** 328 * This method returns the opening in degrees for the given base pair 329 * @param bp the number of the base pair (starting with 0) 330 * @return the value as a double (in degrees) 331 */ 332 public Double getOpening(int bp) { 333 checkArgument(bp); 334 return pairingParameters[bp][2]; 335 } 336 337 /** 338 * This method returns the shear in Å for the given base pair 339 * @param bp the number of the base pair (starting with 0) 340 * @return the value as a double (in Å) 341 */ 342 public Double getShear(int bp) { 343 checkArgument(bp); 344 return pairingParameters[bp][3]; 345 } 346 347 /** 348 * This method returns the stretch in Å for the given base pair 349 * @param bp the number of the base pair (starting with 0) 350 * @return the value as a double (in Å) 351 */ 352 public Double getStretch(int bp) { 353 checkArgument(bp); 354 return pairingParameters[bp][4]; 355 } 356 357 /** 358 * This method returns the stagger in Å for the given base pair 359 * @param bp the number of the base pair (starting with 0) 360 * @return the value as a double (in Å) 361 */ 362 public Double getStagger(int bp) { 363 checkArgument(bp); 364 return pairingParameters[bp][5]; 365 } 366 367 /** 368 * This method returns the tilt for the given base pair, relative to the one before it. 369 * @param bp the number of the base pair (starting with 0) 370 * @return the value as a double (in degrees) 371 */ 372 public Double getTilt(int bp) { 373 checkArgument(bp); 374 return stepParameters[bp][0]; 375 } 376 377 /** 378 * This method returns the roll for the given base pair, relative to the one before it. 379 * @param bp the number of the base pair (starting with 0) 380 * @return the value as a double (in degrees) 381 */ 382 public Double getRoll(int bp) { 383 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 384 return stepParameters[bp][1]; 385 } 386 387 /** 388 * This method returns the twist for the given base pair, relative to the one before it. 389 * @param bp the number of the base pair (starting with 0) 390 * @return the value as a double (in degrees) 391 */ 392 public Double getTwist(int bp) { 393 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 394 return stepParameters[bp][2]; 395 } 396 397 /** 398 * Return the shift for the given base pair, relative to the one before it. 399 * @param bp the number of the base pair (starting with 0) 400 * @return the value as a double (in Å) 401 */ 402 public Double getShift(int bp) { 403 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 404 return stepParameters[bp][3]; 405 } 406 407 /** 408 * This method returns the slide for the given base pair, relative to the one before it. 409 * @param bp the number of the base pair (starting with 0) 410 * @return the value as a double (in Å) 411 */ 412 public Double getSlide(int bp) { 413 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 414 return stepParameters[bp][4]; 415 } 416 417 /** 418 * This method returns the rise for the given base pair, relative to the one before it. 419 * @param bp the number of the base pair (starting with 0) 420 * @return the value as a double (in Å) 421 */ 422 public Double getRise(int bp) { 423 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 424 return stepParameters[bp][5]; 425 } 426 427 428 /** 429 * This method reports all the nucleic acid chains and has an option to remove duplicates if you 430 * are considering an analysis of only unique DNA or RNA helices in the Structure. 431 * @param removeDups If true, it will ignore duplicate chains 432 * @return A list of all the nucleic acid chains in order of the Structure 433 */ 434 public List<Chain> getNucleicChains(boolean removeDups) { 435 if (structure == null) return new ArrayList<>(); 436 List<Chain> chains = structure.getChains(); 437 List<Chain> result = new ArrayList<>(); 438 for (Chain c: chains) { 439 if (c.isNucleicAcid()) { 440 result.add(c); 441 } 442 } 443 if (removeDups) for (int i = 0; i < result.size(); i++) { 444 for (int j = i+2; j < result.size(); j++) { 445 // remove duplicate sequences (structures with two or more identical units) 446 if (result.get(i).getAtomSequence().equals(result.get(j).getAtomSequence())) { 447 result.remove(j); 448 } 449 } 450 } 451 return result; 452 } 453 454 /** 455 * This method performs a search for base pairs in the structure. The criteria is alignment of 456 * sequences and the canonical base pairs of DNA or RNA. Use MismatchedBasePairParameters 457 * or TertiaryBasePairParameters for finding higher-order associations. 458 * @param chains The list of chains already found to be nucleic acids 459 * @return The list of corresponding Watson-Crick groups as pairs, as a Pair of nucleic acid Groups 460 */ 461 public List<Pair<Group>> findPairs(List<Chain> chains) { 462 List<Pair<Group>> result = new ArrayList<>(); 463 for (int i = 0; i < chains.size(); i++) { 464 Chain c = chains.get(i); 465 for (int j = i+1; j < chains.size(); j++) { 466 String complement = complement(chains.get(j).getAtomSequence(), useRNA); 467 String match = longestCommonSubstring(c.getAtomSequence(), complement); 468 if (log.isDebugEnabled()) { 469 log.debug(c.getAtomSequence() + " " + chains.get(j).getAtomSequence() + " " + match); 470 } 471 int index1 = c.getAtomSequence().indexOf(match); 472 int index2 = complement.length() - complement.indexOf(match) - 1; 473 for (int k = 0; k < match.length(); k++) { 474 Group g1 = c.getAtomGroup(index1+k); 475 Group g2 = chains.get(j).getAtomGroup(index2-k); 476 Integer type1 = BASE_MAP.get(g1.getPDBName()); 477 Integer type2 = BASE_MAP.get(g2.getPDBName()); 478 if (type1 == null || type2 == null) { 479 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 480 continue; 481 } 482 Atom a1 = g1.getAtom(RING_MAP.get(type1).get(0)); 483 Atom a2 = g2.getAtom(RING_MAP.get(type2).get(0)); 484 485 if (a1 == null) { 486 log.info("Error processing " + g1.getPDBName()); 487 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 488 continue; 489 } 490 if (a2 == null) { 491 log.info("Error processing " + g2.getPDBName()); 492 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 493 continue; 494 } 495 496 double dx = a1.getX()-a2.getX(); 497 double dy = a1.getY()-a2.getY(); 498 double dz = a1.getZ()-a2.getZ(); 499 double distance = Math.sqrt(dx*dx+dy*dy+dz*dz); 500 //log.info("C8-C6 Distance (Å): " + distance); 501 // could be a base pair 502 if (Math.abs(distance-10.0) < 4.0) { 503 boolean valid = true; 504 for (String atomname : RING_MAP.get(type1)) { 505 Atom a = g1.getAtom(atomname); 506 if (a == null) valid = false; 507 } 508 if (valid) for (String atomname: RING_MAP.get(type2)) { 509 Atom a = g2.getAtom(atomname); 510 if (a == null) valid = false; 511 } 512 if (valid) { 513 result.add(new Pair<Group>(g1, g2)); 514 pairingNames.add((useRNA ? BASE_LIST_RNA[type1]+ BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1]+ BASE_LIST_DNA[type2])); 515 pairSequence += c.getAtomSequence().charAt(index1 + k); 516 } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 517 } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 518 } 519 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 520 } 521 //log.info(); 522 } 523 log.info("Matched: " + pairSequence); 524 return result; 525 } 526 527 528 /** 529 * This method calculates the central frame (4x4 transformation matrix) of a single base pair. 530 * @param pair An array of the two groups that make a hypothetical pair 531 * @return The middle frame of the center of the base-pair formed 532 */ 533 public Matrix4d basePairReferenceFrame(Pair<Group> pair) { 534 Integer type1 = BASE_MAP.get(pair.getFirst().getPDBName()); 535 Integer type2 = BASE_MAP.get(pair.getSecond().getPDBName()); 536 SuperPosition sp = new SuperPositionQCP(true); 537 if (type1 == null || type2 == null) return null; 538 PDBFileReader pdbFileReader = new PDBFileReader(); 539 Structure s1, s2; 540 try { 541 s1 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type1].getBytes())); 542 s2 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type2].getBytes())); 543 } catch (IOException e) { 544 e.printStackTrace(); 545 return null; 546 } 547 Group std1 = s1.getChain("A").getAtomGroup(0); 548 Group std2 = s2.getChain("A").getAtomGroup(0); 549 550 Point3d[] pointref = new Point3d[std1.getAtoms().size()]; 551 Point3d[] pointact = new Point3d[std1.getAtoms().size()]; 552 int count = 0; 553 554 for (Atom a : std1.getAtoms()) { 555 if (pair.getFirst().getAtom(a.getName()) == null) return null; 556 pointref[count] = a.getCoordsAsPoint3d(); 557 pointact[count] = pair.getFirst().getAtom(a.getName()).getCoordsAsPoint3d(); 558 count++; 559 } 560 assert count == std1.getAtoms().size(); 561 Matrix4d ref1 = (Matrix4d)sp.superposeAndTransform(pointact, pointref).clone(); 562 563 pointref = new Point3d[std2.getAtoms().size()]; 564 pointact = new Point3d[std2.getAtoms().size()]; 565 566 count = 0; 567 for (Atom a : std2.getAtoms()) { 568 if (pair.getSecond().getAtom(a.getName()) == null) return null; 569 pointref[count] = a.getCoordsAsPoint3d(); 570 pointact[count] = pair.getSecond().getAtom(a.getName()).getCoordsAsPoint3d(); 571 count++; 572 } 573 assert count == std2.getAtoms().size(); 574 575 Matrix4d temp = (Matrix4d)ref1.clone(); 576 Matrix4d temp2 = (Matrix4d)temp.clone(); 577 Matrix4d ref2 = sp.superposeAndTransform(pointact, pointref); 578 579 double[][] v = new double[3][4]; 580 double[] y3 = new double[4]; 581 double[] z3 = new double[4]; 582 ref2.getColumn(1, y3); 583 ref2.getColumn(2, z3); 584 double[] z31 = new double[4]; 585 ref1.getColumn(2, z31); 586 if (z3[0]*z31[0]+z3[1]*z31[1]+z3[2]*z31[2] < 0.0) { 587 for (int i = 0; i < 3; i++) { 588 y3[i] *= -1.0; 589 z3[i] *= -1.0; 590 } 591 } 592 ref2.setColumn(1, y3); 593 ref2.setColumn(2, z3); 594 595 temp.add(ref2); 596 temp.mul(0.5); 597 double[] x3 = new double[4]; 598 temp.getColumn(0, x3); 599 temp.getColumn(1, y3); 600 temp.getColumn(2, z3); 601 x3 = removeComponent(x3, z3); 602 x3 = removeComponent(x3, y3); 603 y3 = removeComponent(y3, z3); 604 temp.setColumn(0, x3); 605 temp.setColumn(1, y3); 606 temp.setColumn(2, z3); 607 608 // normalize the short, long, and normal axes 609 for (int i = 0; i < 3; i++) { 610 temp.getColumn(i, v[i]); 611 double r = Math.sqrt(v[i][0] * v[i][0] + v[i][1] * v[i][1] + v[i][2] * v[i][2]); 612 for (int j = 0; j < 3; j++) { 613 v[i][j] /= r; 614 } 615 temp.setColumn(i, v[i]); 616 } 617 618 // calculate pairing parameters: buckle, propeller, opening, shear, stretch, stagger 619 temp2.invert(); 620 temp2.mul(ref2); 621 pairParameters = calculateTp(temp2); 622 for (int i = 0; i < 6; i++) pairParameters[i] *= -1; 623 624 // return the central frame of the base pair 625 return temp; 626 627 } 628 629 630 @Override 631 public String toString() { 632 if (getPairingParameters() == null) return "No data"; 633 StringBuilder result = new StringBuilder(10000); 634 result.append(pairingParameters.length + " base pairs\n"); 635 result.append("bp: buckle propeller opening shear stretch stagger tilt roll twist shift slide rise\n"); 636 for (int i = 0; i < pairingParameters.length; i++) { 637 result.append(pairingNames.get(i)+": "); 638 for (int j = 0; j < 6; j++) 639 result.append(String.format(Locale.US, "%5.4f", pairingParameters[i][j]) + " "); 640 for (int j = 0; j < 6; j++) 641 result.append(String.format(Locale.US, "%5.4f", stepParameters[i][j]) + " "); 642 result.append("\n"); 643 } 644 return result.toString(); 645 } 646 647 648 // The following methods are just helper classes for the rapid analyze of base-pair geometry. 649 /** 650 * This method calculates pairing and step parameters from 4x4 transformation matrices (used internally) 651 * that comes out as a Matrix4d. 652 * @param input the 4x4 matrix representing the transformation from strand II -> strand I or pair i to pair i+1 653 * @return Six parameters as double[6] 654 */ 655 public static double[] calculateTp(Matrix4d input) { 656 657 double[][] A = new double[4][4]; 658 for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) { 659 A[i][j] = input.getElement(i, j); 660 } 661 double[] M = new double[6]; 662 663 double cosgamma, gamma, phi, omega, sgcp, omega2_minus_phi, 664 sm, cm, sp, cp, sg, cg; 665 666 cosgamma = A[2][2]; 667 if (cosgamma > 1.0) cosgamma = 1.0; 668 else if (cosgamma < -1.0) cosgamma = -1.0; 669 670 gamma = acos(cosgamma); 671 672 sgcp = A[1][1]*A[0][2]-A[0][1]*A[1][2]; 673 674 if (gamma == 0.0) omega = -atan2(A[0][1],A[1][1]); 675 else omega = atan2(A[2][1]*A[0][2]+sgcp*A[1][2],sgcp*A[0][2]-A[2][1]*A[1][2]); 676 677 omega2_minus_phi = atan2(A[1][2],A[0][2]); 678 679 phi = omega/2.0 - omega2_minus_phi; 680 681 M[0] = gamma*sin(phi)*180.0/PI; 682 M[1] = gamma*cos(phi)*180.0/PI; 683 M[2] = omega*180.0/PI; 684 685 sm = sin(omega/2.0-phi); 686 cm = cos(omega/2.0-phi); 687 sp = sin(phi); 688 cp = cos(phi); 689 sg = sin(gamma/2.0); 690 cg = cos(gamma/2.0); 691 692 M[3] = (cm*cg*cp-sm*sp)*A[0][3]+(sm*cg*cp+cm*sp)*A[1][3]-sg*cp*A[2][3]; 693 M[4] = (-cm*cg*sp-sm*cp)*A[0][3]+(-sm*cg*sp+cm*cp)*A[1][3]+sg*sp*A[2][3]; 694 M[5] = (cm*sg)*A[0][3]+(sm*sg)*A[1][3]+cg*A[2][3]; 695 696 return M; 697 698 } 699 700 /** 701 * This method returns the complement of a base. (used internally) 702 * @param base The letter of the base 703 * @param RNA Whether it is RNA (if false, it is DNA) 704 * @return The character representing the complement of the base 705 */ 706 protected static char complementBase(char base, boolean RNA) { 707 if (base == 'A' && RNA) return 'U'; 708 if (base == 'A') return 'T'; 709 if (base == 'T' && !RNA) return 'A'; 710 if (base == 'U' && RNA) return 'A'; 711 if (base == 'C') return 'G'; 712 if (base == 'G') return 'C'; 713 return ' '; 714 } 715 716 /** 717 * Simple helper method for quickly checking the complement of a sequence, see also DNASequence nad RNASequence classes 718 * for more extensively useful functions not used in this narrow context of structural biology of base pairs. (Used internally) 719 */ 720 private static String complement(String sequence, boolean RNA) { 721 String result = ""; 722 for (int i = sequence.length() - 1; i >= 0; i--) { 723 result += complementBase(sequence.charAt(i), RNA); 724 } 725 return result; 726 } 727 728 /** 729 * This does a 3D Vector cross product of two vectors as double arrays. (used internally) 730 * 731 * @param a An array of length 3 or 4 (4th component is ignored) 732 * @param b An array of length 3 or 4 (4th component is ignored) 733 * @return The cross product of the vectors (just the first three components 734 */ 735 @SuppressWarnings("unused") 736 private static double[] cross(double[] a, double[] b) { 737 assert a.length >= 3 && b.length >= 3; 738 double[] result = new double[4]; 739 result[0] = a[1]*b[2]-a[2]*b[1]; 740 result[1] = a[2]*b[0]-a[0]*b[2]; 741 result[2] = a[0]*b[1]-a[1]*b[0]; 742 return result; 743 } 744 745 /** 746 * This method removes any component of vector a that is along vector b. (used internally) 747 * @param a The array (vector) to remove component from 748 * @param b The component array (vector) to remove from the first 749 * @return The original array a with any component along b removed from it. 750 */ 751 private static double[] removeComponent(double[] a, double[] b) { 752 double dot = 0; 753 double[] result = new double[4]; 754 for (int i = 0; i < 3; i++) { 755 dot += a[i]*b[i]; 756 } 757 for (int i = 0; i < 3; i++) { 758 result[i] = a[i]-dot*b[i]; 759 } 760 return result; 761 762 } 763 764 /** 765 * This method finds the longest common substring between two strings. (used internally) 766 * @param s1 The first string 767 * @param s2 The second string 768 * @return The substring itself 769 */ 770 private static String longestCommonSubstring(String s1, String s2) { 771 int start = 0; 772 int max = 0; 773 for (int i = 0; i < s1.length(); i++) { 774 for (int j = 0; j < s2.length(); j++) { 775 int x = 0; 776 while (s1.charAt(i + x) == s2.charAt(j + x)) { 777 x++; 778 if (((i + x) >= s1.length()) || ((j + x) >= s2.length())) break; 779 } 780 if (x > max) { 781 max = x; 782 start = i; 783 } 784 } 785 } 786 return s1.substring(start, (start + max)); 787 } 788 789 /** 790 * This returns true if a is the complement of b, false otherwise. (used internally) 791 * @param a First letter 792 * @param b Potential matching letter 793 * @param RNA Whether it is RNA (if false, DNA rules are used) 794 * @return True if the bases are complementary. 795 */ 796 protected static boolean match(char a, char b, boolean RNA) { 797 if (a == 'A' && b == 'T' && !RNA) return true; 798 if (a == 'A' && b == 'U' && RNA) return true; 799 if (a == 'T' && b == 'A' && !RNA) return true; 800 if (a == 'U' && b == 'A' && RNA) return true; 801 if (a == 'G' && b == 'C') return true; 802 if (a == 'C' && b == 'G') return true; 803 return false; 804 } 805}