001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmtf; 022 023import java.text.DateFormat; 024import java.text.SimpleDateFormat; 025import java.util.ArrayList; 026import java.util.Date; 027import java.util.HashSet; 028import java.util.LinkedHashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Map.Entry; 032import java.util.Set; 033 034import javax.vecmath.Matrix4d; 035 036import org.biojava.nbio.structure.AminoAcid; 037import org.biojava.nbio.structure.AminoAcidImpl; 038import org.biojava.nbio.structure.Atom; 039import org.biojava.nbio.structure.Bond; 040import org.biojava.nbio.structure.Chain; 041import org.biojava.nbio.structure.ExperimentalTechnique; 042import org.biojava.nbio.structure.Group; 043import org.biojava.nbio.structure.GroupType; 044import org.biojava.nbio.structure.NucleotideImpl; 045import org.biojava.nbio.structure.PDBCrystallographicInfo; 046import org.biojava.nbio.structure.Structure; 047import org.biojava.nbio.structure.StructureException; 048import org.biojava.nbio.structure.StructureIO; 049import org.biojava.nbio.structure.align.util.AtomCache; 050import org.biojava.nbio.structure.io.FileParsingParameters; 051import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; 052import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; 053import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 054import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 055import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 056import org.biojava.nbio.structure.secstruc.DSSPParser; 057import org.biojava.nbio.structure.secstruc.SecStrucCalc; 058import org.biojava.nbio.structure.secstruc.SecStrucState; 059import org.biojava.nbio.structure.secstruc.SecStrucType; 060import org.biojava.nbio.structure.xtal.CrystalCell; 061import org.biojava.nbio.structure.xtal.SpaceGroup; 062import org.rcsb.mmtf.dataholders.DsspType; 063import org.rcsb.mmtf.utils.CodecUtils; 064import org.slf4j.Logger; 065import org.slf4j.LoggerFactory; 066 067/** 068 * A utils class of functions needed for Biojava to read and write to mmtf. 069 * @author Anthony Bradley 070 * 071 */ 072public class MmtfUtils { 073 074 private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class); 075 076 /** 077 * Set up the configuration parameters for BioJava. 078 */ 079 public static AtomCache setUpBioJava() { 080 // Set up the atom cache etc 081 AtomCache cache = new AtomCache(); 082 cache.setUseMmCif(true); 083 FileParsingParameters params = cache.getFileParsingParams(); 084 params.setCreateAtomBonds(true); 085 params.setAlignSeqRes(true); 086 params.setParseBioAssembly(true); 087 DownloadChemCompProvider cc = new DownloadChemCompProvider(); 088 ChemCompGroupFactory.setChemCompProvider(cc); 089 cc.checkDoFirstInstall(); 090 cache.setFileParsingParams(params); 091 StructureIO.setAtomCache(cache); 092 return cache; 093 } 094 095 /** 096 * Set up the configuration parameters for BioJava. 097 * @param extraUrl the string describing the URL (or file path) from which 098 * to get missing CCD entries. 099 */ 100 public static AtomCache setUpBioJava(String extraUrl) { 101 // Set up the atom cache etc 102 AtomCache cache = new AtomCache(); 103 cache.setUseMmCif(true); 104 FileParsingParameters params = cache.getFileParsingParams(); 105 params.setCreateAtomBonds(true); 106 params.setAlignSeqRes(true); 107 params.setParseBioAssembly(true); 108 DownloadChemCompProvider.serverBaseUrl = extraUrl; 109 DownloadChemCompProvider.useDefaultUrlLayout = false; 110 DownloadChemCompProvider cc = new DownloadChemCompProvider(); 111 ChemCompGroupFactory.setChemCompProvider(cc); 112 cc.checkDoFirstInstall(); 113 cache.setFileParsingParams(params); 114 StructureIO.setAtomCache(cache); 115 return cache; 116 } 117 118 119 /** 120 * This sets all microheterogeneous groups 121 * (previously alternate location groups) as separate groups. 122 * This is required because mmtf groups cannot have multiple HET codes. 123 * @param bioJavaStruct 124 */ 125 public static void fixMicroheterogenity(Structure bioJavaStruct) { 126 // Loop through the models 127 for (int i=0; i<bioJavaStruct.nrModels(); i++){ 128 // Then the chains 129 List<Chain> chains = bioJavaStruct.getModel(i); 130 for (Chain c : chains) { 131 // Build a new list of groups 132 List<Group> outGroups = new ArrayList<>(); 133 for (Group g : c.getAtomGroups()) { 134 List<Group> removeList = new ArrayList<>(); 135 for (Group altLoc : g.getAltLocs()) { 136 // Check if they are not equal -> microheterogenity 137 if(! altLoc.getPDBName().equals(g.getPDBName())) { 138 // Now add this group to the main list 139 removeList.add(altLoc); 140 } 141 } 142 // Add this group 143 outGroups.add(g); 144 // Remove any microhet alt locs 145 g.getAltLocs().removeAll(removeList); 146 // Add these microhet alt locs 147 outGroups.addAll(removeList); 148 } 149 c.setAtomGroups(outGroups); 150 } 151 } 152 } 153 154 155 /** 156 * Generate the secondary structure for a Biojava structure object. 157 * @param bioJavaStruct the Biojava structure for which it is to be calculate. 158 */ 159 public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) { 160 SecStrucCalc ssp = new SecStrucCalc(); 161 162 try{ 163 ssp.calculate(bioJavaStruct, true); 164 } 165 catch(StructureException e) { 166 LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage()); 167 168 try { 169 DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result 170 } catch(Exception bige){ 171 LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage()); 172 } 173 } 174 } 175 176 /** 177 * Get the string representation of a space group. 178 * @param spaceGroup the input SpaceGroup object 179 * @return the space group as a string. 180 */ 181 public static String getSpaceGroupAsString(SpaceGroup spaceGroup) { 182 if(spaceGroup==null){ 183 return "NA"; 184 } 185 else{ 186 return spaceGroup.getShortSymbol(); 187 } 188 } 189 190 /** 191 * Get the length six array of the unit cell information. 192 * @param xtalInfo the input PDBCrystallographicInfo object 193 * @return the length six float array 194 */ 195 public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) { 196 CrystalCell xtalCell = xtalInfo.getCrystalCell(); 197 if(xtalCell==null){ 198 return null; 199 }else{ 200 float[] inputUnitCell = new float[6]; 201 inputUnitCell[0] = (float) xtalCell.getA(); 202 inputUnitCell[1] = (float) xtalCell.getB(); 203 inputUnitCell[2] = (float) xtalCell.getC(); 204 inputUnitCell[3] = (float) xtalCell.getAlpha(); 205 inputUnitCell[4] = (float) xtalCell.getBeta(); 206 inputUnitCell[5] = (float) xtalCell.getGamma(); 207 return inputUnitCell; 208 } 209 } 210 211 /** 212 * Converts the set of experimental techniques to an array of strings. 213 * @param experimentalTechniques the input set of experimental techniques 214 * @return the array of strings describing the methods used. 215 */ 216 public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) { 217 if(experimentalTechniques==null){ 218 return new String[0]; 219 } 220 String[] outArray = new String[experimentalTechniques.size()]; 221 int index = 0; 222 for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) { 223 outArray[index] = experimentalTechnique.getName(); 224 index++; 225 } 226 return outArray; 227 } 228 229 /** 230 * Covert a Date object to ISO time format. 231 * @param inputDate The input date object 232 * @return the time in ISO time format 233 */ 234 public static String dateToIsoString(Date inputDate) { 235 DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd"); 236 return dateStringFormat.format(inputDate); 237 } 238 239 /** 240 * Convert a bioassembly information into a map of transform, chainindices it relates to. 241 * @param bioassemblyInfo the bioassembly info object for this structure 242 * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to. 243 * @return the bioassembly information (as primitive types). 244 */ 245 public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) { 246 Map<Matrix4d, List<Integer>> matMap = new LinkedHashMap<>(); 247 List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms(); 248 for (BiologicalAssemblyTransformation transformation : transforms) { 249 Matrix4d transMatrix = transformation.getTransformationMatrix(); 250 String transChainId = transformation.getChainId(); 251 if (!chainIdToIndexMap.containsKey(transChainId)){ 252 continue; 253 } 254 int chainIndex = chainIdToIndexMap.get(transformation.getChainId()); 255 if(matMap.containsKey(transMatrix)){ 256 matMap.get(transMatrix).add(chainIndex); 257 } 258 else{ 259 List<Integer> chainIdList = new ArrayList<>(); 260 chainIdList.add(chainIndex); 261 matMap.put(transMatrix, chainIdList); 262 } 263 } 264 265 Map<double[], int[]> outMap = new LinkedHashMap<>(); 266 for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) { 267 outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue())); 268 } 269 return outMap; 270 } 271 272 /** 273 * Convert a four-d matrix to a double array. Row-packed. 274 * @param transformationMatrix the input matrix4d object 275 * @return the double array (16 long). 276 */ 277 public static double[] convertToDoubleArray(Matrix4d transformationMatrix) { 278 // Initialise the output array 279 double[] outArray = new double[16]; 280 // Iterate over the matrix 281 for(int i=0; i<4; i++){ 282 for(int j=0; j<4; j++){ 283 // Now set this element 284 outArray[i*4+j] = transformationMatrix.getElement(i,j); 285 } 286 } 287 return outArray; 288 } 289 290 /** 291 * Count the total number of groups in the structure 292 * @param structure the input structure 293 * @return the total number of groups 294 */ 295 public static int getNumGroups(Structure structure) { 296 int count = 0; 297 for(int i=0; i<structure.nrModels(); i++) { 298 for(Chain chain : structure.getChains(i)){ 299 count+= chain.getAtomGroups().size(); 300 } 301 } 302 return count; 303 } 304 305 306 /** 307 * Get a list of atoms for a group. Only add each atom once. 308 * @param inputGroup the Biojava Group to consider 309 * @return the atoms for the input Biojava Group 310 */ 311 public static List<Atom> getAtomsForGroup(Group inputGroup) { 312 Set<Atom> uniqueAtoms = new HashSet<Atom>(); 313 List<Atom> theseAtoms = new ArrayList<Atom>(); 314 for(Atom a: inputGroup.getAtoms()){ 315 theseAtoms.add(a); 316 uniqueAtoms.add(a); 317 } 318 List<Group> altLocs = inputGroup.getAltLocs(); 319 for(Group thisG: altLocs){ 320 for(Atom a: thisG.getAtoms()){ 321 if(uniqueAtoms.contains(a)){ 322 continue; 323 } 324 theseAtoms.add(a); 325 } 326 } 327 return theseAtoms; 328 } 329 330 /** 331 * Find the number of bonds in a group 332 * @param atomsInGroup the list of atoms in the group 333 * @return the number of atoms in the group 334 */ 335 public static int getNumBondsInGroup(List<Atom> atomsInGroup) { 336 int bondCounter = 0; 337 for(Atom atom : atomsInGroup) { 338 if(atom.getBonds()==null){ 339 continue; 340 } 341 for(Bond bond : atom.getBonds()) { 342 // Now set the bonding information. 343 Atom other = bond.getOther(atom); 344 // If both atoms are in the group 345 if (atomsInGroup.indexOf(other)!=-1){ 346 Integer firstBondIndex = atomsInGroup.indexOf(atom); 347 Integer secondBondIndex = atomsInGroup.indexOf(other); 348 // Don't add the same bond twice 349 if (firstBondIndex<secondBondIndex){ 350 bondCounter++; 351 } 352 } 353 } 354 } 355 return bondCounter; 356 } 357 358 /** 359 * Get the secondary structure as defined by DSSP. 360 * @param group the input group to be calculated 361 * @return the integer index of the group type. 362 */ 363 public static int getSecStructType(Group group) { 364 SecStrucState props = (SecStrucState) group.getProperty("secstruc"); 365 if(props==null){ 366 return DsspType.NULL_ENTRY.getDsspIndex(); 367 } 368 return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex(); 369 } 370 371 /** 372 * Get the secondary structure as defined by DSSP. 373 * @param group the input group to be calculated 374 * @param the integer index of the group type. 375 */ 376 public static void setSecStructType(Group group, int dsspIndex) { 377 SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex); 378 SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType); 379 if(secStrucType!=null){ 380 group.setProperty("secstruc", secStrucState); 381 } 382 else{ 383 } 384 } 385 386 387 /** 388 * Set the DSSP type based on a numerical index. 389 * @param dsspIndex the integer index of the type to set 390 * @return the instance of the SecStrucType object holding this secondary 391 * structure type. 392 */ 393 public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) { 394 String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType(); 395 for(SecStrucType secStrucType : SecStrucType.values()) 396 { 397 if(dsspType==secStrucType.name) 398 { 399 return secStrucType; 400 } 401 } 402 // Return a null entry. 403 return null; 404 } 405 406 /** 407 * Get summary information for the structure. 408 * @param structure the structure for which to get the information. 409 */ 410 public static MmtfSummaryDataBean getStructureInfo(Structure structure) { 411 MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean(); 412 // Get all the atoms 413 List<Atom> theseAtoms = new ArrayList<>(); 414 List<Chain> allChains = new ArrayList<>(); 415 Map<String, Integer> chainIdToIndexMap = new LinkedHashMap<>(); 416 int chainCounter = 0; 417 int bondCount = 0; 418 mmtfSummaryDataBean.setAllAtoms(theseAtoms); 419 mmtfSummaryDataBean.setAllChains(allChains); 420 mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap); 421 for (int i=0; i<structure.nrModels(); i++){ 422 List<Chain> chains = structure.getModel(i); 423 allChains.addAll(chains); 424 for (Chain chain : chains) { 425 String idOne = chain.getId(); 426 if (!chainIdToIndexMap.containsKey(idOne)) { 427 chainIdToIndexMap.put(idOne, chainCounter); 428 } 429 chainCounter++; 430 for (Group g : chain.getAtomGroups()) { 431 for(Atom atom: getAtomsForGroup(g)){ 432 theseAtoms.add(atom); 433 // If both atoms are in the group 434 if (atom.getBonds()!=null){ 435 bondCount+=atom.getBonds().size(); 436 } 437 } 438 } 439 } 440 } 441 // Assumes all bonds are referenced twice 442 mmtfSummaryDataBean.setNumBonds(bondCount/2); 443 return mmtfSummaryDataBean; 444 445 } 446 447 /** 448 * Get a list of N 4*4 matrices from a single list of doubles of length 16*N. 449 * @param ncsOperMatrixList the input list of doubles 450 * @return the list of 4*4 matrics 451 */ 452 public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) { 453 if(ncsOperMatrixList==null){ 454 return null; 455 } 456 int numMats = ncsOperMatrixList.length; 457 if(numMats==0){ 458 return null; 459 } 460 if(numMats==1 && ncsOperMatrixList[0].length==0){ 461 return null; 462 } 463 Matrix4d[] outList = new Matrix4d[numMats]; 464 for(int i=0; i<numMats; i++){ 465 outList[i] = new Matrix4d(ncsOperMatrixList[i]); 466 } 467 return outList; 468 } 469 470 /** 471 * Get a list of length N*16 of a list of Matrix4d*N. 472 * @param ncsOperators the {@link Matrix4d} list 473 * @return the list of length N*16 of the list of matrices 474 */ 475 public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) { 476 if(ncsOperators==null){ 477 return new double[0][0]; 478 } 479 double[][] outList = new double[ncsOperators.length][16]; 480 for(int i=0; i<ncsOperators.length;i++){ 481 outList[i] = convertToDoubleArray(ncsOperators[i]); 482 } 483 return outList; 484 } 485 486 /** 487 * Insert the group in the given position in the sequence. 488 * @param chain the chain to add the seq res group to 489 * @param group the group to add 490 * @param sequenceIndexId the index to add it in 491 */ 492 public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) { 493 List<Group> seqResGroups = chain.getSeqResGroups(); 494 addGroupAtId(seqResGroups, group, sequenceIndexId); 495 } 496 497 /** 498 * Add the missing groups to the SeqResGroups. 499 * @param modelChain the chain to add the information for 500 * @param sequence the sequence of the construct 501 */ 502 public static void addSeqRes(Chain modelChain, String sequence) { 503 List<Group> seqResGroups = modelChain.getSeqResGroups(); 504 GroupType chainType = getChainType(modelChain.getAtomGroups()); 505 for(int i=0; i<sequence.length(); i++){ 506 char singleLetterCode = sequence.charAt(i); 507 Group group = null; 508 if(seqResGroups.size()<=i){ 509 } 510 else{ 511 group=seqResGroups.get(i); 512 } 513 if(group!=null){ 514 continue; 515 } 516 group = getSeqResGroup(modelChain, singleLetterCode, chainType); 517 addGroupAtId(seqResGroups, group, i); 518 seqResGroups.set(i, group); 519 } 520 } 521 522 private static GroupType getChainType(List<Group> groups) { 523 for(Group group : groups) { 524 if(group==null){ 525 continue; 526 } 527 else if(group.getType()!=GroupType.HETATM){ 528 return group.getType(); 529 } 530 } 531 return GroupType.HETATM; 532 } 533 534 private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) { 535 while(seqResGroups.size()<=sequenceIndexId){ 536 seqResGroups.add(null); 537 } 538 if(sequenceIndexId>=0){ 539 seqResGroups.set(sequenceIndexId, group); 540 } 541 } 542 543 private static Group getSeqResGroup(Chain modelChain, char singleLetterCode, GroupType type) { 544 if(type==GroupType.AMINOACID){ 545 AminoAcidImpl a = new AminoAcidImpl(); 546 a.setRecordType(AminoAcid.SEQRESRECORD); 547 a.setAminoType(singleLetterCode); 548 ChemComp chemComp = new ChemComp(); 549 chemComp.setOne_letter_code(""+singleLetterCode); 550 a.setChemComp(chemComp); 551 return a; 552 553 } else if (type==GroupType.NUCLEOTIDE) { 554 NucleotideImpl n = new NucleotideImpl(); 555 ChemComp chemComp = new ChemComp(); 556 chemComp.setOne_letter_code(""+singleLetterCode); 557 n.setChemComp(chemComp); 558 return n; 559 } 560 else{ 561 return null; 562 } 563 } 564}