/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmtf;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ExperimentalTechnique;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBCrystallographicInfo;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.chem.ChemComp;
import org.biojava.nbio.structure.chem.ChemCompGroupFactory;
import org.biojava.nbio.structure.chem.ChemCompTools;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.secstruc.SecStrucCalc;
import org.biojava.nbio.structure.secstruc.SecStrucState;
import org.biojava.nbio.structure.secstruc.SecStrucType;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.mmtf.dataholders.DsspType;
import org.rcsb.mmtf.utils.CodecUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utils class of functions needed for Biojava to read and write to mmtf.
 * @author Anthony Bradley
 *
 */
public class MmtfUtils {

    private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);

    /**
     * This sets all microheterogeneous groups
     * (previously alternate location groups) as separate groups.
     * This is required because mmtf groups cannot have multiple HET codes.
     * <p>
     * For every atom group of every chain of every model, each alt-loc group whose
     * PDB name differs from its parent's is removed from the parent's alt-loc list
     * and inserted into the chain's atom-group list directly after the parent.
     * @param bioJavaStruct the structure to modify in place
     */
    public static void fixMicroheterogenity(Structure bioJavaStruct) {
        // Loop through the models
        for (int i = 0; i < bioJavaStruct.nrModels(); i++) {
            // Then the chains
            List<Chain> chains = bioJavaStruct.getModel(i);
            for (Chain c : chains) {
                // Build a new list of groups
                List<Group> outGroups = new ArrayList<>();
                for (Group g : c.getAtomGroups()) {
                    // Alt locs whose name differs from the parent -> microheterogeneity
                    List<Group> removeList = new ArrayList<>();
                    for (Group altLoc : g.getAltLocs()) {
                        if (!altLoc.getPDBName().equals(g.getPDBName())) {
                            removeList.add(altLoc);
                        }
                    }
                    // Add this group
                    outGroups.add(g);
                    // Remove any microhet alt locs from the parent
                    g.getAltLocs().removeAll(removeList);
                    // Add these microhet alt locs as groups in their own right
                    outGroups.addAll(removeList);
                }
                c.setAtomGroups(outGroups);
            }
        }
    }


    /**
     * Generate the secondary structure for a Biojava structure object.
     * A {@link StructureException} raised by the calculation is logged as a
     * warning and not propagated.
     * @param bioJavaStruct the Biojava structure for which it is to be calculated.
     */
    public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
        SecStrucCalc ssp = new SecStrucCalc();

        try {
            ssp.calculate(bioJavaStruct, true);
        } catch (StructureException e) {
            LOGGER.warn("Could not calculate secondary structure (error {}). Secondary structure annotation will be missing.", e.getMessage());
        }
    }

    /**
     * Get the string representation of a space group.
     * @param spaceGroup the input SpaceGroup object
     * @return the short symbol of the space group, or {@code "NA"} if
     * {@code spaceGroup} is null.
     */
    public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
        if (spaceGroup == null) {
            return "NA";
        }
        return spaceGroup.getShortSymbol();
    }

    /**
     * Get the length six array of the unit cell information.
     * @param xtalInfo the input PDBCrystallographicInfo object
     * @return the length six float array (a, b, c, alpha, beta, gamma), or
     * {@code null} if {@code xtalInfo} is null or holds no crystal cell.
     */
    public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
        // No crystallographic info is treated like a missing cell instead of throwing.
        if (xtalInfo == null) {
            return null;
        }
        CrystalCell xtalCell = xtalInfo.getCrystalCell();
        if (xtalCell == null) {
            return null;
        }
        return new float[] {
                (float) xtalCell.getA(),
                (float) xtalCell.getB(),
                (float) xtalCell.getC(),
                (float) xtalCell.getAlpha(),
                (float) xtalCell.getBeta(),
                (float) xtalCell.getGamma()
        };
    }

    /**
     * Converts the set of experimental techniques to an array of strings.
     * @param experimentalTechniques the input set of experimental techniques
     * @return the array of strings describing the methods used; empty if the
     * input is null.
     */
    public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
        if (experimentalTechniques == null) {
            return new String[0];
        }
        String[] outArray = new String[experimentalTechniques.size()];
        int index = 0;
        for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
            outArray[index++] = experimentalTechnique.getName();
        }
        return outArray;
    }

    /**
     * Convert a Date object to ISO date format ({@code yyyy-MM-dd}).
     * The date is rendered in the JVM default time zone.
     * @param inputDate The input date object; must not be null
     * @return the date in ISO format
     */
    public static String dateToIsoString(Date inputDate) {
        // Locale.ROOT pins the formatter to the Gregorian calendar and ASCII digits;
        // the default-locale constructor can otherwise pick a locale-specific calendar.
        DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
        return dateStringFormat.format(inputDate);
    }

    /**
     * Convert a bioassembly information into a map of transform, chainindices it relates to.
     * Transformations whose chain id has no entry in {@code chainIdToIndexMap} are skipped.
     * @param bioassemblyInfo the bioassembly info object for this structure
     * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
     * @return the bioassembly information (as primitive types): each key is a
     * 16-element row-packed matrix, each value the chain indices it applies to,
     * in order of first appearance of the matrix.
     */
    public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) {
        // Group chain indices by transformation matrix, keeping first-seen order.
        Map<Matrix4d, List<Integer>> matMap = new LinkedHashMap<>();
        for (BiologicalAssemblyTransformation transformation : bioassemblyInfo.getTransforms()) {
            // Single lookup: an unmapped chain id yields null and is skipped.
            Integer chainIndex = chainIdToIndexMap.get(transformation.getChainId());
            if (chainIndex == null) {
                continue;
            }
            Matrix4d transMatrix = transformation.getTransformationMatrix();
            List<Integer> chainIndices = matMap.get(transMatrix);
            if (chainIndices == null) {
                chainIndices = new ArrayList<>();
                matMap.put(transMatrix, chainIndices);
            }
            chainIndices.add(chainIndex);
        }

        Map<double[], int[]> outMap = new LinkedHashMap<>();
        for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) {
            outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
        }
        return outMap;
    }

    /**
     * Convert a four-d matrix to a double array. Row-packed.
     * @param transformationMatrix the input matrix4d object
     * @return the double array (16 long).
     */
    public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
        double[] outArray = new double[16];
        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < 4; j++) {
                // Element (row i, column j) goes to position i*4+j
                outArray[i * 4 + j] = transformationMatrix.getElement(i, j);
            }
        }
        return outArray;
    }

    /**
     * Count the total number of groups in the structure
     * @param structure the input structure
     * @return the total number of atom groups over all chains of all models
     */
    public static int getNumGroups(Structure structure) {
        int count = 0;
        for (int i = 0; i < structure.nrModels(); i++) {
            for (Chain chain : structure.getChains(i)) {
                count += chain.getAtomGroups().size();
            }
        }
        return count;
    }


    /**
     * Get a list of atoms for a group. Only add each atom once.
     * The atoms of the group itself come first, followed by those atoms of its
     * alt-loc groups that have not been seen before.
     * @param inputGroup the Biojava Group to consider
     * @return the atoms for the input Biojava Group
     */
    public static List<Atom> getAtomsForGroup(Group inputGroup) {
        List<Atom> theseAtoms = new ArrayList<>(inputGroup.getAtoms());
        Set<Atom> uniqueAtoms = new HashSet<>(theseAtoms);
        for (Group altLoc : inputGroup.getAltLocs()) {
            for (Atom a : altLoc.getAtoms()) {
                // Set.add reports whether the atom is new; recording alt-loc atoms too
                // keeps an atom shared by several alt-loc groups from being listed twice.
                if (uniqueAtoms.add(a)) {
                    theseAtoms.add(a);
                }
            }
        }
        return theseAtoms;
    }

    /**
     * Find the number of bonds in a group
     * @param atomsInGroup the list of atoms in the group
     * @return the number of bonds whose two atoms are both in the group
     */
    public static int getNumBondsInGroup(List<Atom> atomsInGroup) {
        int bondCounter = 0;
        for (Atom atom : atomsInGroup) {
            if (atom.getBonds() == null) {
                continue;
            }
            // Position of this atom is the same for all of its bonds
            int atomIndex = atomsInGroup.indexOf(atom);
            for (Bond bond : atom.getBonds()) {
                int otherIndex = atomsInGroup.indexOf(bond.getOther(atom));
                // Both atoms must be in the group; counting only when this atom
                // precedes its partner avoids counting the same bond twice.
                if (otherIndex != -1 && atomIndex < otherIndex) {
                    bondCounter++;
                }
            }
        }
        return bondCounter;
    }

    /**
     * Get the secondary structure as defined by DSSP.
     * @param group the input group to be calculated
     * @return the integer index of the group type; the index of
     * {@code DsspType.NULL_ENTRY} if the group has no "secstruc" property.
     */
    public static int getSecStructType(Group group) {
        SecStrucState props = (SecStrucState) group.getProperty("secstruc");
        if (props == null) {
            return DsspType.NULL_ENTRY.getDsspIndex();
        }
        return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex();
    }

    /**
     * Set the secondary structure as defined by DSSP.
     * Nothing is set if the index does not map to a known secondary structure type.
     * @param group the input group to be annotated
     * @param dsspIndex integer index of the group type.
     */
    public static void setSecStructType(Group group, int dsspIndex) {
        SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex);
        if (secStrucType == null) {
            return;
        }
        group.setProperty("secstruc", new SecStrucState(group, "MMTF_ASSIGNED", secStrucType));
    }


    /**
     * Get the DSSP type based on a numerical index.
     * @param dsspIndex the integer index of the type to look up
     * @return the instance of the SecStrucType object holding this secondary
     * structure type, or {@code null} if no SecStrucType has a matching name.
     */
    public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) {
        String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType();
        for (SecStrucType secStrucType : SecStrucType.values()) {
            if (dsspType.equals(secStrucType.name)) {
                return secStrucType;
            }
        }
        // No matching type
        return null;
    }

    /**
     * Get summary information for the structure.
     * @param structure the structure for which to get the information.
     * @return a bean holding all atoms, all chains, the chain id to index map
     * and the bond count of the structure
     */
    public static MmtfSummaryDataBean getStructureInfo(Structure structure) {
        MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean();
        // Get all the atoms
        List<Atom> theseAtoms = new ArrayList<>();
        List<Chain> allChains = new ArrayList<>();
        Map<String, Integer> chainIdToIndexMap = new LinkedHashMap<>();
        int chainCounter = 0;
        int bondCount = 0;
        mmtfSummaryDataBean.setAllAtoms(theseAtoms);
        mmtfSummaryDataBean.setAllChains(allChains);
        mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
        for (int i = 0; i < structure.nrModels(); i++) {
            List<Chain> chains = structure.getModel(i);
            allChains.addAll(chains);
            for (Chain chain : chains) {
                // Only the first occurrence of a chain id is recorded; the counter
                // still advances for every chain of every model.
                String idOne = chain.getId();
                if (!chainIdToIndexMap.containsKey(idOne)) {
                    chainIdToIndexMap.put(idOne, chainCounter);
                }
                chainCounter++;
                for (Group g : chain.getAtomGroups()) {
                    for (Atom atom : getAtomsForGroup(g)) {
                        theseAtoms.add(atom);
                        if (atom.getBonds() != null) {
                            bondCount += atom.getBonds().size();
                        }
                    }
                }
            }
        }
        // Assumes all bonds are referenced twice
        mmtfSummaryDataBean.setNumBonds(bondCount / 2);
        return mmtfSummaryDataBean;
    }

    /**
     * Get an array of N 4*4 matrices from an array of N double arrays, each
     * describing one matrix.
     * @param ncsOperMatrixList the input array of matrices as double arrays
     * @return the array of 4*4 matrices, or {@code null} if the input is null,
     * empty, or consists of a single empty array
     */
    public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
        if (ncsOperMatrixList == null) {
            return null;
        }
        int numMats = ncsOperMatrixList.length;
        if (numMats == 0) {
            return null;
        }
        if (numMats == 1 && ncsOperMatrixList[0].length == 0) {
            return null;
        }
        Matrix4d[] outList = new Matrix4d[numMats];
        for (int i = 0; i < numMats; i++) {
            outList[i] = new Matrix4d(ncsOperMatrixList[i]);
        }
        return outList;
    }

    /**
     * Get an array of N row-packed double arrays (16 long each) from an array
     * of N Matrix4d.
     * @param ncsOperators the {@link Matrix4d} array
     * @return one 16-element array per matrix; an empty array if the input is null
     */
    public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) {
        if (ncsOperators == null) {
            return new double[0][0];
        }
        // Rows are supplied by convertToDoubleArray, so only the outer array is allocated.
        double[][] outList = new double[ncsOperators.length][];
        for (int i = 0; i < ncsOperators.length; i++) {
            outList[i] = convertToDoubleArray(ncsOperators[i]);
        }
        return outList;
    }

    /**
     * Insert the group in the given position in the sequence.
     * @param chain the chain to add the seq res group to
     * @param group the group to add
     * @param sequenceIndexId the index to add it in
     */
    public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) {
        List<Group> seqResGroups = chain.getSeqResGroups();
        addGroupAtId(seqResGroups, group, sequenceIndexId);
    }

    /**
     * Add the missing groups to the SeqResGroups.
     * Positions that already hold a non-null group are left untouched.
     * @param modelChain the chain to add the information for
     * @param sequence the sequence of the construct
     */
    public static void addSeqRes(Chain modelChain, String sequence) {

        List<Group> seqResGroups = modelChain.getSeqResGroups();
        GroupType chainType = getChainType(modelChain.getAtomGroups());

        for (int i = 0; i < sequence.length(); i++) {
            // Skip positions that are already populated
            if (seqResGroups.size() > i && seqResGroups.get(i) != null) {
                continue;
            }
            Group group = getSeqResGroup(sequence.charAt(i), chainType);
            addGroupAtId(seqResGroups, group, i);
        }
    }

    /**
     * Get the type of the first non-null group that is not a HETATM.
     * @param groups the groups of the chain
     * @return that type, or {@code GroupType.HETATM} if there is no such group
     */
    private static GroupType getChainType(List<Group> groups) {
        for (Group group : groups) {
            if (group != null && group.getType() != GroupType.HETATM) {
                return group.getType();
            }
        }
        return GroupType.HETATM;
    }

    /**
     * Set the element at the given index, padding the list with nulls first
     * if it is too short. A negative index leaves the list unchanged.
     */
    private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
        while (seqResGroups.size() <= sequenceIndexId) {
            seqResGroups.add(null);
        }
        if (sequenceIndexId >= 0) {
            seqResGroups.set(sequenceIndexId, group);
        }
    }

    /**
     * Build a SEQRES group for a one-letter code.
     * @param singleLetterCode the one-letter code of the residue
     * @param type the polymer type of the chain
     * @return an amino acid or nucleotide group, or {@code null} if the type is
     * neither of those or the code has no known multi-letter name
     */
    private static Group getSeqResGroup(char singleLetterCode, GroupType type) {

        if (type == GroupType.AMINOACID) {
            String threeLetter = ChemCompTools.getAminoThreeLetter(singleLetterCode);
            if (threeLetter == null) {
                return null;
            }
            ChemComp chemComp = ChemCompGroupFactory.getChemComp(threeLetter);

            AminoAcidImpl a = new AminoAcidImpl();
            a.setRecordType(AminoAcid.SEQRESRECORD);
            a.setAminoType(singleLetterCode);
            a.setPDBName(threeLetter);
            a.setChemComp(chemComp);
            return a;

        } else if (type == GroupType.NUCLEOTIDE) {
            String twoLetter = ChemCompTools.getDNATwoLetter(singleLetterCode);
            if (twoLetter == null) {
                return null;
            }
            ChemComp chemComp = ChemCompGroupFactory.getChemComp(twoLetter);

            NucleotideImpl n = new NucleotideImpl();
            n.setPDBName(twoLetter);
            n.setChemComp(chemComp);
            return n;
        } else {
            return null;
        }
    }
}