001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmtf; 022 023import java.io.Serializable; 024import java.text.ParseException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Collections; 028import java.util.Date; 029import java.util.HashMap; 030import java.util.LinkedHashMap; 031import java.util.List; 032import java.util.Map; 033 034import javax.vecmath.Matrix4d; 035 036import org.biojava.nbio.structure.AminoAcid; 037import org.biojava.nbio.structure.AminoAcidImpl; 038import org.biojava.nbio.structure.Atom; 039import org.biojava.nbio.structure.AtomImpl; 040import org.biojava.nbio.structure.BondImpl; 041import org.biojava.nbio.structure.Chain; 042import org.biojava.nbio.structure.ChainImpl; 043import org.biojava.nbio.structure.Element; 044import org.biojava.nbio.structure.EntityInfo; 045import org.biojava.nbio.structure.EntityType; 046import org.biojava.nbio.structure.Group; 047import org.biojava.nbio.structure.HetatomImpl; 048import org.biojava.nbio.structure.NucleotideImpl; 049import org.biojava.nbio.structure.PDBCrystallographicInfo; 050import org.biojava.nbio.structure.PDBHeader; 051import org.biojava.nbio.structure.PdbId; 052import org.biojava.nbio.structure.Structure; 053import org.biojava.nbio.structure.StructureImpl; 054import org.biojava.nbio.structure.StructureTools; 055import org.biojava.nbio.structure.chem.ChemComp; 056import org.biojava.nbio.structure.chem.PolymerType; 057import org.biojava.nbio.structure.chem.ResidueType; 058import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 059import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 060import org.biojava.nbio.structure.xtal.CrystalCell; 061import org.biojava.nbio.structure.xtal.SpaceGroup; 062import org.rcsb.mmtf.api.StructureAdapterInterface; 063import org.rcsb.mmtf.dataholders.MmtfStructure; 064import org.slf4j.Logger; 065import org.slf4j.LoggerFactory; 066 067 068/** 069 * A biojava specific structure inflator for MMTF. 070 * Should be ported to biojava code. 071 * 072 * @author Anthony Bradley 073 * @since 5.0 074 * 075 */ 076public class MmtfStructureReader implements StructureAdapterInterface, Serializable { 077 078 /** The Constant serialVersionUID. */ 079 private static final long serialVersionUID = 6772030485225130853L; 080 081 /** The logger */ 082 private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class); 083 084 /** The structure. */ 085 private Structure structure; 086 087 /** The model number. */ 088 private int modelNumber; 089 090 /** The chain. */ 091 private Chain chain; 092 093 /** The group. */ 094 private Group group; 095 096 /** The atoms in a group. */ 097 private List<Atom> atomsInGroup; 098 099 /** All the atoms. */ 100 private Atom[] allAtoms; 101 private int atomCounter; 102 103 /** The list of EntityInformation */ 104 private List<EntityInfo> entityInfoList; 105 106 /** All the chains */ 107 private List<Chain> chainList; 108 109 /** All the chains as a list of maps */ 110 private List<Map<String,Chain>> chainMap; 111 112 private List<double[]> transformList; 113 114 private int bioassIndex; 115 116 private Map<String,String> chainSequenceMap; 117 118 /** 119 * Instantiates a new bio java structure decoder. 120 */ 121 public MmtfStructureReader() { 122 structure = new StructureImpl(); 123 modelNumber = 0; 124 entityInfoList = new ArrayList<>(); 125 chainList = new ArrayList<>(); 126 chainMap = new ArrayList<>(); 127 transformList = new ArrayList<>(); 128 chainSequenceMap = new HashMap<>(); 129 } 130 131 /** 132 * Gets the structure. 133 * 134 * @return the structure 135 */ 136 public Structure getStructure() { 137 return structure; 138 } 139 140 @Override 141 public void finalizeStructure() { 142 // Number the remaining ones 143 int counter =0; 144 // Add the entity info 145 for (EntityInfo entityInfo : entityInfoList) { 146 counter++; 147 entityInfo.setMolId(counter); 148 } 149 structure.setEntityInfos(entityInfoList); 150 // Add the actual chains 151 for(int i=0; i<chainMap.size(); i++) { 152 // Now add the chain information 153 Map<String, Chain> modelChainMap = chainMap.get(i); 154 for(Chain modelChain : modelChainMap.values()){ 155 structure.addChain(modelChain, i); 156 String sequence = chainSequenceMap.get(modelChain.getId()); 157 if (sequence == null) { 158 logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId()); 159 continue; 160 } 161 MmtfUtils.addSeqRes(modelChain, sequence); 162 } 163 } 164 StructureTools.cleanUpAltLocs(structure); 165 } 166 167 @Override 168 public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, 169 int totalNumChains, int totalNumModels, String modelId) { 170 if (modelId != null) { 171 structure.setPdbId(new PdbId(modelId)); 172 } 173 allAtoms = new Atom[totalNumAtoms]; 174 } 175 176 177 /* (non-Javadoc) 178 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int) 179 */ 180 @Override 181 public void setModelInfo(int inputModelNumber, 182 int chainCount) { 183 modelNumber = inputModelNumber; 184 structure.addModel(new ArrayList<Chain>(chainCount)); 185 chainMap.add(new LinkedHashMap<>()); 186 } 187 188 /* (non-Javadoc) 189 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 190 * #setChainInfo(java.lang.String, int) 191 */ 192 @Override 193 public void setChainInfo(String chainId, String chainName, int groupCount) { 194 // First check to see if the chain exists 195 Map<String, Chain> modelChainMap = chainMap.get(modelNumber); 196 if(modelChainMap.containsKey(chainId)){ 197 chain = modelChainMap.get(chainId); 198 } 199 // If we need to set a new chain do this 200 else{ 201 chain = new ChainImpl(); 202 chain.setId(chainId.trim()); 203 chain.setName(chainName); 204 chain.setAtomGroups(new ArrayList<>(groupCount)); 205 modelChainMap.put(chainId, chain); 206 chainList.add(chain); 207 } 208 } 209 210 211 /* (non-Javadoc) 212 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 213 * #setGroupInfo(java.lang.String, int, char, int, int) 214 */ 215 @Override 216 public void setGroupInfo(String groupName, int groupNumber, 217 char insertionCode, String chemCompType, int atomCount, int bondCount, 218 char singleLetterCode, int sequenceIndexId, int secStructType) { 219 // Get the polymer type 220 ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType); 221 if (residueType == null) 222 throw new IllegalStateException("Couldn't resolve residue type for "+ chemCompType); 223 224 int polymerType = getGroupTypIndicator(residueType.polymerType); 225 switch (polymerType) { 226 case 1: 227 AminoAcid aa = new AminoAcidImpl(); 228 // Now set the one letter code 229 aa.setAminoType(singleLetterCode); 230 group = aa; 231 break; 232 case 2: 233 group = new NucleotideImpl(); 234 break; 235 default: 236 group = new HetatomImpl(); 237 break; 238 } 239 atomsInGroup = new ArrayList<>(); 240 ChemComp chemComp = new ChemComp(); 241 chemComp.setOneLetterCode(String.valueOf(singleLetterCode)); 242 chemComp.setType(chemCompType.toUpperCase()); 243 chemComp.setResidueType(residueType); 244 chemComp.setPolymerType(residueType.polymerType); 245 group.setChemComp(chemComp); 246 group.setPDBName(groupName); 247 if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) { 248 group.setResidueNumber(chain.getName().trim(), groupNumber, null); 249 } else { 250 group.setResidueNumber(chain.getName().trim(), 251 groupNumber, insertionCode); 252 } 253 group.setAtoms(new ArrayList<>(atomCount)); 254 if (polymerType==1 || polymerType==2) { 255 MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId); 256 } 257 if (atomCount > 0) { 258 chain.addGroup(group); 259 } 260 MmtfUtils.setSecStructType(group, secStructType); 261 } 262 263 /** 264 * 265 * @return 266 */ 267 private Group getGroupWithSameResNumButDiffPDBName() { 268 // If this chain already has this group number 269 for (Group g : chain.getAtomGroups() ) { 270 if (g.getResidueNumber().equals(group.getResidueNumber())) { 271 if( ! g.getPDBName().equals(group.getPDBName() )){ 272 return g; 273 } 274 } 275 } 276 return null; 277 } 278 279 /* (non-Javadoc) 280 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 281 * setAtomInfo(java.lang.String, int, char, float, float, 282 * float, float, float, java.lang.String, int) 283 */ 284 @Override 285 public void setAtomInfo(String atomName, 286 int serialNumber, char alternativeLocationId, float x, 287 float y, float z, float occupancy, 288 float temperatureFactor, 289 String element, int charge) { 290 Atom atom = new AtomImpl(); 291 Group altGroup = null; 292 atom.setPDBserial(serialNumber); 293 atom.setName(atomName.trim()); 294 atom.setElement(Element.valueOfIgnoreCase(element)); 295 if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){ 296 alternativeLocationId = ' '; 297 } 298 if (alternativeLocationId != ' ') { 299 // Get the altGroup 300 altGroup = getCorrectAltLocGroup(alternativeLocationId); 301 atom.setAltLoc(alternativeLocationId); 302 } else { 303 atom.setAltLoc(Character.valueOf(' ')); 304 } 305 atom.setX(x); 306 atom.setY(y); 307 atom.setZ(z); 308 atom.setOccupancy(occupancy); 309 atom.setTempFactor(temperatureFactor); 310 atom.setCharge((short) charge); 311 if (altGroup == null) { 312 group.addAtom(atom); 313 } else { 314 altGroup.setChain(chain); 315 altGroup.addAtom(atom); 316 } 317 318 // IF the main group doesn't have this atom 319 if (!group.hasAtom(atom.getName())) { 320 321 // If it's not a microheterogenity case 322 if (group.getPDBName().equals(atom.getGroup().getPDBName())) { 323 // And it's not a deuterated case. 'nanoheterogenity' 324 if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){ 325 group.addAtom(atom); 326 } 327 } 328 } 329 atomsInGroup.add(atom); 330 allAtoms[atomCounter] = atom; 331 atomCounter++; 332 } 333 334 /* (non-Javadoc) 335 * @see org.rcsb.mmtf.decoder.StructureDecoderInter 336 * face#setGroupBonds(int, int, int) 337 */ 338 @Override 339 public void setGroupBond(int indOne, int indTwo, int bondOrder) { 340 341 // Get the atoms 342 Atom atomOne = atomsInGroup.get(indOne); 343 Atom atomTwo = atomsInGroup.get(indTwo); 344 345 // set the new bond 346 new BondImpl(atomOne, atomTwo, bondOrder); 347 348 } 349 350 /* (non-Javadoc) 351 * @see org.rcsb.mmtf.decoder.StructureDecoder 352 * Interface#setInterGroupBonds(int, int, int) 353 */ 354 @Override 355 public void setInterGroupBond(int indOne, int indTwo, int bondOrder) { 356 357 // Get the atoms 358 Atom atomOne = allAtoms[indOne]; 359 Atom atomTwo = allAtoms[indTwo]; 360 361 // set the new bond (this 362 new BondImpl(atomOne, atomTwo, bondOrder); 363 } 364 365 366 /** 367 * Generates Alternate location groups. 368 * 369 * @param altLoc the alt loc 370 * @return the correct alt loc group 371 */ 372 private Group getCorrectAltLocGroup(Character altLoc) { 373 // see if we know this altLoc already; 374 List<Atom> atoms = group.getAtoms(); 375 if (atoms.size() > 0) { 376 Atom a1 = atoms.get(0); 377 // we are just adding atoms to the current group 378 // probably there is a second group following later... 379 if (a1.getAltLoc().equals(altLoc)) { 380 return group; } 381 } 382 383 // Get the altLocGroup 384 Group altLocgroup = group.getAltLocGroup(altLoc); 385 if (altLocgroup != null) { 386 return altLocgroup; 387 } 388 // If the group already exists (microheterogenity). 389 Group oldGroup = getGroupWithSameResNumButDiffPDBName(); 390 if (oldGroup!= null){ 391 Group altLocG = group; 392 group = oldGroup; 393 group.addAltLoc(altLocG); 394 chain.getAtomGroups().remove(altLocG); 395 return altLocG; 396 } 397 // no matching altLoc group found. 398 // build it up. 399 if (group.getAtoms().size() == 0) { 400 return group; 401 } 402 Group altLocG = (Group) group.clone(); 403 // drop atoms from cloned group... 404 // https://redmine.open-bio.org/issues/3307 405 altLocG.setAtoms(new ArrayList<Atom>()); 406 altLocG.getAltLocs().clear(); 407 group.addAltLoc(altLocG); 408 return altLocG; 409 410 } 411 412 413 /* (non-Javadoc) 414 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 415 * setXtalInfo(java.lang.String, java.util.List) 416 */ 417 @Override 418 public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) { 419 // Now set the xtalographic information 420 PDBCrystallographicInfo pci = new PDBCrystallographicInfo(); 421 SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString); 422 pci.setSpaceGroup(spaceGroup); 423 if (unitCell.length > 0) { 424 CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1], 425 unitCell[2], unitCell[3], unitCell[4], unitCell[5]); 426 pci.setCrystalCell(cell); 427 structure.setCrystallographicInfo(pci); 428 } 429 430 pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList)); 431 } 432 433 434 /** 435 * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 436 * @param polymerType 437 * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 438 */ 439 private int getGroupTypIndicator(PolymerType polymerType) { 440 if(PolymerType.PROTEIN_ONLY.contains(polymerType)){ 441 return 1; 442 } 443 if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){ 444 return 2; 445 } 446 return 0; 447 } 448 449 450 @Override 451 public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) { 452 // Biojava uses this as a one indexed id. 453 bioAssemblyId++; 454 if(bioassIndex!=bioAssemblyId){ 455 transformList = new ArrayList<>(); 456 bioassIndex = bioAssemblyId; 457 } 458 PDBHeader pdbHeader = structure.getPDBHeader(); 459 // Get the bioassembly data 460 Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies(); 461 // Get the bioassembly itself (if it exists 462 BioAssemblyInfo bioAssInfo; 463 if (bioAssemblies.containsKey(bioAssemblyId)){ 464 bioAssInfo = bioAssemblies.get(bioAssemblyId); 465 } 466 else{ 467 bioAssInfo = new BioAssemblyInfo(); 468 bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>()); 469 bioAssemblies.put(bioAssemblyId, bioAssInfo); 470 bioAssInfo.setId(bioAssemblyId); 471 } 472 473 for(int currChainIndex : inputChainIndices){ 474 BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation(); 475 Integer transId = transformList.indexOf(inputTransform)+1; 476 if(transId==0){ 477 transformList.add(inputTransform); 478 transId = transformList.indexOf(inputTransform)+1; 479 } 480 bioAssTrans.setId(transId.toString()); 481 // If it actually has an index - if it doesn't it is because the chain has no density. 482 if (currChainIndex!=-1){ 483 bioAssTrans.setChainId(chainList.get(currChainIndex).getId()); 484 } 485 else { 486 continue; 487 } 488 // Now set matrix 489 Matrix4d mat4d = new Matrix4d(inputTransform); 490 bioAssTrans.setTransformationMatrix(mat4d); 491 // Now add this 492 bioAssInfo.getTransforms().add(bioAssTrans); 493 // sort transformations into a unique order 494 Collections.sort(bioAssInfo.getTransforms()); 495 } 496 } 497 498 @Override 499 public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) { 500 // First get the chains 501 EntityInfo entityInfo = new EntityInfo(); 502 entityInfo.setDescription(description); 503 entityInfo.setType(EntityType.entityTypeFromString(type)); 504 List<Chain> chains = new ArrayList<>(); 505 // Now loop through the chain ids and make a list of them 506 for( int index : chainIndices) { 507 chains.add(chainList.get(index)); 508 chainList.get(index).setEntityInfo(entityInfo); 509 chainSequenceMap.put(chainList.get(index).getId(), sequence); 510 } 511 entityInfo.setChains(chains); 512 entityInfoList.add(entityInfo); 513 } 514 515 @Override 516 public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, 517 String releaseDate, String[] experimentalMethods) { 518 SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); 519 // Get the pdb header 520 PDBHeader pdbHeader = structure.getPDBHeader(); 521 pdbHeader.setTitle(title); 522 pdbHeader.setResolution(resolution); 523 pdbHeader.setRfree(rFree); 524 pdbHeader.setRwork(rWork); 525 // Now loop through the techniques and add them in 526 if (experimentalMethods!=null) { 527 for (String techniqueStr : experimentalMethods) { 528 pdbHeader.setExperimentalTechnique(techniqueStr); 529 } 530 } 531 // Set the dates 532 if(depositionDate!=null){ 533 try { 534 Date depDate = formatter.parse(depositionDate); 535 pdbHeader.setDepDate(depDate); 536 } catch (ParseException e) { 537 logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate); 538 } 539 } 540 else{ 541 pdbHeader.setDepDate(new Date(0)); 542 } 543 if(releaseDate!=null){ 544 try { 545 Date relDate = formatter.parse(releaseDate); 546 pdbHeader.setRelDate(relDate); 547 } catch (ParseException e) { 548 logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate); 549 } 550 } 551 else{ 552 pdbHeader.setRelDate(new Date(0)); 553 } 554 } 555}