001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmtf; 022 023import java.io.Serializable; 024import java.text.ParseException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Collections; 028import java.util.Date; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032 033import javax.vecmath.Matrix4d; 034 035import org.biojava.nbio.structure.AminoAcid; 036import org.biojava.nbio.structure.AminoAcidImpl; 037import org.biojava.nbio.structure.Atom; 038import org.biojava.nbio.structure.AtomImpl; 039import org.biojava.nbio.structure.BondImpl; 040import org.biojava.nbio.structure.Chain; 041import org.biojava.nbio.structure.ChainImpl; 042import org.biojava.nbio.structure.Element; 043import org.biojava.nbio.structure.EntityInfo; 044import org.biojava.nbio.structure.EntityType; 045import org.biojava.nbio.structure.Group; 046import org.biojava.nbio.structure.HetatomImpl; 047import org.biojava.nbio.structure.NucleotideImpl; 048import org.biojava.nbio.structure.PDBCrystallographicInfo; 049import org.biojava.nbio.structure.PDBHeader; 050import org.biojava.nbio.structure.Structure; 051import org.biojava.nbio.structure.StructureImpl; 052import org.biojava.nbio.structure.StructureTools; 053import org.biojava.nbio.structure.io.mmcif.chem.PolymerType; 054import org.biojava.nbio.structure.io.mmcif.chem.ResidueType; 055import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 056import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 057import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 058import org.biojava.nbio.structure.xtal.CrystalCell; 059import org.biojava.nbio.structure.xtal.SpaceGroup; 060import org.rcsb.mmtf.api.StructureAdapterInterface; 061import org.rcsb.mmtf.dataholders.MmtfStructure; 062import org.slf4j.Logger; 063import org.slf4j.LoggerFactory; 064 065 066/** 067 * A biojava specific structure inflator for MMTF. 068 * Should be ported to biojava code. 069 * 070 * @author Anthony Bradley 071 */ 072public class MmtfStructureReader implements StructureAdapterInterface, Serializable { 073 074 /** The Constant serialVersionUID. */ 075 private static final long serialVersionUID = 6772030485225130853L; 076 077 /** The logger */ 078 private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class); 079 080 /** The structure. */ 081 private Structure structure; 082 083 /** The model number. */ 084 private int modelNumber; 085 086 /** The chain. */ 087 private Chain chain; 088 089 /** The group. */ 090 private Group group; 091 092 /** The atoms in a group. */ 093 private List<Atom> atomsInGroup; 094 095 /** All the atoms. */ 096 private Atom[] allAtoms; 097 private int atomCounter; 098 099 /** The list of EntityInformation */ 100 private List<EntityInfo> entityInfoList; 101 102 /** All the chains */ 103 private List<Chain> chainList; 104 105 /** All the chains as a list of maps */ 106 private List<Map<String,Chain>> chainMap; 107 108 private List<double[]> transformList; 109 110 private int bioassIndex; 111 112 private Map<String,String> chainSequenceMap; 113 114 /** 115 * Instantiates a new bio java structure decoder. 116 */ 117 public MmtfStructureReader() { 118 structure = new StructureImpl(); 119 modelNumber = 0; 120 entityInfoList = new ArrayList<>(); 121 chainList = new ArrayList<>(); 122 chainMap = new ArrayList<>(); 123 transformList = new ArrayList<>(); 124 chainSequenceMap = new HashMap<>(); 125 } 126 127 /** 128 * Gets the structure. 129 * 130 * @return the structure 131 */ 132 public Structure getStructure() { 133 return structure; 134 } 135 136 @Override 137 public void finalizeStructure() { 138 // Number the remaining ones 139 int counter =0; 140 // Add the entity info 141 for (EntityInfo entityInfo : entityInfoList) { 142 counter++; 143 entityInfo.setMolId(counter); 144 } 145 structure.setEntityInfos(entityInfoList); 146 // Add the actual chains 147 for(int i=0; i<chainMap.size(); i++) { 148 // Now add the chain information 149 Map<String, Chain> modelChainMap = chainMap.get(i); 150 for(Chain modelChain : modelChainMap.values()){ 151 structure.addChain(modelChain, i); 152 String sequence = chainSequenceMap.get(modelChain.getId()); 153 if (sequence == null) { 154 logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId()); 155 continue; 156 } 157 MmtfUtils.addSeqRes(modelChain, sequence); 158 } 159 } 160 StructureTools.cleanUpAltLocs(structure); 161 } 162 163 @Override 164 public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, 165 int totalNumChains, int totalNumModels, String modelId) { 166 structure.setPDBCode(modelId); 167 allAtoms = new Atom[totalNumAtoms]; 168 } 169 170 171 /* (non-Javadoc) 172 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int) 173 */ 174 @Override 175 public void setModelInfo(int inputModelNumber, 176 int chainCount) { 177 modelNumber = inputModelNumber; 178 structure.addModel(new ArrayList<Chain>(chainCount)); 179 chainMap.add(new HashMap<>()); 180 } 181 182 /* (non-Javadoc) 183 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 184 * #setChainInfo(java.lang.String, int) 185 */ 186 @Override 187 public void setChainInfo(String chainId, String chainName, int groupCount) { 188 // First check to see if the chain exists 189 Map<String, Chain> modelChainMap = chainMap.get(modelNumber); 190 if(modelChainMap.containsKey(chainId)){ 191 chain = modelChainMap.get(chainId); 192 } 193 // If we need to set a new chain do this 194 else{ 195 chain = new ChainImpl(); 196 chain.setId(chainId.trim()); 197 chain.setName(chainName); 198 chain.setAtomGroups(new ArrayList<>(groupCount)); 199 modelChainMap.put(chainId, chain); 200 chainList.add(chain); 201 } 202 } 203 204 205 /* (non-Javadoc) 206 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 207 * #setGroupInfo(java.lang.String, int, char, int, int) 208 */ 209 @Override 210 public void setGroupInfo(String groupName, int groupNumber, 211 char insertionCode, String chemCompType, int atomCount, int bondCount, 212 char singleLetterCode, int sequenceIndexId, int secStructType) { 213 // Get the polymer type 214 ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType); 215 int polymerType = getGroupTypIndicator(residueType.polymerType); 216 switch (polymerType) { 217 case 1: 218 AminoAcid aa = new AminoAcidImpl(); 219 // Now set the one letter code 220 aa.setAminoType(singleLetterCode); 221 group = aa; 222 break; 223 case 2: 224 group = new NucleotideImpl(); 225 break; 226 default: 227 group = new HetatomImpl(); 228 break; 229 } 230 atomsInGroup = new ArrayList<Atom>(); 231 ChemComp chemComp = new ChemComp(); 232 chemComp.setOne_letter_code(String.valueOf(singleLetterCode)); 233 chemComp.setType(chemCompType.toUpperCase()); 234 chemComp.setResidueType(residueType); 235 chemComp.setPolymerType(residueType.polymerType); 236 group.setChemComp(chemComp); 237 group.setPDBName(groupName); 238 if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) { 239 group.setResidueNumber(chain.getName().trim(), groupNumber, null); 240 } else { 241 group.setResidueNumber(chain.getName().trim(), 242 groupNumber, insertionCode); 243 } 244 group.setAtoms(new ArrayList<Atom>(atomCount)); 245 if (polymerType==1 || polymerType==2) { 246 MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId); 247 } 248 if (atomCount > 0) { 249 chain.addGroup(group); 250 } 251 MmtfUtils.setSecStructType(group, secStructType); 252 } 253 254 /** 255 * 256 * @return 257 */ 258 private Group getGroupWithSameResNumButDiffPDBName() { 259 // If this chain already has this group number 260 for (Group g : chain.getAtomGroups() ) { 261 if (g.getResidueNumber().equals(group.getResidueNumber())) { 262 if( ! g.getPDBName().equals(group.getPDBName() )){ 263 return g; 264 } 265 } 266 } 267 return null; 268 } 269 270 /* (non-Javadoc) 271 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 272 * setAtomInfo(java.lang.String, int, char, float, float, 273 * float, float, float, java.lang.String, int) 274 */ 275 @Override 276 public void setAtomInfo(String atomName, 277 int serialNumber, char alternativeLocationId, float x, 278 float y, float z, float occupancy, 279 float temperatureFactor, 280 String element, int charge) { 281 Atom atom = new AtomImpl(); 282 Group altGroup = null; 283 atom.setPDBserial(serialNumber); 284 atom.setName(atomName.trim()); 285 atom.setElement(Element.valueOfIgnoreCase(element)); 286 if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){ 287 alternativeLocationId = ' '; 288 } 289 if (alternativeLocationId != ' ') { 290 // Get the altGroup 291 altGroup = getCorrectAltLocGroup(alternativeLocationId); 292 atom.setAltLoc(alternativeLocationId); 293 } else { 294 atom.setAltLoc(Character.valueOf(' ')); 295 } 296 atom.setX(x); 297 atom.setY(y); 298 atom.setZ(z); 299 atom.setOccupancy(occupancy); 300 atom.setTempFactor(temperatureFactor); 301 atom.setCharge((short) charge); 302 if (altGroup == null) { 303 group.addAtom(atom); 304 } else { 305 altGroup.setChain(chain); 306 altGroup.addAtom(atom); 307 } 308 309 // IF the main group doesn't have this atom 310 if (!group.hasAtom(atom.getName())) { 311 312 // If it's not a microheterogenity case 313 if (group.getPDBName().equals(atom.getGroup().getPDBName())) { 314 // And it's not a deuterated case. 'nanoheterogenity' 315 if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){ 316 group.addAtom(atom); 317 } 318 } 319 } 320 atomsInGroup.add(atom); 321 allAtoms[atomCounter] = atom; 322 atomCounter++; 323 } 324 325 /* (non-Javadoc) 326 * @see org.rcsb.mmtf.decoder.StructureDecoderInter 327 * face#setGroupBonds(int, int, int) 328 */ 329 @Override 330 public void setGroupBond(int indOne, 331 int indTwo, int bondOrder) { 332 // Get the atom 333 Atom atomOne = atomsInGroup.get(indOne); 334 Atom atomTwo = atomsInGroup.get(indTwo); 335 // set the new bond 336 @SuppressWarnings("unused") 337 BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder); 338 } 339 340 /* (non-Javadoc) 341 * @see org.rcsb.mmtf.decoder.StructureDecoder 342 * Interface#setInterGroupBonds(int, int, int) 343 */ 344 @Override 345 public void setInterGroupBond(int indOne, 346 int indTwo, int bondOrder) { 347 // Get the atom 348 Atom atomOne = allAtoms[indOne]; 349 Atom atomTwo = allAtoms[indTwo]; 350 // set the new bond 351 @SuppressWarnings("unused") 352 BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder); 353 } 354 355 356 /** 357 * Generates Alternate location groups. 358 * 359 * @param altLoc the alt loc 360 * @return the correct alt loc group 361 */ 362 private Group getCorrectAltLocGroup(Character altLoc) { 363 // see if we know this altLoc already; 364 List<Atom> atoms = group.getAtoms(); 365 if (atoms.size() > 0) { 366 Atom a1 = atoms.get(0); 367 // we are just adding atoms to the current group 368 // probably there is a second group following later... 369 if (a1.getAltLoc().equals(altLoc)) { 370 return group; } 371 } 372 373 // Get the altLocGroup 374 Group altLocgroup = group.getAltLocGroup(altLoc); 375 if (altLocgroup != null) { 376 return altLocgroup; 377 } 378 // If the group already exists (microheterogenity). 379 Group oldGroup = getGroupWithSameResNumButDiffPDBName(); 380 if (oldGroup!= null){ 381 Group altLocG = group; 382 group = oldGroup; 383 group.addAltLoc(altLocG); 384 chain.getAtomGroups().remove(altLocG); 385 return altLocG; 386 } 387 // no matching altLoc group found. 388 // build it up. 389 if (group.getAtoms().size() == 0) { 390 return group; 391 } 392 Group altLocG = (Group) group.clone(); 393 // drop atoms from cloned group... 394 // https://redmine.open-bio.org/issues/3307 395 altLocG.setAtoms(new ArrayList<Atom>()); 396 altLocG.getAltLocs().clear(); 397 group.addAltLoc(altLocG); 398 return altLocG; 399 400 } 401 402 403 /* (non-Javadoc) 404 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 405 * setXtalInfo(java.lang.String, java.util.List) 406 */ 407 @Override 408 public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) { 409 // Now set the xtalographic information 410 PDBCrystallographicInfo pci = new PDBCrystallographicInfo(); 411 SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString); 412 pci.setSpaceGroup(spaceGroup); 413 if (unitCell.length > 0) { 414 CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1], 415 unitCell[2], unitCell[3], unitCell[4], unitCell[5]); 416 pci.setCrystalCell(cell); 417 structure.setCrystallographicInfo(pci); 418 } 419 420 pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList)); 421 } 422 423 424 /** 425 * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 426 * @param polymerType 427 * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 428 */ 429 private int getGroupTypIndicator(PolymerType polymerType) { 430 if(PolymerType.PROTEIN_ONLY.contains(polymerType)){ 431 return 1; 432 } 433 if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){ 434 return 2; 435 } 436 return 0; 437 } 438 439 440 @Override 441 public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) { 442 // Biojava uses this as a one indexed id. 443 bioAssemblyId++; 444 if(bioassIndex!=bioAssemblyId){ 445 transformList = new ArrayList<>(); 446 bioassIndex = bioAssemblyId; 447 } 448 PDBHeader pdbHeader = structure.getPDBHeader(); 449 // Get the bioassembly data 450 Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies(); 451 // Get the bioassembly itself (if it exists 452 BioAssemblyInfo bioAssInfo; 453 if (bioAssemblies.containsKey(bioAssemblyId)){ 454 bioAssInfo = bioAssemblies.get(bioAssemblyId); 455 } 456 else{ 457 bioAssInfo = new BioAssemblyInfo(); 458 bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>()); 459 bioAssemblies.put(bioAssemblyId, bioAssInfo); 460 bioAssInfo.setId(bioAssemblyId); 461 } 462 463 for(int currChainIndex : inputChainIndices){ 464 BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation(); 465 Integer transId = transformList.indexOf(inputTransform)+1; 466 if(transId==0){ 467 transformList.add(inputTransform); 468 transId = transformList.indexOf(inputTransform)+1; 469 } 470 bioAssTrans.setId(transId.toString()); 471 // If it actually has an index - if it doesn't it is because the chain has no density. 472 if (currChainIndex!=-1){ 473 bioAssTrans.setChainId(chainList.get(currChainIndex).getId()); 474 } 475 else { 476 continue; 477 } 478 // Now set matrix 479 Matrix4d mat4d = new Matrix4d(inputTransform); 480 bioAssTrans.setTransformationMatrix(mat4d); 481 // Now add this 482 bioAssInfo.getTransforms().add(bioAssTrans); 483 // sort transformations into a unique order 484 Collections.sort(bioAssInfo.getTransforms()); 485 } 486 } 487 488 @Override 489 public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) { 490 // First get the chains 491 EntityInfo entityInfo = new EntityInfo(); 492 entityInfo.setDescription(description); 493 entityInfo.setType(EntityType.entityTypeFromString(type)); 494 List<Chain> chains = new ArrayList<>(); 495 // Now loop through the chain ids and make a list of them 496 for( int index : chainIndices) { 497 chains.add(chainList.get(index)); 498 chainList.get(index).setEntityInfo(entityInfo); 499 chainSequenceMap.put(chainList.get(index).getId(), sequence); 500 } 501 entityInfo.setChains(chains); 502 entityInfoList.add(entityInfo); 503 } 504 505 @Override 506 public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, 507 String releaseDate, String[] experimentalMethods) { 508 SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); 509 // Get the pdb header 510 PDBHeader pdbHeader = structure.getPDBHeader(); 511 pdbHeader.setTitle(title); 512 pdbHeader.setResolution(resolution); 513 pdbHeader.setRfree(rFree); 514 pdbHeader.setRwork(rWork); 515 // Now loop through the techniques and add them in 516 if (experimentalMethods!=null) { 517 for (String techniqueStr : experimentalMethods) { 518 pdbHeader.setExperimentalTechnique(techniqueStr); 519 } 520 } 521 // Set the dates 522 if(depositionDate!=null){ 523 try { 524 Date depDate = formatter.parse(depositionDate); 525 pdbHeader.setDepDate(depDate); 526 } catch (ParseException e) { 527 logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate); 528 } 529 } 530 else{ 531 pdbHeader.setDepDate(new Date(0)); 532 } 533 if(releaseDate!=null){ 534 try { 535 Date relDate = formatter.parse(releaseDate); 536 pdbHeader.setRelDate(relDate); 537 } catch (ParseException e) { 538 logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate); 539 } 540 } 541 else{ 542 pdbHeader.setRelDate(new Date(0)); 543 } 544 } 545}