001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.io.Serializable;
024import java.text.ParseException;
025import java.text.SimpleDateFormat;
026import java.util.ArrayList;
027import java.util.Collections;
028import java.util.Date;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032
033import javax.vecmath.Matrix4d;
034
035import org.biojava.nbio.structure.AminoAcid;
036import org.biojava.nbio.structure.AminoAcidImpl;
037import org.biojava.nbio.structure.Atom;
038import org.biojava.nbio.structure.AtomImpl;
039import org.biojava.nbio.structure.BondImpl;
040import org.biojava.nbio.structure.Chain;
041import org.biojava.nbio.structure.ChainImpl;
042import org.biojava.nbio.structure.Element;
043import org.biojava.nbio.structure.EntityInfo;
044import org.biojava.nbio.structure.EntityType;
045import org.biojava.nbio.structure.Group;
046import org.biojava.nbio.structure.HetatomImpl;
047import org.biojava.nbio.structure.NucleotideImpl;
048import org.biojava.nbio.structure.PDBCrystallographicInfo;
049import org.biojava.nbio.structure.PDBHeader;
050import org.biojava.nbio.structure.Structure;
051import org.biojava.nbio.structure.StructureImpl;
052import org.biojava.nbio.structure.StructureTools;
053import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
054import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
055import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
056import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
057import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
058import org.biojava.nbio.structure.xtal.CrystalCell;
059import org.biojava.nbio.structure.xtal.SpaceGroup;
060import org.rcsb.mmtf.api.StructureAdapterInterface;
061import org.rcsb.mmtf.dataholders.MmtfStructure;
062import org.slf4j.Logger;
063import org.slf4j.LoggerFactory;
064
065
066/**
067 * A biojava specific structure inflator for MMTF.
068 * Should be ported to biojava code.
069 *
070 * @author Anthony Bradley
071 * @since 5.0
072 * 
073 */
074public class MmtfStructureReader implements StructureAdapterInterface, Serializable {
075
076        /** The Constant serialVersionUID. */
077        private static final long serialVersionUID = 6772030485225130853L;
078
079        /** The logger */
080        private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class);
081
082        /** The structure. */
083        private Structure structure;
084
085        /** The model number. */
086        private int modelNumber;
087
088        /** The chain. */
089        private Chain chain;
090
091        /** The group. */
092        private Group group;
093
094        /** The atoms in a group. */
095        private List<Atom> atomsInGroup;
096
097        /** All the atoms. */
098        private Atom[] allAtoms;
099        private int atomCounter;
100
101        /** The list of EntityInformation */
102        private List<EntityInfo> entityInfoList;
103
104        /** All the chains */
105        private List<Chain> chainList;
106
107        /** All the chains as a list of maps */
108        private List<Map<String,Chain>> chainMap;
109
110        private List<double[]> transformList;
111
112        private int bioassIndex;
113
114        private Map<String,String> chainSequenceMap;
115
116        /**
117         * Instantiates a new bio java structure decoder.
118         */
119        public MmtfStructureReader() {
120                structure = new StructureImpl();
121                modelNumber = 0;
122                entityInfoList = new ArrayList<>();
123                chainList = new ArrayList<>();
124                chainMap = new ArrayList<>();
125                transformList = new ArrayList<>();
126                chainSequenceMap = new HashMap<>();
127        }
128
129        /**
130         * Gets the structure.
131         *
132         * @return the structure
133         */
134        public Structure getStructure() {
135                return structure;
136        }
137
138        @Override
139        public void finalizeStructure() {
140                // Number the remaining ones
141                int counter =0;
142                // Add the entity info
143                for (EntityInfo entityInfo : entityInfoList) {
144                        counter++;
145                        entityInfo.setMolId(counter);
146                }
147                structure.setEntityInfos(entityInfoList);
148                // Add the actual chains
149                for(int i=0; i<chainMap.size(); i++) {
150                        // Now add the chain information
151                        Map<String, Chain> modelChainMap = chainMap.get(i);
152                        for(Chain modelChain : modelChainMap.values()){
153                                structure.addChain(modelChain, i);
154                                String sequence = chainSequenceMap.get(modelChain.getId());
155                                if (sequence == null) {
156                                        logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId());
157                                        continue;
158                                }
159                                MmtfUtils.addSeqRes(modelChain, sequence);
160                        }
161                }
162                StructureTools.cleanUpAltLocs(structure);
163        }
164
165        @Override
166        public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups,
167                        int totalNumChains, int totalNumModels, String modelId) {
168                structure.setPDBCode(modelId);
169                allAtoms = new Atom[totalNumAtoms];
170        }
171
172
173        /* (non-Javadoc)
174         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int)
175         */
176        @Override
177        public void setModelInfo(int inputModelNumber,
178                        int chainCount) {
179                modelNumber = inputModelNumber;
180                structure.addModel(new ArrayList<Chain>(chainCount));
181                chainMap.add(new HashMap<>());
182        }
183
184        /* (non-Javadoc)
185         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
186         * #setChainInfo(java.lang.String, int)
187         */
188        @Override
189        public void setChainInfo(String chainId, String chainName, int groupCount) {
190                // First check to see if the chain exists
191                Map<String, Chain> modelChainMap = chainMap.get(modelNumber);
192                if(modelChainMap.containsKey(chainId)){
193                        chain = modelChainMap.get(chainId);
194                }
195                // If we need to set a new chain do this
196                else{
197                        chain = new ChainImpl();
198                        chain.setId(chainId.trim());
199                        chain.setName(chainName);
200                        chain.setAtomGroups(new ArrayList<>(groupCount));
201                        modelChainMap.put(chainId, chain);
202                        chainList.add(chain);
203                }
204        }
205
206
207        /* (non-Javadoc)
208         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
209         * #setGroupInfo(java.lang.String, int, char, int, int)
210         */
211        @Override
212        public void setGroupInfo(String groupName, int groupNumber,
213                        char insertionCode, String chemCompType, int atomCount, int bondCount,
214                        char singleLetterCode, int sequenceIndexId, int secStructType) {
215                // Get the polymer type
216                ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType);
217                int polymerType = getGroupTypIndicator(residueType.polymerType);
218                switch (polymerType) {
219                case 1:
220                        AminoAcid aa = new AminoAcidImpl();
221                        // Now set the one letter code
222                        aa.setAminoType(singleLetterCode);
223                        group = aa;
224                        break;
225                case 2:
226                        group = new NucleotideImpl();
227                        break;
228                default:
229                        group = new HetatomImpl();
230                        break;
231                }
232                atomsInGroup = new ArrayList<>();
233                ChemComp chemComp = new ChemComp();
234                chemComp.setOne_letter_code(String.valueOf(singleLetterCode));
235                chemComp.setType(chemCompType.toUpperCase());
236                chemComp.setResidueType(residueType);
237                chemComp.setPolymerType(residueType.polymerType);
238                group.setChemComp(chemComp);
239                group.setPDBName(groupName);
240                if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) {
241                        group.setResidueNumber(chain.getName().trim(), groupNumber, null);
242                } else {
243                        group.setResidueNumber(chain.getName().trim(),
244                                        groupNumber, insertionCode);
245                }
246                group.setAtoms(new ArrayList<>(atomCount));
247                if (polymerType==1 || polymerType==2) {
248                        MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId);
249                }
250                if (atomCount > 0) {
251                        chain.addGroup(group);
252                }
253                MmtfUtils.setSecStructType(group, secStructType);
254        }
255
256        /**
257         *
258         * @return
259         */
260        private Group getGroupWithSameResNumButDiffPDBName() {
261                // If this chain already has this group number
262                for (Group g : chain.getAtomGroups() ) {
263                        if (g.getResidueNumber().equals(group.getResidueNumber())) {
264                                if( ! g.getPDBName().equals(group.getPDBName() )){
265                                        return g;
266                                }
267                        }
268                }
269                return null;
270        }
271
272        /* (non-Javadoc)
273         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
274         * setAtomInfo(java.lang.String, int, char, float, float,
275         * float, float, float, java.lang.String, int)
276         */
277        @Override
278        public void setAtomInfo(String atomName,
279                        int serialNumber, char alternativeLocationId, float x,
280                        float y, float z, float occupancy,
281                        float temperatureFactor,
282                        String element, int charge) {
283                Atom atom = new AtomImpl();
284                Group altGroup = null;
285                atom.setPDBserial(serialNumber);
286                atom.setName(atomName.trim());
287                atom.setElement(Element.valueOfIgnoreCase(element));
288                if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){
289                        alternativeLocationId = ' ';
290                }
291                if (alternativeLocationId != ' ') {
292                        // Get the altGroup
293                        altGroup = getCorrectAltLocGroup(alternativeLocationId);
294                        atom.setAltLoc(alternativeLocationId);
295                } else {
296                        atom.setAltLoc(Character.valueOf(' '));
297                }
298                atom.setX(x);
299                atom.setY(y);
300                atom.setZ(z);
301                atom.setOccupancy(occupancy);
302                atom.setTempFactor(temperatureFactor);
303                atom.setCharge((short) charge);
304                if (altGroup == null) {
305                        group.addAtom(atom);
306                } else {
307                        altGroup.setChain(chain);
308                        altGroup.addAtom(atom);
309                }
310
311                // IF the main group doesn't have this atom
312                if (!group.hasAtom(atom.getName())) {
313
314                        // If it's not a microheterogenity case
315                        if (group.getPDBName().equals(atom.getGroup().getPDBName())) {
316                                // And it's not a deuterated case.  'nanoheterogenity'
317                                if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){
318                                        group.addAtom(atom);
319                                }
320                        }
321                }
322                atomsInGroup.add(atom);
323                allAtoms[atomCounter] = atom;
324                atomCounter++;
325        }
326
327        /* (non-Javadoc)
328         * @see org.rcsb.mmtf.decoder.StructureDecoderInter
329         * face#setGroupBonds(int, int, int)
330         */
331        @Override
332        public void setGroupBond(int indOne, int indTwo, int bondOrder) {
333                
334                // Get the atoms
335                Atom atomOne = atomsInGroup.get(indOne);
336                Atom atomTwo = atomsInGroup.get(indTwo);
337                
338                // set the new bond
339                new BondImpl(atomOne, atomTwo, bondOrder);
340                
341        }
342
343        /* (non-Javadoc)
344         * @see org.rcsb.mmtf.decoder.StructureDecoder
345         * Interface#setInterGroupBonds(int, int, int)
346         */
347        @Override
348        public void setInterGroupBond(int indOne, int indTwo, int bondOrder) {
349                
350                // Get the atoms
351                Atom atomOne = allAtoms[indOne];
352                Atom atomTwo = allAtoms[indTwo];
353                
354                // set the new bond (this 
355                new BondImpl(atomOne, atomTwo, bondOrder);
356        }
357
358
359        /**
360         * Generates Alternate location groups.
361         *
362         * @param altLoc the alt loc
363         * @return the correct alt loc group
364         */
365        private Group getCorrectAltLocGroup(Character altLoc) {
366                // see if we know this altLoc already;
367                List<Atom> atoms = group.getAtoms();
368                if (atoms.size() > 0) {
369                        Atom a1 = atoms.get(0);
370                        // we are just adding atoms to the current group
371                        // probably there is a second group following later...
372                        if (a1.getAltLoc().equals(altLoc)) {
373                                return group;   }
374                }
375
376                // Get the altLocGroup
377                Group altLocgroup = group.getAltLocGroup(altLoc);
378                if (altLocgroup != null) {
379                        return altLocgroup;
380                }
381                // If the group already exists (microheterogenity).
382                Group oldGroup = getGroupWithSameResNumButDiffPDBName();
383                if (oldGroup!= null){
384                        Group altLocG = group;
385                        group = oldGroup;
386                        group.addAltLoc(altLocG);
387                        chain.getAtomGroups().remove(altLocG);
388                        return altLocG;
389                }
390                // no matching altLoc group found.
391                // build it up.
392                if (group.getAtoms().size() == 0) {
393                        return group;
394                }
395                Group altLocG = (Group) group.clone();
396                // drop atoms from cloned group...
397                // https://redmine.open-bio.org/issues/3307
398                altLocG.setAtoms(new ArrayList<Atom>());
399                altLocG.getAltLocs().clear();
400                group.addAltLoc(altLocG);
401                return altLocG;
402
403        }
404
405
406        /* (non-Javadoc)
407         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
408         * setXtalInfo(java.lang.String, java.util.List)
409         */
410        @Override
411        public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) {
412                // Now set the xtalographic information
413                PDBCrystallographicInfo pci = new PDBCrystallographicInfo();
414                SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString);
415                pci.setSpaceGroup(spaceGroup);
416                if (unitCell.length > 0) {
417                        CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1],
418                                        unitCell[2], unitCell[3], unitCell[4], unitCell[5]);
419                        pci.setCrystalCell(cell);
420                        structure.setCrystallographicInfo(pci);
421                }
422
423                pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList));
424        }
425
426
427        /**
428         * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
429         * @param polymerType
430         * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
431         */
432        private int getGroupTypIndicator(PolymerType polymerType) {
433                if(PolymerType.PROTEIN_ONLY.contains(polymerType)){
434                        return 1;
435                }
436                if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){
437                        return 2;
438                }
439                return 0;
440        }
441
442
443        @Override
444        public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) {
445                // Biojava uses this as a one indexed id.
446                bioAssemblyId++;
447                if(bioassIndex!=bioAssemblyId){
448                        transformList = new ArrayList<>();
449                        bioassIndex = bioAssemblyId;
450                }
451                PDBHeader pdbHeader = structure.getPDBHeader();
452                // Get the bioassembly data
453                Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies();
454                // Get the bioassembly itself (if it exists
455                BioAssemblyInfo bioAssInfo;
456                if (bioAssemblies.containsKey(bioAssemblyId)){
457                        bioAssInfo = bioAssemblies.get(bioAssemblyId);
458                }
459                else{
460                        bioAssInfo = new  BioAssemblyInfo();
461                        bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>());
462                        bioAssemblies.put(bioAssemblyId, bioAssInfo);
463                        bioAssInfo.setId(bioAssemblyId);
464                }
465
466                for(int currChainIndex : inputChainIndices){
467                        BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation();
468                        Integer transId = transformList.indexOf(inputTransform)+1;
469                        if(transId==0){
470                                transformList.add(inputTransform);
471                                transId = transformList.indexOf(inputTransform)+1;
472                        }
473                        bioAssTrans.setId(transId.toString());
474                        // If it actually has an index - if it doesn't it is because the chain has no density.
475                        if (currChainIndex!=-1){
476                                bioAssTrans.setChainId(chainList.get(currChainIndex).getId());
477                        }
478                        else {
479                                continue;
480                        }
481                        // Now set matrix
482                        Matrix4d mat4d = new Matrix4d(inputTransform);
483                        bioAssTrans.setTransformationMatrix(mat4d);
484                        // Now add this
485                        bioAssInfo.getTransforms().add(bioAssTrans);
486                        // sort transformations into a unique order
487                        Collections.sort(bioAssInfo.getTransforms());
488                }
489        }
490
491        @Override
492        public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) {
493                // First get the chains
494                EntityInfo entityInfo = new EntityInfo();
495                entityInfo.setDescription(description);
496                entityInfo.setType(EntityType.entityTypeFromString(type));
497                List<Chain> chains = new ArrayList<>();
498                // Now loop through the chain ids and make a list of them
499                for( int index : chainIndices) {
500                        chains.add(chainList.get(index));
501                        chainList.get(index).setEntityInfo(entityInfo);
502                        chainSequenceMap.put(chainList.get(index).getId(), sequence);
503                }
504                entityInfo.setChains(chains);
505                entityInfoList.add(entityInfo);
506        }
507
508        @Override
509        public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate,
510                        String releaseDate, String[] experimentalMethods) {
511                SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
512                // Get the pdb header
513                PDBHeader pdbHeader = structure.getPDBHeader();
514                pdbHeader.setTitle(title);
515                pdbHeader.setResolution(resolution);
516                pdbHeader.setRfree(rFree);
517                pdbHeader.setRwork(rWork);
518                // Now loop through the techniques and add them in
519                if (experimentalMethods!=null) {
520                        for (String techniqueStr : experimentalMethods) {
521                                pdbHeader.setExperimentalTechnique(techniqueStr);
522                        }
523                }
524                // Set the dates
525                if(depositionDate!=null){
526                        try {
527                                Date depDate = formatter.parse(depositionDate);
528                                pdbHeader.setDepDate(depDate);
529                        } catch (ParseException e) {
530                                logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate);
531                        }
532                }
533                else{
534                        pdbHeader.setDepDate(new Date(0));
535                }
536                if(releaseDate!=null){
537                        try {
538                                Date relDate = formatter.parse(releaseDate);
539                                pdbHeader.setRelDate(relDate);
540                        } catch (ParseException e) {
541                                logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate);
542                        }
543                }
544                else{
545                        pdbHeader.setRelDate(new Date(0));
546                }
547        }
548}