001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.io.Serializable;
024import java.text.ParseException;
025import java.text.SimpleDateFormat;
026import java.util.ArrayList;
027import java.util.Date;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031
032import javax.vecmath.Matrix4d;
033
034import org.biojava.nbio.structure.AminoAcid;
035import org.biojava.nbio.structure.AminoAcidImpl;
036import org.biojava.nbio.structure.Atom;
037import org.biojava.nbio.structure.AtomImpl;
038import org.biojava.nbio.structure.BondImpl;
039import org.biojava.nbio.structure.Chain;
040import org.biojava.nbio.structure.ChainImpl;
041import org.biojava.nbio.structure.Element;
042import org.biojava.nbio.structure.EntityInfo;
043import org.biojava.nbio.structure.EntityType;
044import org.biojava.nbio.structure.Group;
045import org.biojava.nbio.structure.HetatomImpl;
046import org.biojava.nbio.structure.NucleotideImpl;
047import org.biojava.nbio.structure.PDBCrystallographicInfo;
048import org.biojava.nbio.structure.PDBHeader;
049import org.biojava.nbio.structure.Structure;
050import org.biojava.nbio.structure.StructureImpl;
051import org.biojava.nbio.structure.StructureTools;
052import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
053import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
054import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
055import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
056import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
057import org.biojava.nbio.structure.xtal.CrystalCell;
058import org.biojava.nbio.structure.xtal.SpaceGroup;
059import org.rcsb.mmtf.api.StructureAdapterInterface;
060import org.rcsb.mmtf.dataholders.MmtfStructure;
061import org.slf4j.Logger;
062import org.slf4j.LoggerFactory;
063
064
065/**
066 * A biojava specific structure inflator for MMTF.
067 * Should be ported to biojava code.
068 *
069 * @author Anthony Bradley
070 */
071public class MmtfStructureReader implements StructureAdapterInterface, Serializable {
072
073        /** The Constant serialVersionUID. */
074        private static final long serialVersionUID = 6772030485225130853L;
075
076        /** The logger */
077        private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class);
078
079        /** The structure. */
080        private Structure structure;
081
082        /** The model number. */
083        private int modelNumber;
084
085        /** The chain. */
086        private Chain chain;
087
088        /** The group. */
089        private Group group;
090
091        /** The atoms in a group. */
092        private List<Atom> atomsInGroup;
093
094        /** All the atoms. */
095        private Atom[] allAtoms;
096        private int atomCounter;
097
098        /** The list of EntityInformation */
099        private List<EntityInfo> entityInfoList;
100
101        /** All the chains */
102        private List<Chain> chainList;
103
104        /** All the chains as a list of maps */
105        private List<Map<String,Chain>> chainMap;
106
107        private List<double[]> transformList;
108
109        private int bioassIndex;
110
111        private Map<String,String> chainSequenceMap;
112
113        /**
114         * Instantiates a new bio java structure decoder.
115         */
116        public MmtfStructureReader() {
117                structure = new StructureImpl();
118                modelNumber = 0;
119                entityInfoList = new ArrayList<>();
120                chainList = new ArrayList<>();
121                chainMap = new ArrayList<>();
122                transformList = new ArrayList<>();
123                chainSequenceMap = new HashMap<>();
124        }
125
126        /**
127         * Gets the structure.
128         *
129         * @return the structure
130         */
131        public Structure getStructure() {
132                return structure;
133        }
134
135        @Override
136        public void finalizeStructure() {
137                // Number the remaining ones
138                int counter =0;
139                // Add the entity info
140                for (EntityInfo entityInfo : entityInfoList) {
141                        counter++;
142                        entityInfo.setMolId(counter);
143                }
144                structure.setEntityInfos(entityInfoList);
145                // Add the actual chains
146                for(int i=0; i<chainMap.size(); i++) {
147                        // Now add the chain information
148                        Map<String, Chain> modelChainMap = chainMap.get(i);
149                        for(Chain modelChain : modelChainMap.values()){
150                                structure.addChain(modelChain, i);
151                                String sequence = chainSequenceMap.get(modelChain.getId());
152                                if (sequence == null) {
153                                        logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId());
154                                        continue;
155                                }
156                                MmtfUtils.addSeqRes(modelChain, sequence);
157                        }
158                }
159                StructureTools.cleanUpAltLocs(structure);
160        }
161
162        @Override
163        public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups,
164                        int totalNumChains, int totalNumModels, String modelId) {
165                structure.setPDBCode(modelId);
166                allAtoms = new Atom[totalNumAtoms];
167        }
168
169
170        /* (non-Javadoc)
171         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int)
172         */
173        @Override
174        public void setModelInfo(int inputModelNumber,
175                        int chainCount) {
176                modelNumber = inputModelNumber;
177                structure.addModel(new ArrayList<Chain>(chainCount));
178                chainMap.add(new HashMap<>());
179        }
180
181        /* (non-Javadoc)
182         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
183         * #setChainInfo(java.lang.String, int)
184         */
185        @Override
186        public void setChainInfo(String chainId, String chainName, int groupCount) {
187                // First check to see if the chain exists
188                Map<String, Chain> modelChainMap = chainMap.get(modelNumber);
189                if(modelChainMap.containsKey(chainId)){
190                        chain = modelChainMap.get(chainId);
191                }
192                // If we need to set a new chain do this
193                else{
194                        chain = new ChainImpl();
195                        chain.setId(chainId.trim());
196                        chain.setName(chainName);
197                        chain.setAtomGroups(new ArrayList<>(groupCount));
198                        modelChainMap.put(chainId, chain);
199                        chainList.add(chain);
200                }
201        }
202
203
204        /* (non-Javadoc)
205         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
206         * #setGroupInfo(java.lang.String, int, char, int, int)
207         */
208        @Override
209        public void setGroupInfo(String groupName, int groupNumber,
210                        char insertionCode, String chemCompType, int atomCount, int bondCount,
211                        char singleLetterCode, int sequenceIndexId, int secStructType) {
212                // Get the polymer type
213                ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType);
214                int polymerType = getGroupTypIndicator(residueType.polymerType);
215                switch (polymerType) {
216                case 1:
217                        AminoAcid aa = new AminoAcidImpl();
218                        // Now set the one letter code
219                        aa.setAminoType(singleLetterCode);
220                        group = aa;
221                        break;
222                case 2:
223                        group = new NucleotideImpl();
224                        break;
225                default:
226                        group = new HetatomImpl();
227                        break;
228                }
229                atomsInGroup = new ArrayList<Atom>();
230                ChemComp chemComp = new ChemComp();
231                chemComp.setOne_letter_code(String.valueOf(singleLetterCode));
232                chemComp.setType(chemCompType.toUpperCase());
233                chemComp.setResidueType(residueType);
234                chemComp.setPolymerType(residueType.polymerType);
235                group.setChemComp(chemComp);
236                group.setPDBName(groupName);
237                if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) {
238                        group.setResidueNumber(chain.getName().trim(), groupNumber, null);
239                } else {
240                        group.setResidueNumber(chain.getName().trim(),
241                                        groupNumber, insertionCode);
242                }
243                group.setAtoms(new ArrayList<Atom>(atomCount));
244                if (polymerType==1 || polymerType==2) {
245                        MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId);
246                }
247                if (atomCount > 0) {
248                        chain.addGroup(group);
249                }
250                MmtfUtils.setSecStructType(group, secStructType);
251        }
252
253        /**
254         *
255         * @return
256         */
257        private Group getGroupWithSameResNumButDiffPDBName() {
258                // If this chain already has this group number
259                for (Group g : chain.getAtomGroups() ) {
260                        if (g.getResidueNumber().equals(group.getResidueNumber())) {
261                                if( ! g.getPDBName().equals(group.getPDBName() )){
262                                        return g;
263                                }
264                        }
265                }
266                return null;
267        }
268
269        /* (non-Javadoc)
270         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
271         * setAtomInfo(java.lang.String, int, char, float, float,
272         * float, float, float, java.lang.String, int)
273         */
274        @Override
275        public void setAtomInfo(String atomName,
276                        int serialNumber, char alternativeLocationId, float x,
277                        float y, float z, float occupancy,
278                        float temperatureFactor,
279                        String element, int charge) {
280                Atom atom = new AtomImpl();
281                Group altGroup = null;
282                atom.setPDBserial(serialNumber);
283                atom.setName(atomName.trim());
284                atom.setElement(Element.valueOfIgnoreCase(element));
285                if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){
286                        alternativeLocationId = ' ';
287                }
288                if (alternativeLocationId != ' ') {
289                        // Get the altGroup
290                        altGroup = getCorrectAltLocGroup(alternativeLocationId);
291                        atom.setAltLoc(alternativeLocationId);
292                } else {
293                        atom.setAltLoc(Character.valueOf(' '));
294                }
295                atom.setX(x);
296                atom.setY(y);
297                atom.setZ(z);
298                atom.setOccupancy(occupancy);
299                atom.setTempFactor(temperatureFactor);
300                atom.setCharge((short) charge);
301                if (altGroup == null) {
302                        group.addAtom(atom);
303                } else {
304                        altGroup.setChain(chain);
305                        altGroup.addAtom(atom);
306                }
307
308                // IF the main group doesn't have this atom
309                if (!group.hasAtom(atom.getName())) {
310                        
311                        // If it's not a microheterogenity case
312                        if (group.getPDBName().equals(atom.getGroup().getPDBName())) {
313                                // And it's not a deuterated case.  'nanoheterogenity'
314                                if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){
315                                        group.addAtom(atom);
316                                }
317                        }
318                }
319                atomsInGroup.add(atom);
320                allAtoms[atomCounter] = atom;
321                atomCounter++;
322        }
323
324        /* (non-Javadoc)
325         * @see org.rcsb.mmtf.decoder.StructureDecoderInter
326         * face#setGroupBonds(int, int, int)
327         */
328        @Override
329        public void setGroupBond(int indOne,
330                        int indTwo, int bondOrder) {
331                // Get the atom
332                Atom atomOne = atomsInGroup.get(indOne);
333                Atom atomTwo = atomsInGroup.get(indTwo);
334                // set the new bond
335                @SuppressWarnings("unused")
336                BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder);
337        }
338
339        /* (non-Javadoc)
340         * @see org.rcsb.mmtf.decoder.StructureDecoder
341         * Interface#setInterGroupBonds(int, int, int)
342         */
343        @Override
344        public void setInterGroupBond(int indOne,
345                        int indTwo, int bondOrder) {
346                // Get the atom
347                Atom atomOne = allAtoms[indOne];
348                Atom atomTwo = allAtoms[indTwo];
349                // set the new bond
350                @SuppressWarnings("unused")
351                BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder);
352        }
353
354
355        /**
356         * Generates Alternate location groups.
357         *
358         * @param altLoc the alt loc
359         * @return the correct alt loc group
360         */
361        private Group getCorrectAltLocGroup(Character altLoc) {
362                // see if we know this altLoc already;
363                List<Atom> atoms = group.getAtoms();
364                if (atoms.size() > 0) {
365                        Atom a1 = atoms.get(0);
366                        // we are just adding atoms to the current group
367                        // probably there is a second group following later...
368                        if (a1.getAltLoc().equals(altLoc)) {
369                                return group;   }
370                }
371
372                // Get the altLocGroup
373                Group altLocgroup = group.getAltLocGroup(altLoc);
374                if (altLocgroup != null) {
375                        return altLocgroup;
376                }
377                // If the group already exists (microheterogenity).
378                Group oldGroup = getGroupWithSameResNumButDiffPDBName();
379                if (oldGroup!= null){
380                        Group altLocG = group;
381                        group = oldGroup;
382                        group.addAltLoc(altLocG);
383                        chain.getAtomGroups().remove(altLocG);
384                        return altLocG;
385                }
386                // no matching altLoc group found.
387                // build it up.
388                if (group.getAtoms().size() == 0) {
389                        return group;
390                }
391                Group altLocG = (Group) group.clone();
392                // drop atoms from cloned group...
393                // https://redmine.open-bio.org/issues/3307
394                altLocG.setAtoms(new ArrayList<Atom>());
395                altLocG.getAltLocs().clear();
396                group.addAltLoc(altLocG);
397                return altLocG;
398
399        }
400
401
402        /* (non-Javadoc)
403         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
404         * setXtalInfo(java.lang.String, java.util.List)
405         */
406        @Override
407        public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) {
408                // Now set the xtalographic information
409                PDBCrystallographicInfo pci = new PDBCrystallographicInfo();
410                SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString);
411                pci.setSpaceGroup(spaceGroup);
412                if (unitCell.length > 0) {
413                        CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1],
414                                        unitCell[2], unitCell[3], unitCell[4], unitCell[5]);
415                        pci.setCrystalCell(cell);
416                        structure.setCrystallographicInfo(pci);
417                }
418
419                pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList));
420        }
421
422
423        /**
424         * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
425         * @param currentGroup
426         * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
427         */
428        private int getGroupTypIndicator(PolymerType polymerType) {
429                if(PolymerType.PROTEIN_ONLY.contains(polymerType)){
430                        return 1;
431                }
432                if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){
433                        return 2;
434                }
435                return 0;
436        }
437
438
439        @Override
440        public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) {
441                // Biojava uses this as a one indexed id.
442                bioAssemblyId++;
443                if(bioassIndex!=bioAssemblyId){
444                        transformList = new ArrayList<>();
445                        bioassIndex = bioAssemblyId;
446                }
447                PDBHeader pdbHeader = structure.getPDBHeader();
448                // Get the bioassembly data
449                Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies();
450                // Get the bioassembly itself (if it exists
451                BioAssemblyInfo bioAssInfo;
452                if (bioAssemblies.containsKey(bioAssemblyId)){
453                        bioAssInfo = bioAssemblies.get(bioAssemblyId);
454                }
455                else{
456                        bioAssInfo = new  BioAssemblyInfo();
457                        bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>());
458                        bioAssemblies.put(bioAssemblyId, bioAssInfo);
459                        bioAssInfo.setId(bioAssemblyId);
460                }
461
462                for(int currChainIndex : inputChainIndices){
463                        BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation();
464                        Integer transId = transformList.indexOf(inputTransform)+1;
465                        if(transId==0){
466                                transformList.add(inputTransform);
467                                transId = transformList.indexOf(inputTransform)+1;
468                        }
469                        bioAssTrans.setId(transId.toString());
470                        // If it actually has an index - if it doesn't it is because the chain has no density.
471                        if (currChainIndex!=-1){
472                                bioAssTrans.setChainId(chainList.get(currChainIndex).getId());
473                        }
474                        else {
475                                continue;
476                        }
477                        // Now set matrix
478                        Matrix4d mat4d = new Matrix4d(inputTransform);
479                        bioAssTrans.setTransformationMatrix(mat4d);
480                        // Now add this
481                        bioAssInfo.getTransforms().add(bioAssTrans);
482                }
483        }
484
485        @Override
486        public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) {
487                // First get the chains
488                EntityInfo entityInfo = new EntityInfo();
489                entityInfo.setDescription(description);
490                entityInfo.setType(EntityType.entityTypeFromString(type));
491                List<Chain> chains = new ArrayList<>();
492                // Now loop through the chain ids and make a list of them
493                for( int index : chainIndices) {
494                        chains.add(chainList.get(index));
495                        chainList.get(index).setEntityInfo(entityInfo);
496                        chainSequenceMap.put(chainList.get(index).getId(), sequence);
497                }
498                entityInfo.setChains(chains);
499                entityInfoList.add(entityInfo);
500        }
501
502        @Override
503        public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate,
504                        String releaseDate, String[] experimentalMethods) {
505                SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
506                // Get the pdb header
507                PDBHeader pdbHeader = structure.getPDBHeader();
508                pdbHeader.setTitle(title);
509                pdbHeader.setResolution(resolution);
510                pdbHeader.setRfree(rFree);
511                pdbHeader.setRwork(rWork);
512                // Now loop through the techniques and add them in
513                if (experimentalMethods!=null) {
514                        for (String techniqueStr : experimentalMethods) {
515                                pdbHeader.setExperimentalTechnique(techniqueStr);
516                        }
517                }
518                // Set the dates
519                if(depositionDate!=null){
520                        try {
521                                Date depDate = formatter.parse(depositionDate);
522                                pdbHeader.setDepDate(depDate);
523                        } catch (ParseException e) {
524                                logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate);
525                        }
526                }
527                else{
528                        pdbHeader.setDepDate(new Date(0));
529                }
530                if(releaseDate!=null){
531                        try {
532                                Date relDate = formatter.parse(releaseDate);
533                                pdbHeader.setRelDate(relDate);
534                        } catch (ParseException e) {
535                                logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate);
536                        }
537                }
538                else{
539                        pdbHeader.setRelDate(new Date(0));
540                }
541        }
542}