001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.io.Serializable;
024import java.text.ParseException;
025import java.text.SimpleDateFormat;
026import java.util.ArrayList;
027import java.util.Collections;
028import java.util.Date;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032
033import javax.vecmath.Matrix4d;
034
035import org.biojava.nbio.structure.AminoAcid;
036import org.biojava.nbio.structure.AminoAcidImpl;
037import org.biojava.nbio.structure.Atom;
038import org.biojava.nbio.structure.AtomImpl;
039import org.biojava.nbio.structure.BondImpl;
040import org.biojava.nbio.structure.Chain;
041import org.biojava.nbio.structure.ChainImpl;
042import org.biojava.nbio.structure.Element;
043import org.biojava.nbio.structure.EntityInfo;
044import org.biojava.nbio.structure.EntityType;
045import org.biojava.nbio.structure.Group;
046import org.biojava.nbio.structure.HetatomImpl;
047import org.biojava.nbio.structure.NucleotideImpl;
048import org.biojava.nbio.structure.PDBCrystallographicInfo;
049import org.biojava.nbio.structure.PDBHeader;
050import org.biojava.nbio.structure.Structure;
051import org.biojava.nbio.structure.StructureImpl;
052import org.biojava.nbio.structure.StructureTools;
053import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
054import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
055import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
056import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
057import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
058import org.biojava.nbio.structure.xtal.CrystalCell;
059import org.biojava.nbio.structure.xtal.SpaceGroup;
060import org.rcsb.mmtf.api.StructureAdapterInterface;
061import org.rcsb.mmtf.dataholders.MmtfStructure;
062import org.slf4j.Logger;
063import org.slf4j.LoggerFactory;
064
065
066/**
067 * A biojava specific structure inflator for MMTF.
068 * Should be ported to biojava code.
069 *
070 * @author Anthony Bradley
071 */
072public class MmtfStructureReader implements StructureAdapterInterface, Serializable {
073
074        /** The Constant serialVersionUID. */
075        private static final long serialVersionUID = 6772030485225130853L;
076
077        /** The logger */
078        private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class);
079
080        /** The structure. */
081        private Structure structure;
082
083        /** The model number. */
084        private int modelNumber;
085
086        /** The chain. */
087        private Chain chain;
088
089        /** The group. */
090        private Group group;
091
092        /** The atoms in a group. */
093        private List<Atom> atomsInGroup;
094
095        /** All the atoms. */
096        private Atom[] allAtoms;
097        private int atomCounter;
098
099        /** The list of EntityInformation */
100        private List<EntityInfo> entityInfoList;
101
102        /** All the chains */
103        private List<Chain> chainList;
104
105        /** All the chains as a list of maps */
106        private List<Map<String,Chain>> chainMap;
107
108        private List<double[]> transformList;
109
110        private int bioassIndex;
111
112        private Map<String,String> chainSequenceMap;
113
114        /**
115         * Instantiates a new bio java structure decoder.
116         */
117        public MmtfStructureReader() {
118                structure = new StructureImpl();
119                modelNumber = 0;
120                entityInfoList = new ArrayList<>();
121                chainList = new ArrayList<>();
122                chainMap = new ArrayList<>();
123                transformList = new ArrayList<>();
124                chainSequenceMap = new HashMap<>();
125        }
126
127        /**
128         * Gets the structure.
129         *
130         * @return the structure
131         */
132        public Structure getStructure() {
133                return structure;
134        }
135
136        @Override
137        public void finalizeStructure() {
138                // Number the remaining ones
139                int counter =0;
140                // Add the entity info
141                for (EntityInfo entityInfo : entityInfoList) {
142                        counter++;
143                        entityInfo.setMolId(counter);
144                }
145                structure.setEntityInfos(entityInfoList);
146                // Add the actual chains
147                for(int i=0; i<chainMap.size(); i++) {
148                        // Now add the chain information
149                        Map<String, Chain> modelChainMap = chainMap.get(i);
150                        for(Chain modelChain : modelChainMap.values()){
151                                structure.addChain(modelChain, i);
152                                String sequence = chainSequenceMap.get(modelChain.getId());
153                                if (sequence == null) {
154                                        logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId());
155                                        continue;
156                                }
157                                MmtfUtils.addSeqRes(modelChain, sequence);
158                        }
159                }
160                StructureTools.cleanUpAltLocs(structure);
161        }
162
163        @Override
164        public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups,
165                        int totalNumChains, int totalNumModels, String modelId) {
166                structure.setPDBCode(modelId);
167                allAtoms = new Atom[totalNumAtoms];
168        }
169
170
171        /* (non-Javadoc)
172         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int)
173         */
174        @Override
175        public void setModelInfo(int inputModelNumber,
176                        int chainCount) {
177                modelNumber = inputModelNumber;
178                structure.addModel(new ArrayList<Chain>(chainCount));
179                chainMap.add(new HashMap<>());
180        }
181
182        /* (non-Javadoc)
183         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
184         * #setChainInfo(java.lang.String, int)
185         */
186        @Override
187        public void setChainInfo(String chainId, String chainName, int groupCount) {
188                // First check to see if the chain exists
189                Map<String, Chain> modelChainMap = chainMap.get(modelNumber);
190                if(modelChainMap.containsKey(chainId)){
191                        chain = modelChainMap.get(chainId);
192                }
193                // If we need to set a new chain do this
194                else{
195                        chain = new ChainImpl();
196                        chain.setId(chainId.trim());
197                        chain.setName(chainName);
198                        chain.setAtomGroups(new ArrayList<>(groupCount));
199                        modelChainMap.put(chainId, chain);
200                        chainList.add(chain);
201                }
202        }
203
204
205        /* (non-Javadoc)
206         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
207         * #setGroupInfo(java.lang.String, int, char, int, int)
208         */
209        @Override
210        public void setGroupInfo(String groupName, int groupNumber,
211                        char insertionCode, String chemCompType, int atomCount, int bondCount,
212                        char singleLetterCode, int sequenceIndexId, int secStructType) {
213                // Get the polymer type
214                ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType);
215                int polymerType = getGroupTypIndicator(residueType.polymerType);
216                switch (polymerType) {
217                case 1:
218                        AminoAcid aa = new AminoAcidImpl();
219                        // Now set the one letter code
220                        aa.setAminoType(singleLetterCode);
221                        group = aa;
222                        break;
223                case 2:
224                        group = new NucleotideImpl();
225                        break;
226                default:
227                        group = new HetatomImpl();
228                        break;
229                }
230                atomsInGroup = new ArrayList<Atom>();
231                ChemComp chemComp = new ChemComp();
232                chemComp.setOne_letter_code(String.valueOf(singleLetterCode));
233                chemComp.setType(chemCompType.toUpperCase());
234                chemComp.setResidueType(residueType);
235                chemComp.setPolymerType(residueType.polymerType);
236                group.setChemComp(chemComp);
237                group.setPDBName(groupName);
238                if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) {
239                        group.setResidueNumber(chain.getName().trim(), groupNumber, null);
240                } else {
241                        group.setResidueNumber(chain.getName().trim(),
242                                        groupNumber, insertionCode);
243                }
244                group.setAtoms(new ArrayList<Atom>(atomCount));
245                if (polymerType==1 || polymerType==2) {
246                        MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId);
247                }
248                if (atomCount > 0) {
249                        chain.addGroup(group);
250                }
251                MmtfUtils.setSecStructType(group, secStructType);
252        }
253
254        /**
255         *
256         * @return
257         */
258        private Group getGroupWithSameResNumButDiffPDBName() {
259                // If this chain already has this group number
260                for (Group g : chain.getAtomGroups() ) {
261                        if (g.getResidueNumber().equals(group.getResidueNumber())) {
262                                if( ! g.getPDBName().equals(group.getPDBName() )){
263                                        return g;
264                                }
265                        }
266                }
267                return null;
268        }
269
270        /* (non-Javadoc)
271         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
272         * setAtomInfo(java.lang.String, int, char, float, float,
273         * float, float, float, java.lang.String, int)
274         */
275        @Override
276        public void setAtomInfo(String atomName,
277                        int serialNumber, char alternativeLocationId, float x,
278                        float y, float z, float occupancy,
279                        float temperatureFactor,
280                        String element, int charge) {
281                Atom atom = new AtomImpl();
282                Group altGroup = null;
283                atom.setPDBserial(serialNumber);
284                atom.setName(atomName.trim());
285                atom.setElement(Element.valueOfIgnoreCase(element));
286                if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){
287                        alternativeLocationId = ' ';
288                }
289                if (alternativeLocationId != ' ') {
290                        // Get the altGroup
291                        altGroup = getCorrectAltLocGroup(alternativeLocationId);
292                        atom.setAltLoc(alternativeLocationId);
293                } else {
294                        atom.setAltLoc(Character.valueOf(' '));
295                }
296                atom.setX(x);
297                atom.setY(y);
298                atom.setZ(z);
299                atom.setOccupancy(occupancy);
300                atom.setTempFactor(temperatureFactor);
301                atom.setCharge((short) charge);
302                if (altGroup == null) {
303                        group.addAtom(atom);
304                } else {
305                        altGroup.setChain(chain);
306                        altGroup.addAtom(atom);
307                }
308
309                // IF the main group doesn't have this atom
310                if (!group.hasAtom(atom.getName())) {
311
312                        // If it's not a microheterogenity case
313                        if (group.getPDBName().equals(atom.getGroup().getPDBName())) {
314                                // And it's not a deuterated case.  'nanoheterogenity'
315                                if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){
316                                        group.addAtom(atom);
317                                }
318                        }
319                }
320                atomsInGroup.add(atom);
321                allAtoms[atomCounter] = atom;
322                atomCounter++;
323        }
324
325        /* (non-Javadoc)
326         * @see org.rcsb.mmtf.decoder.StructureDecoderInter
327         * face#setGroupBonds(int, int, int)
328         */
329        @Override
330        public void setGroupBond(int indOne,
331                        int indTwo, int bondOrder) {
332                // Get the atom
333                Atom atomOne = atomsInGroup.get(indOne);
334                Atom atomTwo = atomsInGroup.get(indTwo);
335                // set the new bond
336                @SuppressWarnings("unused")
337                BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder);
338        }
339
340        /* (non-Javadoc)
341         * @see org.rcsb.mmtf.decoder.StructureDecoder
342         * Interface#setInterGroupBonds(int, int, int)
343         */
344        @Override
345        public void setInterGroupBond(int indOne,
346                        int indTwo, int bondOrder) {
347                // Get the atom
348                Atom atomOne = allAtoms[indOne];
349                Atom atomTwo = allAtoms[indTwo];
350                // set the new bond
351                @SuppressWarnings("unused")
352                BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder);
353        }
354
355
356        /**
357         * Generates Alternate location groups.
358         *
359         * @param altLoc the alt loc
360         * @return the correct alt loc group
361         */
362        private Group getCorrectAltLocGroup(Character altLoc) {
363                // see if we know this altLoc already;
364                List<Atom> atoms = group.getAtoms();
365                if (atoms.size() > 0) {
366                        Atom a1 = atoms.get(0);
367                        // we are just adding atoms to the current group
368                        // probably there is a second group following later...
369                        if (a1.getAltLoc().equals(altLoc)) {
370                                return group;   }
371                }
372
373                // Get the altLocGroup
374                Group altLocgroup = group.getAltLocGroup(altLoc);
375                if (altLocgroup != null) {
376                        return altLocgroup;
377                }
378                // If the group already exists (microheterogenity).
379                Group oldGroup = getGroupWithSameResNumButDiffPDBName();
380                if (oldGroup!= null){
381                        Group altLocG = group;
382                        group = oldGroup;
383                        group.addAltLoc(altLocG);
384                        chain.getAtomGroups().remove(altLocG);
385                        return altLocG;
386                }
387                // no matching altLoc group found.
388                // build it up.
389                if (group.getAtoms().size() == 0) {
390                        return group;
391                }
392                Group altLocG = (Group) group.clone();
393                // drop atoms from cloned group...
394                // https://redmine.open-bio.org/issues/3307
395                altLocG.setAtoms(new ArrayList<Atom>());
396                altLocG.getAltLocs().clear();
397                group.addAltLoc(altLocG);
398                return altLocG;
399
400        }
401
402
403        /* (non-Javadoc)
404         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
405         * setXtalInfo(java.lang.String, java.util.List)
406         */
407        @Override
408        public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) {
409                // Now set the xtalographic information
410                PDBCrystallographicInfo pci = new PDBCrystallographicInfo();
411                SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString);
412                pci.setSpaceGroup(spaceGroup);
413                if (unitCell.length > 0) {
414                        CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1],
415                                        unitCell[2], unitCell[3], unitCell[4], unitCell[5]);
416                        pci.setCrystalCell(cell);
417                        structure.setCrystallographicInfo(pci);
418                }
419
420                pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList));
421        }
422
423
424        /**
425         * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
426         * @param polymerType
427         * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
428         */
429        private int getGroupTypIndicator(PolymerType polymerType) {
430                if(PolymerType.PROTEIN_ONLY.contains(polymerType)){
431                        return 1;
432                }
433                if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){
434                        return 2;
435                }
436                return 0;
437        }
438
439
440        @Override
441        public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) {
442                // Biojava uses this as a one indexed id.
443                bioAssemblyId++;
444                if(bioassIndex!=bioAssemblyId){
445                        transformList = new ArrayList<>();
446                        bioassIndex = bioAssemblyId;
447                }
448                PDBHeader pdbHeader = structure.getPDBHeader();
449                // Get the bioassembly data
450                Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies();
451                // Get the bioassembly itself (if it exists
452                BioAssemblyInfo bioAssInfo;
453                if (bioAssemblies.containsKey(bioAssemblyId)){
454                        bioAssInfo = bioAssemblies.get(bioAssemblyId);
455                }
456                else{
457                        bioAssInfo = new  BioAssemblyInfo();
458                        bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>());
459                        bioAssemblies.put(bioAssemblyId, bioAssInfo);
460                        bioAssInfo.setId(bioAssemblyId);
461                }
462
463                for(int currChainIndex : inputChainIndices){
464                        BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation();
465                        Integer transId = transformList.indexOf(inputTransform)+1;
466                        if(transId==0){
467                                transformList.add(inputTransform);
468                                transId = transformList.indexOf(inputTransform)+1;
469                        }
470                        bioAssTrans.setId(transId.toString());
471                        // If it actually has an index - if it doesn't it is because the chain has no density.
472                        if (currChainIndex!=-1){
473                                bioAssTrans.setChainId(chainList.get(currChainIndex).getId());
474                        }
475                        else {
476                                continue;
477                        }
478                        // Now set matrix
479                        Matrix4d mat4d = new Matrix4d(inputTransform);
480                        bioAssTrans.setTransformationMatrix(mat4d);
481                        // Now add this
482                        bioAssInfo.getTransforms().add(bioAssTrans);
483                        // sort transformations into a unique order
484                        Collections.sort(bioAssInfo.getTransforms());
485                }
486        }
487
488        @Override
489        public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) {
490                // First get the chains
491                EntityInfo entityInfo = new EntityInfo();
492                entityInfo.setDescription(description);
493                entityInfo.setType(EntityType.entityTypeFromString(type));
494                List<Chain> chains = new ArrayList<>();
495                // Now loop through the chain ids and make a list of them
496                for( int index : chainIndices) {
497                        chains.add(chainList.get(index));
498                        chainList.get(index).setEntityInfo(entityInfo);
499                        chainSequenceMap.put(chainList.get(index).getId(), sequence);
500                }
501                entityInfo.setChains(chains);
502                entityInfoList.add(entityInfo);
503        }
504
505        @Override
506        public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate,
507                        String releaseDate, String[] experimentalMethods) {
508                SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
509                // Get the pdb header
510                PDBHeader pdbHeader = structure.getPDBHeader();
511                pdbHeader.setTitle(title);
512                pdbHeader.setResolution(resolution);
513                pdbHeader.setRfree(rFree);
514                pdbHeader.setRwork(rWork);
515                // Now loop through the techniques and add them in
516                if (experimentalMethods!=null) {
517                        for (String techniqueStr : experimentalMethods) {
518                                pdbHeader.setExperimentalTechnique(techniqueStr);
519                        }
520                }
521                // Set the dates
522                if(depositionDate!=null){
523                        try {
524                                Date depDate = formatter.parse(depositionDate);
525                                pdbHeader.setDepDate(depDate);
526                        } catch (ParseException e) {
527                                logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate);
528                        }
529                }
530                else{
531                        pdbHeader.setDepDate(new Date(0));
532                }
533                if(releaseDate!=null){
534                        try {
535                                Date relDate = formatter.parse(releaseDate);
536                                pdbHeader.setRelDate(relDate);
537                        } catch (ParseException e) {
538                                logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate);
539                        }
540                }
541                else{
542                        pdbHeader.setRelDate(new Date(0));
543                }
544        }
545}