001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.text.DateFormat;
024import java.text.SimpleDateFormat;
025import java.util.ArrayList;
026import java.util.Date;
027import java.util.HashSet;
028import java.util.LinkedHashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033
034import javax.vecmath.Matrix4d;
035
036import org.biojava.nbio.structure.AminoAcid;
037import org.biojava.nbio.structure.AminoAcidImpl;
038import org.biojava.nbio.structure.Atom;
039import org.biojava.nbio.structure.Bond;
040import org.biojava.nbio.structure.Chain;
041import org.biojava.nbio.structure.ExperimentalTechnique;
042import org.biojava.nbio.structure.Group;
043import org.biojava.nbio.structure.GroupType;
044import org.biojava.nbio.structure.NucleotideImpl;
045import org.biojava.nbio.structure.PDBCrystallographicInfo;
046import org.biojava.nbio.structure.Structure;
047import org.biojava.nbio.structure.StructureException;
048import org.biojava.nbio.structure.StructureIO;
049import org.biojava.nbio.structure.align.util.AtomCache;
050import org.biojava.nbio.structure.io.FileParsingParameters;
051import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
052import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider;
053import org.biojava.nbio.structure.io.mmcif.chem.ChemCompTools;
054import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
055import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
056import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
057import org.biojava.nbio.structure.secstruc.DSSPParser;
058import org.biojava.nbio.structure.secstruc.SecStrucCalc;
059import org.biojava.nbio.structure.secstruc.SecStrucState;
060import org.biojava.nbio.structure.secstruc.SecStrucType;
061import org.biojava.nbio.structure.xtal.CrystalCell;
062import org.biojava.nbio.structure.xtal.SpaceGroup;
063import org.rcsb.mmtf.dataholders.DsspType;
064import org.rcsb.mmtf.utils.CodecUtils;
065import org.slf4j.Logger;
066import org.slf4j.LoggerFactory;
067
068/**
069 * A utils class of functions needed for Biojava to read and write to mmtf.
070 * @author Anthony Bradley
071 *
072 */
073public class MmtfUtils {
074
075        private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);
076
077        /**
078         * Set up the configuration parameters for BioJava.
079         */
080        public static AtomCache setUpBioJava() {
081                // Set up the atom cache etc
082                AtomCache cache = new AtomCache();
083                cache.setUseMmCif(true);
084                FileParsingParameters params = cache.getFileParsingParams();
085                params.setCreateAtomBonds(true);
086                params.setAlignSeqRes(true);
087                params.setParseBioAssembly(true);
088                DownloadChemCompProvider cc = new DownloadChemCompProvider();
089                ChemCompGroupFactory.setChemCompProvider(cc);
090                cc.checkDoFirstInstall();
091                cache.setFileParsingParams(params);
092                StructureIO.setAtomCache(cache);
093                return cache;
094        }
095
096        /**
097         * Set up the configuration parameters for BioJava.
098         * @param extraUrl the string describing the URL (or file path) from which
099         * to get missing CCD entries.
100         */
101        public static AtomCache setUpBioJava(String extraUrl) {
102                // Set up the atom cache etc
103                AtomCache cache = new AtomCache();
104                cache.setUseMmCif(true);
105                FileParsingParameters params = cache.getFileParsingParams();
106                params.setCreateAtomBonds(true);
107                params.setAlignSeqRes(true);
108                params.setParseBioAssembly(true);
109                DownloadChemCompProvider.serverBaseUrl = extraUrl;
110                DownloadChemCompProvider.useDefaultUrlLayout = false;
111                DownloadChemCompProvider cc = new DownloadChemCompProvider();
112                ChemCompGroupFactory.setChemCompProvider(cc);
113                cc.checkDoFirstInstall();
114                cache.setFileParsingParams(params);
115                StructureIO.setAtomCache(cache);
116                return cache;
117        }
118
119
120        /**
121         * This sets all microheterogeneous groups
122         * (previously alternate location groups) as separate groups.
123         * This is required because mmtf groups cannot have multiple HET codes.
124         * @param bioJavaStruct
125         */
126        public static void fixMicroheterogenity(Structure bioJavaStruct) {
127                // Loop through the models
128                for (int i=0; i<bioJavaStruct.nrModels(); i++){
129                        // Then the chains
130                        List<Chain> chains = bioJavaStruct.getModel(i);
131                        for (Chain c : chains) {
132                                // Build a new list of groups
133                                List<Group> outGroups = new ArrayList<>();
134                                for (Group g : c.getAtomGroups()) {
135                                        List<Group> removeList = new ArrayList<>();
136                                        for (Group altLoc : g.getAltLocs()) {
137                                                // Check if they are not equal -> microheterogenity
138                                                if(! altLoc.getPDBName().equals(g.getPDBName())) {
139                                                        // Now add this group to the main list
140                                                        removeList.add(altLoc);
141                                                }
142                                        }
143                                        // Add this group
144                                        outGroups.add(g);
145                                        // Remove any microhet alt locs
146                                        g.getAltLocs().removeAll(removeList);
147                                        // Add these microhet alt locs
148                                        outGroups.addAll(removeList);
149                                }
150                                c.setAtomGroups(outGroups);
151                        }
152                }
153        }
154
155
156        /**
157         * Generate the secondary structure for a Biojava structure object.
158         * @param bioJavaStruct the Biojava structure for which it is to be calculate.
159         */
160        public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
161                SecStrucCalc ssp = new SecStrucCalc();
162
163                try{
164                        ssp.calculate(bioJavaStruct, true);
165                }
166                catch(StructureException e) {
167                        LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage());
168
169                        try {
170                                DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result
171                        } catch(Exception bige){
172                                LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage());
173                        }
174                }
175        }
176
177        /**
178         * Get the string representation of a space group.
179         * @param spaceGroup the input SpaceGroup object
180         * @return the space group as a string.
181         */
182        public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
183                if(spaceGroup==null){
184                        return "NA";
185                }
186                else{
187                        return spaceGroup.getShortSymbol();
188                }
189        }
190
191        /**
192         * Get the length six array of the unit cell information.
193         * @param xtalInfo the input PDBCrystallographicInfo object
194         * @return the length six float array
195         */
196        public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
197                CrystalCell xtalCell = xtalInfo.getCrystalCell();
198                if(xtalCell==null){
199                        return null;
200                }else{
201                        float[] inputUnitCell = new float[6];
202                        inputUnitCell[0] = (float) xtalCell.getA();
203                        inputUnitCell[1] = (float) xtalCell.getB();
204                        inputUnitCell[2] = (float) xtalCell.getC();
205                        inputUnitCell[3] = (float) xtalCell.getAlpha();
206                        inputUnitCell[4] = (float) xtalCell.getBeta();
207                        inputUnitCell[5] = (float) xtalCell.getGamma();
208                        return inputUnitCell;
209                }
210        }
211
212        /**
213         * Converts the set of experimental techniques to an array of strings.
214         * @param experimentalTechniques the input set of experimental techniques
215         * @return the array of strings describing the methods used.
216         */
217        public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
218                if(experimentalTechniques==null){
219                        return new String[0];
220                }
221                String[] outArray = new String[experimentalTechniques.size()];
222                int index = 0;
223                for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
224                        outArray[index] = experimentalTechnique.getName();
225                        index++;
226                }
227                return outArray;
228        }
229
230        /**
231         * Covert a Date object to ISO time format.
232         * @param inputDate The input date object
233         * @return the time in ISO time format
234         */
235        public static String dateToIsoString(Date inputDate) {
236                DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd");
237                return dateStringFormat.format(inputDate);
238        }
239
240        /**
241         * Convert a bioassembly information into a map of transform, chainindices it relates to.
242         * @param bioassemblyInfo  the bioassembly info object for this structure
243         * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
244         * @return the bioassembly information (as primitive types).
245         */
246        public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) {
247            Map<Matrix4d, List<Integer>> matMap = new LinkedHashMap<>();
248                List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms();
249                for (BiologicalAssemblyTransformation transformation : transforms) {
250                        Matrix4d transMatrix = transformation.getTransformationMatrix();
251                        String transChainId = transformation.getChainId();
252                        if (!chainIdToIndexMap.containsKey(transChainId)){
253                                continue;
254                        }
255                        int chainIndex = chainIdToIndexMap.get(transformation.getChainId());
256                        if(matMap.containsKey(transMatrix)){
257                                matMap.get(transMatrix).add(chainIndex);
258                        }
259                        else{
260                                List<Integer> chainIdList = new ArrayList<>();
261                                chainIdList.add(chainIndex);
262                                matMap.put(transMatrix, chainIdList);
263                        }
264                }
265
266            Map<double[], int[]> outMap = new LinkedHashMap<>();
267                for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) {
268                        outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
269                }
270                return outMap;
271        }
272
273        /**
274         * Convert a four-d matrix to a double array. Row-packed.
275         * @param transformationMatrix the input matrix4d object
276         * @return the double array (16 long).
277         */
278        public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
279                // Initialise the output array
280                double[] outArray = new double[16];
281                // Iterate over the matrix
282                for(int i=0; i<4; i++){
283                        for(int j=0; j<4; j++){
284                                // Now set this element
285                                outArray[i*4+j] = transformationMatrix.getElement(i,j);
286                        }
287                }
288                return outArray;
289        }
290
291        /**
292         * Count the total number of groups in the structure
293         * @param structure the input structure
294         * @return the total number of groups
295         */
296        public static int getNumGroups(Structure structure) {
297                int count = 0;
298                for(int i=0; i<structure.nrModels(); i++) {
299                        for(Chain chain : structure.getChains(i)){
300                                count+= chain.getAtomGroups().size();
301                        }
302                }
303                return count;
304        }
305
306
307        /**
308         * Get a list of atoms for a group. Only add each atom once.
309         * @param inputGroup the Biojava Group to consider
310         * @return the atoms for the input Biojava Group
311         */
312        public static List<Atom> getAtomsForGroup(Group inputGroup) {
313                Set<Atom> uniqueAtoms = new HashSet<Atom>();
314                List<Atom> theseAtoms = new ArrayList<Atom>();
315                for(Atom a: inputGroup.getAtoms()){
316                        theseAtoms.add(a);
317                        uniqueAtoms.add(a);
318                }
319                List<Group> altLocs = inputGroup.getAltLocs();
320                for(Group thisG: altLocs){
321                        for(Atom a: thisG.getAtoms()){
322                                if(uniqueAtoms.contains(a)){
323                                        continue;
324                                }
325                                theseAtoms.add(a);
326                        }
327                }
328                return theseAtoms;
329        }
330
331        /**
332         * Find the number of bonds in a group
333         * @param atomsInGroup the list of atoms in the group
334         * @return the number of atoms in the group
335         */
336        public static int getNumBondsInGroup(List<Atom> atomsInGroup) {
337                int bondCounter = 0;
338                for(Atom atom : atomsInGroup) {
339                        if(atom.getBonds()==null){
340                                continue;
341                        }
342                        for(Bond bond : atom.getBonds()) {
343                                // Now set the bonding information.
344                                Atom other = bond.getOther(atom);
345                                // If both atoms are in the group
346                                if (atomsInGroup.indexOf(other)!=-1){
347                                        Integer firstBondIndex = atomsInGroup.indexOf(atom);
348                                        Integer secondBondIndex = atomsInGroup.indexOf(other);
349                                        // Don't add the same bond twice
350                                        if (firstBondIndex<secondBondIndex){
351                                                bondCounter++;
352                                        }
353                                }
354                        }
355                }
356                return bondCounter;
357        }
358
359        /**
360         * Get the secondary structure as defined by DSSP.
361         * @param group the input group to be calculated
362         * @return the integer index of the group type.
363         */
364        public static int getSecStructType(Group group) {
365                SecStrucState props = (SecStrucState) group.getProperty("secstruc");
366                if(props==null){
367                        return DsspType.NULL_ENTRY.getDsspIndex();
368                }
369                return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex();
370        }
371
372        /**
373         * Get the secondary structure as defined by DSSP.
374         * @param group the input group to be calculated
375         * @param dsspIndex integer index of the group type.
376         */
377        public static void setSecStructType(Group group, int dsspIndex) {
378                SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex);
379                SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType);
380                if(secStrucType!=null){
381                        group.setProperty("secstruc", secStrucState);
382                }
383        }
384
385
386        /**
387         * Set the DSSP type based on a numerical index.
388         * @param dsspIndex the integer index of the type to set
389         * @return the instance of the SecStrucType object holding this secondary
390         * structure type.
391         */
392        public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) {
393                String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType();
394                for(SecStrucType secStrucType : SecStrucType.values())
395                {
396                        if(dsspType.equals(secStrucType.name))
397                        {
398                                return secStrucType;
399                        }
400                }
401                // Return a null entry.
402                return null;
403        }
404
405        /**
406         * Get summary information for the structure.
407         * @param structure the structure for which to get the information.
408         */
409        public static MmtfSummaryDataBean getStructureInfo(Structure structure) {
410                MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean();
411                // Get all the atoms
412                List<Atom> theseAtoms = new ArrayList<>();
413                List<Chain> allChains = new ArrayList<>();
414                Map<String, Integer> chainIdToIndexMap = new LinkedHashMap<>();
415                int chainCounter = 0;
416                int bondCount = 0;
417                mmtfSummaryDataBean.setAllAtoms(theseAtoms);
418                mmtfSummaryDataBean.setAllChains(allChains);
419                mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
420                for (int i=0; i<structure.nrModels(); i++){
421                        List<Chain> chains = structure.getModel(i);
422                        allChains.addAll(chains);
423                        for (Chain chain : chains) {
424                                String idOne = chain.getId();
425                                if (!chainIdToIndexMap.containsKey(idOne)) {
426                                        chainIdToIndexMap.put(idOne, chainCounter);
427                                }
428                                chainCounter++;
429                                for (Group g : chain.getAtomGroups()) {
430                                        for(Atom atom: getAtomsForGroup(g)){
431                                                theseAtoms.add(atom);
432                                                // If both atoms are in the group
433                                                if (atom.getBonds()!=null){
434                                                        bondCount+=atom.getBonds().size();
435                                                }
436                                        }
437                                }
438                        }
439                }
440                // Assumes all bonds are referenced twice
441                mmtfSummaryDataBean.setNumBonds(bondCount/2);
442                return mmtfSummaryDataBean;
443
444        }
445
446        /**
447         * Get a list of N 4*4 matrices from a single list of doubles of length 16*N.
448         * @param ncsOperMatrixList the input list of doubles
449         * @return the list of 4*4 matrics
450         */
451        public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
452                if(ncsOperMatrixList==null){
453                        return null;
454                }
455                int numMats = ncsOperMatrixList.length;
456                if(numMats==0){
457                        return null;
458                }
459                if(numMats==1 && ncsOperMatrixList[0].length==0){
460                        return null;
461                }
462                Matrix4d[] outList = new Matrix4d[numMats];
463                for(int i=0; i<numMats; i++){
464                        outList[i] = new Matrix4d(ncsOperMatrixList[i]);
465                }
466                return outList;
467        }
468
469        /**
470         * Get a list of length N*16 of a list of Matrix4d*N.
471         * @param ncsOperators the {@link Matrix4d} list
472         * @return the list of length N*16 of the list of matrices
473         */
474        public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) {
475                if(ncsOperators==null){
476                        return new double[0][0];
477                }
478                double[][] outList = new double[ncsOperators.length][16];
479                for(int i=0; i<ncsOperators.length;i++){
480                        outList[i] = convertToDoubleArray(ncsOperators[i]);
481                }
482                return outList;
483        }
484
485        /**
486         * Insert the group in the given position in the sequence.
487         * @param chain the chain to add the seq res group to
488         * @param group the group to add
489         * @param sequenceIndexId the index to add it in
490         */
491        public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) {
492                List<Group> seqResGroups = chain.getSeqResGroups();
493                addGroupAtId(seqResGroups, group, sequenceIndexId);
494        }
495
496        /**
497         * Add the missing groups to the SeqResGroups.
498         * @param modelChain the chain to add the information for
499         * @param sequence the sequence of the construct
500         */
501        public static void addSeqRes(Chain modelChain, String sequence) {
502                
503                List<Group> seqResGroups = modelChain.getSeqResGroups();
504                GroupType chainType = getChainType(modelChain.getAtomGroups());
505                
506                for(int i=0; i<sequence.length(); i++){
507                        
508                        char singleLetterCode = sequence.charAt(i);
509                        Group group = null;
510                        if (seqResGroups.size() > i) {
511                                group=seqResGroups.get(i);
512                        }
513                        if(group!=null){
514                                continue;
515                        }
516                        
517                        group = getSeqResGroup(singleLetterCode, chainType);
518                        addGroupAtId(seqResGroups, group, i);
519                }
520        }
521
522        private static GroupType getChainType(List<Group> groups) {
523                for(Group group : groups) {
524                        if(group!=null && group.getType()!=GroupType.HETATM){
525                                return group.getType();
526                        }
527                }
528                return GroupType.HETATM;
529        }
530
531        private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
532                while(seqResGroups.size()<=sequenceIndexId){
533                        seqResGroups.add(null);
534                }
535                if(sequenceIndexId>=0){
536                        seqResGroups.set(sequenceIndexId, group);
537                }
538        }
539
540        private static Group getSeqResGroup(char singleLetterCode, GroupType type) {
541
542                if(type==GroupType.AMINOACID){
543                        String threeLetter = ChemCompTools.getAminoThreeLetter(singleLetterCode);
544                        if (threeLetter == null) return null;
545                        ChemComp chemComp = ChemCompGroupFactory.getChemComp(threeLetter);
546
547                        AminoAcidImpl a = new AminoAcidImpl();
548                        a.setRecordType(AminoAcid.SEQRESRECORD);
549                        a.setAminoType(singleLetterCode);
550                        a.setPDBName(threeLetter);
551                        a.setChemComp(chemComp);
552                        return a;
553
554                } else if (type==GroupType.NUCLEOTIDE) {
555                        String twoLetter = ChemCompTools.getDNATwoLetter(singleLetterCode);
556                        if (twoLetter == null) return null;
557                        ChemComp chemComp = ChemCompGroupFactory.getChemComp(twoLetter);
558
559                        NucleotideImpl n = new NucleotideImpl();
560                        n.setPDBName(twoLetter);
561                        n.setChemComp(chemComp);
562                        return n;
563                }
564                else{
565                        return null;
566                }
567        }
568}