001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.text.DateFormat;
024import java.text.SimpleDateFormat;
025import java.util.ArrayList;
026import java.util.Date;
027import java.util.HashMap;
028import java.util.HashSet;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033
034import javax.vecmath.Matrix4d;
035
036import org.biojava.nbio.structure.AminoAcid;
037import org.biojava.nbio.structure.AminoAcidImpl;
038import org.biojava.nbio.structure.Atom;
039import org.biojava.nbio.structure.Bond;
040import org.biojava.nbio.structure.Chain;
041import org.biojava.nbio.structure.ExperimentalTechnique;
042import org.biojava.nbio.structure.Group;
043import org.biojava.nbio.structure.GroupType;
044import org.biojava.nbio.structure.NucleotideImpl;
045import org.biojava.nbio.structure.PDBCrystallographicInfo;
046import org.biojava.nbio.structure.Structure;
047import org.biojava.nbio.structure.StructureException;
048import org.biojava.nbio.structure.StructureIO;
049import org.biojava.nbio.structure.align.util.AtomCache;
050import org.biojava.nbio.structure.io.FileParsingParameters;
051import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
052import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider;
053import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
054import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
055import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
056import org.biojava.nbio.structure.secstruc.DSSPParser;
057import org.biojava.nbio.structure.secstruc.SecStrucCalc;
058import org.biojava.nbio.structure.secstruc.SecStrucState;
059import org.biojava.nbio.structure.secstruc.SecStrucType;
060import org.biojava.nbio.structure.xtal.CrystalCell;
061import org.biojava.nbio.structure.xtal.SpaceGroup;
062import org.rcsb.mmtf.dataholders.DsspType;
063import org.rcsb.mmtf.utils.CodecUtils;
064import org.slf4j.Logger;
065import org.slf4j.LoggerFactory;
066
067/**
068 * A utils class of functions needed for Biojava to read and write to mmtf.
069 * @author Anthony Bradley
070 *
071 */
072public class MmtfUtils {
073        
074        private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);
075        
076        /**
077         * Set up the configuration parameters for BioJava.
078         */
079        public static AtomCache setUpBioJava() {
080                // Set up the atom cache etc
081                AtomCache cache = new AtomCache();
082                cache.setUseMmCif(true);
083                FileParsingParameters params = cache.getFileParsingParams();
084                params.setCreateAtomBonds(true);
085                params.setAlignSeqRes(true);
086                params.setParseBioAssembly(true);
087                DownloadChemCompProvider cc = new DownloadChemCompProvider();
088                ChemCompGroupFactory.setChemCompProvider(cc);
089                cc.checkDoFirstInstall();
090                cache.setFileParsingParams(params);
091                StructureIO.setAtomCache(cache);
092                return cache;
093        }
094
095        /**
096         * Set up the configuration parameters for BioJava.
097         * @param extraUrl the string describing the URL (or file path) from which
098         * to get missing CCD entries.
099         */
100        public static AtomCache setUpBioJava(String extraUrl) {
101                // Set up the atom cache etc
102                AtomCache cache = new AtomCache();
103                cache.setUseMmCif(true);
104                FileParsingParameters params = cache.getFileParsingParams();
105                params.setCreateAtomBonds(true);
106                params.setAlignSeqRes(true);
107                params.setParseBioAssembly(true);
108                DownloadChemCompProvider.serverBaseUrl = extraUrl;
109                DownloadChemCompProvider.useDefaultUrlLayout = false;
110                DownloadChemCompProvider cc = new DownloadChemCompProvider();
111                ChemCompGroupFactory.setChemCompProvider(cc);
112                cc.checkDoFirstInstall();
113                cache.setFileParsingParams(params);
114                StructureIO.setAtomCache(cache);
115                return cache;
116        }
117
118
119        /**
120         * This sets all microheterogeneous groups 
121         * (previously alternate location groups) as separate groups.
122         * This is required because mmtf groups cannot have multiple HET codes.
123         * @param bioJavaStruct
124         */
125        public static void fixMicroheterogenity(Structure bioJavaStruct) {
126                // Loop through the models
127                for (int i=0; i<bioJavaStruct.nrModels(); i++){
128                        // Then the chains
129                        List<Chain> chains = bioJavaStruct.getModel(i);
130                        for (Chain c : chains) {
131                                // Build a new list of groups
132                                List<Group> outGroups = new ArrayList<>();
133                                for (Group g : c.getAtomGroups()) {
134                                        List<Group> removeList = new ArrayList<>();
135                                        for (Group altLoc : g.getAltLocs()) {     
136                                                // Check if they are not equal -> microheterogenity
137                                                if(! altLoc.getPDBName().equals(g.getPDBName())) {
138                                                        // Now add this group to the main list
139                                                        removeList.add(altLoc);
140                                                }
141                                        }
142                                        // Add this group
143                                        outGroups.add(g);
144                                        // Remove any microhet alt locs
145                                        g.getAltLocs().removeAll(removeList);
146                                        // Add these microhet alt locs
147                                        outGroups.addAll(removeList);
148                                }
149                                c.setAtomGroups(outGroups);
150                        }
151                }
152        }
153
154
155        /**
156         * Generate the secondary structure for a Biojava structure object.
157         * @param bioJavaStruct the Biojava structure for which it is to be calculate.
158         */
159        public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
160                SecStrucCalc ssp = new SecStrucCalc();
161
162                try{
163                        ssp.calculate(bioJavaStruct, true);
164                }
165                catch(StructureException e) {
166                        LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage());
167                        
168                        try {
169                                DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result
170                        } catch(Exception bige){
171                                LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage());
172                        }
173                }
174        }
175
176        /**
177         * Get the string representation of a space group.
178         * @param spaceGroup the input SpaceGroup object
179         * @return the space group as a string.
180         */
181        public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
182                if(spaceGroup==null){
183                        return "NA";
184                }
185                else{
186                        return spaceGroup.getShortSymbol();
187                }
188        }
189
190        /**
191         * Get the length six array of the unit cell information.
192         * @param xtalInfo the input PDBCrystallographicInfo object
193         * @return the length six float array
194         */
195        public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
196                CrystalCell xtalCell = xtalInfo.getCrystalCell();
197                if(xtalCell==null){
198                        return null;
199                }else{
200                        float[] inputUnitCell = new float[6];
201                        inputUnitCell[0] = (float) xtalCell.getA();
202                        inputUnitCell[1] = (float) xtalCell.getB();
203                        inputUnitCell[2] = (float) xtalCell.getC();
204                        inputUnitCell[3] = (float) xtalCell.getAlpha();
205                        inputUnitCell[4] = (float) xtalCell.getBeta();
206                        inputUnitCell[5] = (float) xtalCell.getGamma();
207                        return inputUnitCell;
208                }
209        }
210
211        /**
212         * Converts the set of experimental techniques to an array of strings.
213         * @param experimentalTechniques the input set of experimental techniques
214         * @return the array of strings describing the methods used.
215         */
216        public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
217                if(experimentalTechniques==null){
218                        return new String[0];
219                }
220                String[] outArray = new String[experimentalTechniques.size()];
221                int index = 0;
222                for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
223                        outArray[index] = experimentalTechnique.getName();
224                        index++;
225                }
226                return outArray;
227        }
228
229        /**
230         * Covert a Date object to ISO time format.
231         * @param inputDate The input date object
232         * @return the time in ISO time format
233         */
234        public static String dateToIsoString(Date inputDate) {
235                DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd");
236                return dateStringFormat.format(inputDate);
237        }
238
239        /**
240         * Convert a bioassembly information into a map of transform, chainindices it relates to.
241         * @param bioassemblyInfo  the bioassembly info object for this structure
242         * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
243         * @return the bioassembly information (as primitive types).
244         */
245        public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) {
246                Map<Matrix4d, List<Integer>> matMap = new HashMap<>();
247                List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms();
248                for (BiologicalAssemblyTransformation transformation : transforms) {
249                        Matrix4d transMatrix = transformation.getTransformationMatrix();
250                        String transChainId = transformation.getChainId();
251                        if (!chainIdToIndexMap.containsKey(transChainId)){
252                                continue;
253                        }
254                        int chainIndex = chainIdToIndexMap.get(transformation.getChainId());
255                        if(matMap.containsKey(transMatrix)){
256                                matMap.get(transMatrix).add(chainIndex);
257                        }
258                        else{
259                                List<Integer> chainIdList = new ArrayList<>();
260                                chainIdList.add(chainIndex);
261                                matMap.put(transMatrix, chainIdList);
262                        }
263                }
264                Map<double[], int[]> outMap = new HashMap<>();
265                for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) {
266                        outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
267                }
268                return outMap;
269        }
270
271        /**
272         * Convert a four-d matrix to a double array. Row-packed.
273         * @param transformationMatrix the input matrix4d object
274         * @return the double array (16 long).
275         */
276        public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
277                // Initialise the output array
278                double[] outArray = new double[16];
279                // Iterate over the matrix
280                for(int i=0; i<4; i++){
281                        for(int j=0; j<4; j++){
282                                // Now set this element
283                                outArray[i*4+j] = transformationMatrix.getElement(i,j);
284                        }
285                }
286                return outArray;
287        }
288
289        /**
290         * Count the total number of groups in the structure
291         * @param structure the input structure
292         * @return the total number of groups
293         */
294        public static int getNumGroups(Structure structure) {
295                int count = 0;
296                for(int i=0; i<structure.nrModels(); i++) {
297                        for(Chain chain : structure.getChains(i)){
298                                count+= chain.getAtomGroups().size();
299                        }
300                }
301                return count;
302        }
303
304
305        /**
306         * Get a list of atoms for a group. Only add each atom once.
307         * @param inputGroup the Biojava Group to consider
308         * @return the atoms for the input Biojava Group
309         */
310        public static List<Atom> getAtomsForGroup(Group inputGroup) {
311                Set<Atom> uniqueAtoms = new HashSet<Atom>();
312                List<Atom> theseAtoms = new ArrayList<Atom>();
313                for(Atom a: inputGroup.getAtoms()){
314                        theseAtoms.add(a);
315                        uniqueAtoms.add(a);
316                }
317                List<Group> altLocs = inputGroup.getAltLocs();
318                for(Group thisG: altLocs){
319                        for(Atom a: thisG.getAtoms()){
320                                if(uniqueAtoms.contains(a)){ 
321                                        continue;
322                                }
323                                theseAtoms.add(a);
324                        }
325                }
326                return theseAtoms;
327        }
328
329        /**
330         * Find the number of bonds in a group
331         * @param atomsInGroup the list of atoms in the group
332         * @return the number of atoms in the group
333         */
334        public static int getNumBondsInGroup(List<Atom> atomsInGroup) {
335                int bondCounter = 0;
336                for(Atom atom : atomsInGroup) { 
337                        if(atom.getBonds()==null){
338                                continue;
339                        }
340                        for(Bond bond : atom.getBonds()) {
341                                // Now set the bonding information.
342                                Atom other = bond.getOther(atom);
343                                // If both atoms are in the group
344                                if (atomsInGroup.indexOf(other)!=-1){
345                                        Integer firstBondIndex = atomsInGroup.indexOf(atom);
346                                        Integer secondBondIndex = atomsInGroup.indexOf(other);
347                                        // Don't add the same bond twice
348                                        if (firstBondIndex<secondBondIndex){
349                                                bondCounter++;
350                                        }
351                                }
352                        }
353                }
354                return bondCounter;
355        }
356
357        /**
358         * Get the secondary structure as defined by DSSP.
359         * @param group the input group to be calculated
360         * @return the integer index of the group type.
361         */
362        public static int getSecStructType(Group group) {
363                SecStrucState props = (SecStrucState) group.getProperty("secstruc");
364                if(props==null){
365                        return DsspType.NULL_ENTRY.getDsspIndex();
366                }
367                return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex();
368        }
369
370        /**
371         * Get the secondary structure as defined by DSSP.
372         * @param group the input group to be calculated
373         * @param the integer index of the group type.
374         */
375        public static void setSecStructType(Group group, int dsspIndex) {
376                SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex);
377                SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType);
378                if(secStrucType!=null){
379                        group.setProperty("secstruc", secStrucState);
380                }
381                else{
382                }
383        }
384
385
386        /**
387         * Set the DSSP type based on a numerical index.
388         * @param dsspIndex the integer index of the type to set
389         * @return the instance of the SecStrucType object holding this secondary
390         * structure type.
391         */
392        public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) {
393                String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType();
394                for(SecStrucType secStrucType : SecStrucType.values())
395                {
396                        if(dsspType==secStrucType.name)
397                        {
398                                return secStrucType;
399                        }
400                }
401                // Return a null entry.
402                return null;
403        }
404
405        /**
406         * Get summary information for the structure.
407         * @param structure the structure for which to get the information.
408         */
409        public static MmtfSummaryDataBean getStructureInfo(Structure structure) {
410                MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean();
411                // Get all the atoms
412                List<Atom> theseAtoms = new ArrayList<>();
413                List<Chain> allChains = new ArrayList<>();
414                Map<String, Integer> chainIdToIndexMap = new HashMap<>();
415                int chainCounter = 0;
416                int bondCount = 0;
417                mmtfSummaryDataBean.setAllAtoms(theseAtoms);
418                mmtfSummaryDataBean.setAllChains(allChains);
419                mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
420                for (int i=0; i<structure.nrModels(); i++){
421                        List<Chain> chains = structure.getModel(i);
422                        allChains.addAll(chains);
423                        for (Chain chain : chains) {
424                                String idOne = chain.getId();
425                                if (!chainIdToIndexMap.containsKey(idOne)) { 
426                                        chainIdToIndexMap.put(idOne, chainCounter);
427                                }
428                                chainCounter++;
429                                for (Group g : chain.getAtomGroups()) {
430                                        for(Atom atom: getAtomsForGroup(g)){
431                                                theseAtoms.add(atom);           
432                                                // If both atoms are in the group
433                                                if (atom.getBonds()!=null){
434                                                        bondCount+=atom.getBonds().size();
435                                                }
436                                        }
437                                }
438                        }
439                }
440                // Assumes all bonds are referenced twice
441                mmtfSummaryDataBean.setNumBonds(bondCount/2);
442                return mmtfSummaryDataBean;
443
444        }
445
446        /**
447         * Get a list of N 4*4 matrices from a single list of doubles of length 16*N.
448         * @param ncsOperMatrixList the input list of doubles
449         * @return the list of 4*4 matrics 
450         */
451        public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
452                if(ncsOperMatrixList==null){
453                        return null;
454                }
455                int numMats = ncsOperMatrixList.length;
456                if(numMats==0){
457                        return null;
458                }
459                if(numMats==1 && ncsOperMatrixList[0].length==0){
460                        return null;
461                }
462                Matrix4d[] outList = new Matrix4d[numMats];
463                for(int i=0; i<numMats; i++){
464                        outList[i] = new Matrix4d(ncsOperMatrixList[i]);
465                }
466                return outList;
467        }
468
469        /**
470         * Get a list of length N*16 of a list of Matrix4d*N.
471         * @param ncsOperators the {@link Matrix4d} list 
472         * @return the list of length N*16 of the list of matrices
473         */
474        public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) {
475                if(ncsOperators==null){
476                        return new double[0][0];
477                }
478                double[][] outList = new double[ncsOperators.length][16];
479                for(int i=0; i<ncsOperators.length;i++){
480                        outList[i] = convertToDoubleArray(ncsOperators[i]);
481                }
482                return outList;
483        }
484
485        /**
486         * Insert the group in the given position in the sequence.
487         * @param chain the chain to add the seq res group to
488         * @param group the group to add
489         * @param sequenceIndexId the index to add it in
490         */
491        public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) {
492                List<Group> seqResGroups = chain.getSeqResGroups();
493                addGroupAtId(seqResGroups, group, sequenceIndexId);
494        }
495
496        /**
497         * Add the missing groups to the SeqResGroups.
498         * @param modelChain the chain to add the information for
499         * @param sequence the sequence of the construct
500         */
501        public static void addSeqRes(Chain modelChain, String sequence) {
502                List<Group> seqResGroups = modelChain.getSeqResGroups();
503                GroupType chainType = getChainType(modelChain.getAtomGroups());
504                for(int i=0; i<sequence.length(); i++){
505                        char singleLetterCode = sequence.charAt(i);
506                        Group group = null;
507                        if(seqResGroups.size()<=i){
508                        }
509                        else{
510                                group=seqResGroups.get(i);
511                        }
512                        if(group!=null){
513                                continue;
514                        }
515                        group = getSeqResGroup(modelChain, singleLetterCode, chainType);
516                        addGroupAtId(seqResGroups, group, i);
517                        seqResGroups.set(i, group);
518                }
519        }
520
521        private static GroupType getChainType(List<Group> groups) {
522                for(Group group : groups) {
523                        if(group==null){
524                                continue;
525                        }
526                        else if(group.getType()!=GroupType.HETATM){
527                                return group.getType();
528                        }
529                }
530                return GroupType.HETATM;
531        }
532
533        private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
534                while(seqResGroups.size()<=sequenceIndexId){
535                        seqResGroups.add(null);
536                }
537                if(sequenceIndexId>=0){
538                        seqResGroups.set(sequenceIndexId, group);
539                }               
540        }
541        
542        private static Group getSeqResGroup(Chain modelChain, char singleLetterCode, GroupType type) {
543                if(type==GroupType.AMINOACID){
544                        AminoAcidImpl a = new AminoAcidImpl();
545                        a.setRecordType(AminoAcid.SEQRESRECORD);
546                        a.setAminoType(singleLetterCode);
547                        ChemComp chemComp = new ChemComp();
548                        chemComp.setOne_letter_code(""+singleLetterCode);
549                        a.setChemComp(chemComp);
550                        return a;
551
552                } else if (type==GroupType.NUCLEOTIDE) {
553                        NucleotideImpl n = new NucleotideImpl();
554                        ChemComp chemComp = new ChemComp();
555                        chemComp.setOne_letter_code(""+singleLetterCode);
556                        n.setChemComp(chemComp);
557                        return n;
558                }
559                else{
560                        return null;
561                }
562        }
563}