001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io;
022
023import org.biojava.nbio.structure.jama.Matrix;
024import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
025import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
026//import org.slf4j.Logger;
027//import org.slf4j.LoggerFactory;
028
029import java.util.*;
030
031/**
032 * Parses REMARK 350 records in a PDB file and creates transformations to
033 * construct the quaternary structure of a protein from an asymmetric unit
034 *
035 * @author Peter Rose
036 * @author Andreas Prlic
037 *
038 */
039public class PDBBioAssemblyParser {
040
041        //private static final Logger logger = LoggerFactory.getLogger(PDBBioAssemblyParser.class);
042
043        private Integer currentBioMolecule = null;
044        private List<String> currentChainIDs = new ArrayList<>();
045        private Matrix currentMatrix = null;
046        private double[] shift = null;
047        private Map<Integer,BioAssemblyInfo> transformationMap = new HashMap<>();
048        private int modelNumber = 1;
049
050        private List<BiologicalAssemblyTransformation> transformations;
051
052        /**
053         * Parses REMARK 350 line. See format description:
054         * http://www.wwpdb.org/documentation/format33/remarks2.html
055         *
056         * @param line
057         */
058        public void pdb_REMARK_350_Handler(String line) {
059
060                if (line.startsWith("REMARK 350 BIOMOLECULE:")) {
061                    initialize();
062                        currentBioMolecule = Integer.parseInt(line.substring(24).trim());
063
064                }
065                // not parsing anymore the size (from biojava 5.0), thus this is not needed anymore
066                // eventually if needed this could be used to
067                // infer if bioassembly is author or software determined
068                //else if ( line.matches("REMARK 350 \\w+ DETERMINED BIOLOGICAL UNIT:.*" ) ||
069                //                      line.matches("REMARK 350 \\w+ DETERMINED QUATERNARY STRUCTURE:.*" )) {
070                        // text can be :
071                        // author determined biological unit
072                        // software determined quaternary structure
073                //}
074                else if ( line.startsWith("REMARK 350 APPLY THE FOLLOWING TO CHAINS:")) {
075                        currentChainIDs.clear();
076                        addToCurrentChainList(line);
077
078                } else if ( line.startsWith("REMARK 350 IN ADDITION APPLY THE FOLLOWING TO CHAINS:")) {
079                        currentChainIDs.clear();
080                        addToCurrentChainList(line);
081
082                } else if ( line.startsWith("REMARK 350") && line.contains("AND CHAINS:")) {
083                        addToCurrentChainList(line);
084
085                } else if ( line.startsWith("REMARK 350   BIOMT")) {
086                        if (readMatrix(line)) {
087                                saveMatrix();
088                                modelNumber++;
089                        }
090                }
091        }
092
093        /**
094         * Returns a map of bioassembly transformations
095         * @return
096         */
097        public Map<Integer, BioAssemblyInfo> getTransformationMap() {
098                return transformationMap;
099        }
100
101        /**
102         * Parses a row of a BIOMT matrix in a REMARK 350 record.
103         * Example: REMARK 350   BIOMT1   2  1.000000  0.000000  0.000000        0.00000
104         * @param line
105         * @return true if 3rd line of matrix has been parsed (matrix is complete)
106         */
107        private boolean readMatrix(String line) {
108                // split by one or more spaces
109                String[] items = line.split("[ ]+");
110
111                // parse BIOMTx, where x is the position in the matrix
112                String pos = items[2].substring(5);
113                int row = Integer.parseInt(pos);
114                if (row == 1) {
115                        currentMatrix = Matrix.identity(3,3);
116                        shift = new double[3];
117                }
118
119                currentMatrix.set((row-1), 0,Float.parseFloat(items[4]));
120                currentMatrix.set((row-1), 1,Float.parseFloat(items[5]));
121                currentMatrix.set((row-1), 2,Float.parseFloat(items[6]));
122                shift[row-1] = Float.parseFloat(items[7]);
123
124                // return true if 3rd row of matrix has been processed
125                return row == 3;
126        }
127
128        /**
129         * Saves transformation matrix for the list of current chains
130         */
131        private void saveMatrix() {
132
133                for (String chainId : currentChainIDs) {
134                        BiologicalAssemblyTransformation transformation = new BiologicalAssemblyTransformation();
135                        transformation.setRotationMatrix(currentMatrix.getArray());
136                        transformation.setTranslation(shift);
137                        transformation.setId(String.valueOf(modelNumber));
138                        transformation.setChainId(chainId);
139                        transformations.add(transformation);
140                }
141
142                if (!transformationMap.containsKey(currentBioMolecule)) {
143                        BioAssemblyInfo bioAssembly = new BioAssemblyInfo();
144                        bioAssembly.setId(currentBioMolecule);
145                        bioAssembly.setTransforms(transformations);
146                        transformationMap.put(currentBioMolecule,bioAssembly);
147                }
148        }
149
150        /**
151         * Parses list of chain ids (A, B, C, etc.)
152         */
153        private void addToCurrentChainList(String line) {
154                int index = line.indexOf(":");
155                String chainList = line.substring(index+1).trim();
156                // split by spaces or commas
157                String[] chainIds = chainList.split("[ ,]+");
158                currentChainIDs.addAll(Arrays.asList(chainIds));
159        }
160
161        private void initialize() {
162                transformations = new ArrayList<>();
163                currentMatrix = Matrix.identity(3,3);
164                currentBioMolecule = null;
165                shift = new double[3];
166                modelNumber = 1;
167        }
168
169        /**
170         * Set the macromolecularSize fields of the parsed bioassemblies.
171         * This can only be called after the full PDB file has been read so that
172         * all the info for all bioassemblies has been gathered.
173         * Note that an explicit method to set the field is necessary here because
174         * in PDB files the transformations contain only the author chain ids, corresponding
175         * to polymeric chains, whilst in mmCIF files the transformations
176         * contain all asym ids of both polymers and non-polymers.
177         */
178        public void setMacromolecularSizes() {
179                for (BioAssemblyInfo bioAssembly : transformationMap.values()) {
180                        bioAssembly.setMacromolecularSize(bioAssembly.getTransforms().size());
181                }
182        }
183}