001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io;
022
import org.biojava.nbio.structure.jama.Matrix;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
030
031/**
032 * Parses REMARK 350 records in a PDB file and creates transformations to
033 * construct the quaternary structure of a protein from an asymmetric unit
034 *
035 * @author Peter Rose
036 * @author Andreas Prlic
037 *
038 */
039public class PDBBioAssemblyParser {
040
041        private static final Logger logger = LoggerFactory.getLogger(PDBBioAssemblyParser.class);
042
043        private Integer currentBioMolecule = null;
044        private List<String> currentChainIDs = new ArrayList<String>();
045        private Matrix currentMatrix = null;
046        private double[] shift = null;
047        private Map<Integer,BioAssemblyInfo> transformationMap = new HashMap<Integer, BioAssemblyInfo>();
048        private int modelNumber = 1;
049        private int currentMmSize;
050
051        private List<BiologicalAssemblyTransformation> transformations;
052
053        /**
054         * Parses REMARK 350 line. See format description:
055         * http://www.wwpdb.org/documentation/format33/remarks2.html
056         *
057         * @param line
058         */
059        public void pdb_REMARK_350_Handler(String line) {
060
061                if (line.startsWith("REMARK 350 BIOMOLECULE:")) {
062                    initialize();
063                        currentBioMolecule = Integer.parseInt(line.substring(24).trim());
064
065                } else if ( line.matches("REMARK 350 \\w+ DETERMINED BIOLOGICAL UNIT:.*" ) ||
066                                        line.matches("REMARK 350 \\w+ DETERMINED QUATERNARY STRUCTURE:.*" )) {
067                        // text can be :
068                        // author determined biological unit
069                        // software determined quaternary structure
070                        currentMmSize = getMmSize(line);
071                } else if ( line.startsWith("REMARK 350 APPLY THE FOLLOWING TO CHAINS:")) {
072                        currentChainIDs.clear();
073                        addToCurrentChainList(line);
074
075                } else if ( line.startsWith("REMARK 350 IN ADDITION APPLY THE FOLLOWING TO CHAINS:")) {
076                        currentChainIDs.clear();
077                        addToCurrentChainList(line);
078
079                } else if ( line.startsWith("REMARK 350") && line.contains("AND CHAINS:")) {
080                        addToCurrentChainList(line);
081
082                } else if ( line.startsWith("REMARK 350   BIOMT")) {
083                if (readMatrix(line)) {
084                        saveMatrix();
085                        modelNumber++;
086                }
087                }
088        }
089
090        /**
091         * Returns a map of bioassembly transformations
092         * @return
093         */
094        public Map<Integer, BioAssemblyInfo> getTransformationMap() {
095                return transformationMap;
096        }
097
098        /**
099         * Parses a row of a BIOMT matrix in a REMARK 350 record.
100         * Example: REMARK 350   BIOMT1   2  1.000000  0.000000  0.000000        0.00000
101         * @param line
102         * @return true if 3rd line of matrix has been parsed (matrix is complete)
103         */
104        private boolean readMatrix(String line) {
105                // split by one or more spaces
106                String[] items = line.split("[ ]+");
107
108                // parse BIOMTx, where x is the position in the matrix
109                String pos = items[2].substring(5);
110                int row = Integer.parseInt(pos);
111                if (row == 1) {
112                        currentMatrix = Matrix.identity(3,3);
113                        shift = new double[3];
114                }
115
116                currentMatrix.set((row-1), 0,Float.parseFloat(items[4]));
117                currentMatrix.set((row-1), 1,Float.parseFloat(items[5]));
118                currentMatrix.set((row-1), 2,Float.parseFloat(items[6]));
119                shift[row-1] = Float.parseFloat(items[7]);
120
121                // return true if 3rd row of matrix has been processed
122                return row == 3;
123        }
124
125        /**
126         * Saves transformation matrix for the list of current chains
127         */
128        private void saveMatrix() {
129
130                for (String chainId : currentChainIDs) {
131                        BiologicalAssemblyTransformation transformation = new BiologicalAssemblyTransformation();
132                        transformation.setRotationMatrix(currentMatrix.getArray());
133                        transformation.setTranslation(shift);
134                        transformation.setId(String.valueOf(modelNumber));
135                        transformation.setChainId(chainId);
136                        transformations.add(transformation);
137                }
138
139                if (!transformationMap.containsKey(currentBioMolecule)) {
140                        BioAssemblyInfo bioAssembly = new BioAssemblyInfo();
141                        bioAssembly.setId(currentBioMolecule);
142                        if (currentMmSize==0) {
143                                logger.warn("No macromolecular size could be parsed for biological assembly {}",currentBioMolecule);
144                        }
145                        bioAssembly.setMacromolecularSize(currentMmSize);
146                        bioAssembly.setTransforms(transformations);
147                        transformationMap.put(currentBioMolecule,bioAssembly);
148                }
149        }
150
151        private int getMmSize(String line) {
152                int index = line.indexOf(':');
153                String mmString = line.substring(index+1,line.length()-1).trim().toLowerCase();
154                return getSizefromString(mmString);
155        }
156
157        private static int getSizefromString(String oligomer){
158                int size=0;
159
160                oligomer = oligomer.toLowerCase();
161
162                if (oligomer.equals("monomeric")) {
163                    size = 1;
164                } else if (oligomer.equals("dimeric")) {
165                    size = 2;
166                } else if (oligomer.equals("trimeric")) {
167                    size = 3;
168                } else if (oligomer.equals("tetrameric")) {
169                    size = 4;
170                } else if (oligomer.equals("pentameric")) {
171                    size = 5;
172                } else if (oligomer.equals("hexameric")) {
173                    size = 6;
174                } else if (oligomer.equals("heptameric")) {
175                    size = 7;
176                } else if (oligomer.equals("octameric")) {
177                    size = 8;
178                } else if (oligomer.equals("nonameric")) {
179                    size = 9;
180                } else if (oligomer.equals("decameric")) {
181                    size = 10;
182                } else if (oligomer.equals("undecameric")) {
183                    size = 11;
184                } else if (oligomer.equals("dodecameric")) {
185                    size = 12;
186                } else if (oligomer.equals("tridecameric")) {
187                    size = 13;
188                } else if (oligomer.equals("tetradecameric")) {
189                    size = 14;
190                } else if (oligomer.equals("pentadecameric")) {
191                    size = 15;
192                } else if (oligomer.equals("hexadecameric")) {
193                    size = 16;
194                } else if (oligomer.equals("heptadecameric")) {
195                    size = 17;
196                } else if (oligomer.equals("octadecameric")) {
197                    size = 18;
198                } else if (oligomer.equals("nonadecameric")) {
199                    size = 19;
200                } else if (oligomer.equals("eicosameric")) {
201                    size = 20;
202                } else if( oligomer.matches("(\\d+).*")) {
203                    size = Integer.parseInt((oligomer.replaceAll("(\\d+).*", "$1")));
204                }
205                return size;
206        }
207
208        /**
209         * Parses list of chain ids (A, B, C, etc.)
210         */
211        private void addToCurrentChainList(String line) {
212                int index = line.indexOf(":");
213                String chainList = line.substring(index+1).trim();
214        // split by spaces or commas
215                String[] chainIds = chainList.split("[ ,]+");
216                currentChainIDs.addAll(Arrays.asList(chainIds));
217        }
218
219        private void initialize() {
220                transformations = new ArrayList<BiologicalAssemblyTransformation>();
221                currentMatrix = Matrix.identity(3,3);
222                currentBioMolecule = null;
223                shift = new double[3];
224                modelNumber = 1;
225                currentMmSize = 0;
226        }
227}