001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.nbio.structure.quaternary;
023
024import org.biojava.nbio.structure.Calc;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.Structure;
027import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
028import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
029import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033import javax.vecmath.Matrix4d;
034import java.util.*;
035
036/**
037 * Reconstructs the quaternary structure of a protein from an asymmetric unit
038 *
039 * @author Peter Rose
040 * @author Andreas Prlic
041 * @author Jose Duarte
042 *
043 */
044public class BiologicalAssemblyBuilder {
045
046        private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class);
047
048        private OperatorResolver operatorResolver;
049
050
051        /**
052         * All matrix operators present in _pdbx_struct_oper_list.
053         * Identifiers (_pdbx_struct_oper_list.id) to matrix operators.
054         */
055        private Map<String, Matrix4d> allTransformations;
056
057        private List<String> modelIndex = new ArrayList<String>();
058
059        public BiologicalAssemblyBuilder(){
060                init();
061        }
062
063        /**
064         * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations,
065         * by adding symmetry partners as new models.
066         * The output Structure will be different depending on the multiModel parameter:
067         * <li>
068         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
069         * the first one are discarded.
070         * </li>
071         * <li>
072         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
073         * originalAsymId_transformId and originalAuthId_transformId)
074         * </li>
075         * @param asymUnit
076         * @param transformations
077         * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF),
078         * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB).
079         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
080         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
081         * @return
082         */
083        public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) {
084
085                // ensure that new chains are build in the same order as they appear in the asymmetric unit
086                orderTransformationsByChainId(asymUnit, transformations);
087
088                Structure s = asymUnit.clone();
089
090
091                // this resets all models (not only the first one): this is important for NMR (multi-model)
092                // like that we can be sure we start with an empty structures and we add models or chains to it
093                s.resetModels();
094
095                for (BiologicalAssemblyTransformation transformation : transformations){
096
097                        List<Chain> chainsToTransform = new ArrayList<>();
098
099                        // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest
100                        if (useAsymIds) {
101                                Chain c = asymUnit.getChain(transformation.getChainId());
102                                chainsToTransform.add(c);
103                        } else {
104                                Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId());
105                                List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId());
106                                Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId());
107                                if (polyC!=null)
108                                        chainsToTransform.add(polyC);
109                                if (!nonPolyCs.isEmpty())
110                                        chainsToTransform.addAll(nonPolyCs);
111                                if (waterC!=null)
112                                        chainsToTransform.add(waterC);
113                        }
114
115                        for (Chain c: chainsToTransform) {
116
117                                Chain chain = (Chain)c.clone();
118
119                                Calc.transform(chain, transformation.getTransformationMatrix());
120
121                                String transformId = transformation.getId();
122
123                                // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure
124
125                                // TODO set entities properly in the new structures! at the moment they are a mess... - JD 2016-05-19
126
127                                if (multiModel)
128                                        addChainMultiModel(s, chain, transformId);
129                                else
130                                        addChainFlattened(s, chain, transformId);
131
132                        }
133                }
134
135                s.setBiologicalAssembly(true);
136                return s;
137        }
138
139        /**
140         * Orders model transformations by chain ids in the same order as in the asymmetric unit
141         * @param asymUnit
142         * @param transformations
143         */
144        private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) {
145                final List<String> chainIds = getChainIds(asymUnit);
146                Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() {
147                        @Override
148                        public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) {
149                                // set sort order only if the two ids are identical
150                                if (t1.getId().equals(t2.getId())) {
151                                         return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId());
152                                } else {
153                                        return t1.getId().compareTo(t2.getId());
154                                }
155                    }
156                });
157        }
158
159        /**
160         * Returns a list of chain ids in the order they are specified in the ATOM
161         * records in the asymmetric unit
162         * @param asymUnit
163         * @return
164         */
165        private List<String> getChainIds(Structure asymUnit) {
166                List<String> chainIds = new ArrayList<String>();
167                for ( Chain c : asymUnit.getChains()){
168                        String intChainID = c.getId();
169                        chainIds.add(intChainID);
170                }
171                return chainIds;
172        }
173
174        /**
175         * Adds a chain to the given structure to form a biological assembly,
176         * adding the symmetry expanded chains as new models per transformId.
177         * @param s
178         * @param newChain
179         * @param transformId
180         */
181        private void addChainMultiModel(Structure s, Chain newChain, String transformId) {
182
183                // multi-model bioassembly
184
185                if ( modelIndex.size() == 0)
186                        modelIndex.add("PLACEHOLDER FOR ASYM UNIT");
187
188                int modelCount = modelIndex.indexOf(transformId);
189                if ( modelCount == -1)  {
190                        modelIndex.add(transformId);
191                        modelCount = modelIndex.indexOf(transformId);
192                }
193
194                if (modelCount == 0) {
195                        s.addChain(newChain);
196                } else if (modelCount > s.nrModels()) {
197                        List<Chain> newModel = new ArrayList<Chain>();
198                        newModel.add(newChain);
199                        s.addModel(newModel);
200                } else {
201                        s.addChain(newChain, modelCount-1);
202                }
203
204        }
205
206        /**
207         * Adds a chain to the given structure to form a biological assembly,
208         * adding the symmetry-expanded chains as new chains with renamed
209         * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId).
210         * @param s
211         * @param newChain
212         * @param transformId
213         */
214        private void addChainFlattened(Structure s, Chain newChain, String transformId) {
215                newChain.setId(newChain.getId()+"_"+transformId);
216                newChain.setName(newChain.getName()+"_"+transformId);
217                s.addChain(newChain);
218        }
219
220        /**
221         * Returns a list of transformation matrices for the generation of a macromolecular
222         * assembly for the specified assembly Id.
223         *
224         * @param psa
225         * @param psags
226         * @param operators
227         * @return list of transformation matrices to generate macromolecular assembly
228         */
229        public ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly psa, List<PdbxStructAssemblyGen> psags, List<PdbxStructOperList> operators) {
230                init();
231
232                // first we populate the list of all operators from pdbx_struct_oper_list so that we can then
233                // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators()
234                for (PdbxStructOperList oper: operators){
235                        try {
236                                Matrix4d m = new Matrix4d();
237                                m.m00 = Double.parseDouble(oper.getMatrix11());
238                                m.m01 = Double.parseDouble(oper.getMatrix12());
239                                m.m02 = Double.parseDouble(oper.getMatrix13());
240
241                                m.m10 = Double.parseDouble(oper.getMatrix21());
242                                m.m11 = Double.parseDouble(oper.getMatrix22());
243                                m.m12 = Double.parseDouble(oper.getMatrix23());
244
245                                m.m20 = Double.parseDouble(oper.getMatrix31());
246                                m.m21 = Double.parseDouble(oper.getMatrix32());
247                                m.m22 = Double.parseDouble(oper.getMatrix33());
248
249                                m.m03 = Double.parseDouble(oper.getVector1());
250                                m.m13 = Double.parseDouble(oper.getVector2());
251                                m.m23 = Double.parseDouble(oper.getVector3());
252
253                                m.m30 = 0;
254                                m.m31 = 0;
255                                m.m32 = 0;
256                                m.m33 = 1;
257
258                                allTransformations.put(oper.getId(), m);
259
260                        } catch (NumberFormatException e) {
261                                logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", oper.getId(), e.getMessage());
262                        }
263                }
264
265                ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(psa.getId(), psags);
266                transformations.addAll(getBioUnitTransformationsListBinaryOperators(psa.getId(), psags));
267                transformations.trimToSize();
268                return transformations;
269        }
270
271
272        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) {
273
274                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<BiologicalAssemblyTransformation>();
275
276                List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators();
277
278
279                for ( PdbxStructAssemblyGen psag : psags){
280                        if ( psag.getAssembly_id().equals(assemblyId)) {
281
282                                List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(","));
283
284                                operatorResolver.parseOperatorExpressionString(psag.getOper_expression());
285
286                                // apply binary operators to the specified chains
287                                // Example 1M4X: generates all products of transformation matrices (1-60)(61-88)
288                                for (String chainId : asymIds) {
289
290                                        for (OrderedPair<String> operator : operators) {
291                                                Matrix4d original1 = allTransformations.get(operator.getElement1());
292                                                Matrix4d original2 = allTransformations.get(operator.getElement2());
293                                                if (original1 == null || original2 == null) {
294                                                        logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId);
295                                                        continue;
296                                                }
297                                                Matrix4d composed = new Matrix4d(original1);
298                                                composed.mul(original2);
299                                                BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
300                                                transform.setChainId(chainId);
301                                                transform.setId(operator.getElement1() + "x" + operator.getElement2());
302                                                transform.setTransformationMatrix(composed);
303                                                transformations.add(transform);
304                                        }
305                                }
306                        }
307
308                }
309
310                return transformations;
311        }
312
313        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) {
314                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<BiologicalAssemblyTransformation>();
315
316
317                for ( PdbxStructAssemblyGen psag : psags){
318                        if ( psag.getAssembly_id().equals(assemblyId)) {
319
320                                operatorResolver.parseOperatorExpressionString(psag.getOper_expression());
321                                List<String> operators = operatorResolver.getUnaryOperators();
322
323                                List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(","));
324
325                                // apply unary operators to the specified chains
326                                for (String chainId : asymIds) {
327                                        for (String operator : operators) {
328                                                Matrix4d original = allTransformations.get(operator);
329                                                if (original == null) {
330                                                        logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId);
331                                                        continue;
332                                                }
333                                                BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
334                                                transform.setChainId(chainId);
335                                                transform.setId(operator);
336                                                transform.setTransformationMatrix(original);
337                                                transformations.add(transform);
338                                        }
339                                }
340                        }
341                }
342
343                return transformations;
344        }
345
346        private void init(){
347                operatorResolver= new OperatorResolver();
348                allTransformations = new HashMap<>();
349        }
350}