001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.nbio.structure.quaternary; 023 024import org.biojava.nbio.structure.Calc; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.Structure; 027import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly; 028import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen; 029import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033import javax.vecmath.Matrix4d; 034import java.util.*; 035 036/** 037 * Reconstructs the quaternary structure of a protein from an asymmetric unit 038 * 039 * @author Peter Rose 040 * @author Andreas Prlic 041 * @author Jose Duarte 042 * 043 */ 044public class BiologicalAssemblyBuilder { 045 046 private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class); 047 048 private OperatorResolver operatorResolver; 049 050 051 /** 052 * All matrix operators present in _pdbx_struct_oper_list. 053 * Identifiers (_pdbx_struct_oper_list.id) to matrix operators. 054 */ 055 private Map<String, Matrix4d> allTransformations; 056 057 private List<String> modelIndex = new ArrayList<String>(); 058 059 public BiologicalAssemblyBuilder(){ 060 init(); 061 } 062 063 /** 064 * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations, 065 * by adding symmetry partners as new models. 066 * The output Structure will be different depending on the multiModel parameter: 067 * <li> 068 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 069 * the first one are discarded. 070 * </li> 071 * <li> 072 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 073 * originalAsymId_transformId and originalAuthId_transformId) 074 * </li> 075 * @param asymUnit 076 * @param transformations 077 * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF), 078 * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB). 079 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 080 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 081 * @return 082 */ 083 public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) { 084 085 // ensure that new chains are build in the same order as they appear in the asymmetric unit 086 orderTransformationsByChainId(asymUnit, transformations); 087 088 Structure s = asymUnit.clone(); 089 090 091 // this resets all models (not only the first one): this is important for NMR (multi-model) 092 // like that we can be sure we start with an empty structures and we add models or chains to it 093 s.resetModels(); 094 095 for (BiologicalAssemblyTransformation transformation : transformations){ 096 097 List<Chain> chainsToTransform = new ArrayList<>(); 098 099 // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest 100 if (useAsymIds) { 101 Chain c = asymUnit.getChain(transformation.getChainId()); 102 chainsToTransform.add(c); 103 } else { 104 Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId()); 105 List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId()); 106 Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId()); 107 if (polyC!=null) 108 chainsToTransform.add(polyC); 109 if (!nonPolyCs.isEmpty()) 110 chainsToTransform.addAll(nonPolyCs); 111 if (waterC!=null) 112 chainsToTransform.add(waterC); 113 } 114 115 for (Chain c: chainsToTransform) { 116 117 Chain chain = (Chain)c.clone(); 118 119 Calc.transform(chain, transformation.getTransformationMatrix()); 120 121 String transformId = transformation.getId(); 122 123 // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure 124 125 // TODO set entities properly in the new structures! at the moment they are a mess... - JD 2016-05-19 126 127 if (multiModel) 128 addChainMultiModel(s, chain, transformId); 129 else 130 addChainFlattened(s, chain, transformId); 131 132 } 133 } 134 135 s.setBiologicalAssembly(true); 136 return s; 137 } 138 139 /** 140 * Orders model transformations by chain ids in the same order as in the asymmetric unit 141 * @param asymUnit 142 * @param transformations 143 */ 144 private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) { 145 final List<String> chainIds = getChainIds(asymUnit); 146 Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() { 147 @Override 148 public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) { 149 // set sort order only if the two ids are identical 150 if (t1.getId().equals(t2.getId())) { 151 return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId()); 152 } else { 153 return t1.getId().compareTo(t2.getId()); 154 } 155 } 156 }); 157 } 158 159 /** 160 * Returns a list of chain ids in the order they are specified in the ATOM 161 * records in the asymmetric unit 162 * @param asymUnit 163 * @return 164 */ 165 private List<String> getChainIds(Structure asymUnit) { 166 List<String> chainIds = new ArrayList<String>(); 167 for ( Chain c : asymUnit.getChains()){ 168 String intChainID = c.getId(); 169 chainIds.add(intChainID); 170 } 171 return chainIds; 172 } 173 174 /** 175 * Adds a chain to the given structure to form a biological assembly, 176 * adding the symmetry expanded chains as new models per transformId. 177 * @param s 178 * @param newChain 179 * @param transformId 180 */ 181 private void addChainMultiModel(Structure s, Chain newChain, String transformId) { 182 183 // multi-model bioassembly 184 185 if ( modelIndex.size() == 0) 186 modelIndex.add("PLACEHOLDER FOR ASYM UNIT"); 187 188 int modelCount = modelIndex.indexOf(transformId); 189 if ( modelCount == -1) { 190 modelIndex.add(transformId); 191 modelCount = modelIndex.indexOf(transformId); 192 } 193 194 if (modelCount == 0) { 195 s.addChain(newChain); 196 } else if (modelCount > s.nrModels()) { 197 List<Chain> newModel = new ArrayList<Chain>(); 198 newModel.add(newChain); 199 s.addModel(newModel); 200 } else { 201 s.addChain(newChain, modelCount-1); 202 } 203 204 } 205 206 /** 207 * Adds a chain to the given structure to form a biological assembly, 208 * adding the symmetry-expanded chains as new chains with renamed 209 * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId). 210 * @param s 211 * @param newChain 212 * @param transformId 213 */ 214 private void addChainFlattened(Structure s, Chain newChain, String transformId) { 215 newChain.setId(newChain.getId()+"_"+transformId); 216 newChain.setName(newChain.getName()+"_"+transformId); 217 s.addChain(newChain); 218 } 219 220 /** 221 * Returns a list of transformation matrices for the generation of a macromolecular 222 * assembly for the specified assembly Id. 223 * 224 * @param psa 225 * @param psags 226 * @param operators 227 * @return list of transformation matrices to generate macromolecular assembly 228 */ 229 public ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly psa, List<PdbxStructAssemblyGen> psags, List<PdbxStructOperList> operators) { 230 init(); 231 232 // first we populate the list of all operators from pdbx_struct_oper_list so that we can then 233 // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators() 234 for (PdbxStructOperList oper: operators){ 235 try { 236 Matrix4d m = new Matrix4d(); 237 m.m00 = Double.parseDouble(oper.getMatrix11()); 238 m.m01 = Double.parseDouble(oper.getMatrix12()); 239 m.m02 = Double.parseDouble(oper.getMatrix13()); 240 241 m.m10 = Double.parseDouble(oper.getMatrix21()); 242 m.m11 = Double.parseDouble(oper.getMatrix22()); 243 m.m12 = Double.parseDouble(oper.getMatrix23()); 244 245 m.m20 = Double.parseDouble(oper.getMatrix31()); 246 m.m21 = Double.parseDouble(oper.getMatrix32()); 247 m.m22 = Double.parseDouble(oper.getMatrix33()); 248 249 m.m03 = Double.parseDouble(oper.getVector1()); 250 m.m13 = Double.parseDouble(oper.getVector2()); 251 m.m23 = Double.parseDouble(oper.getVector3()); 252 253 m.m30 = 0; 254 m.m31 = 0; 255 m.m32 = 0; 256 m.m33 = 1; 257 258 allTransformations.put(oper.getId(), m); 259 260 } catch (NumberFormatException e) { 261 logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", oper.getId(), e.getMessage()); 262 } 263 } 264 265 ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(psa.getId(), psags); 266 transformations.addAll(getBioUnitTransformationsListBinaryOperators(psa.getId(), psags)); 267 transformations.trimToSize(); 268 return transformations; 269 } 270 271 272 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) { 273 274 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<BiologicalAssemblyTransformation>(); 275 276 List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators(); 277 278 279 for ( PdbxStructAssemblyGen psag : psags){ 280 if ( psag.getAssembly_id().equals(assemblyId)) { 281 282 List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(",")); 283 284 operatorResolver.parseOperatorExpressionString(psag.getOper_expression()); 285 286 // apply binary operators to the specified chains 287 // Example 1M4X: generates all products of transformation matrices (1-60)(61-88) 288 for (String chainId : asymIds) { 289 290 for (OrderedPair<String> operator : operators) { 291 Matrix4d original1 = allTransformations.get(operator.getElement1()); 292 Matrix4d original2 = allTransformations.get(operator.getElement2()); 293 if (original1 == null || original2 == null) { 294 logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId); 295 continue; 296 } 297 Matrix4d composed = new Matrix4d(original1); 298 composed.mul(original2); 299 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 300 transform.setChainId(chainId); 301 transform.setId(operator.getElement1() + "x" + operator.getElement2()); 302 transform.setTransformationMatrix(composed); 303 transformations.add(transform); 304 } 305 } 306 } 307 308 } 309 310 return transformations; 311 } 312 313 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) { 314 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<BiologicalAssemblyTransformation>(); 315 316 317 for ( PdbxStructAssemblyGen psag : psags){ 318 if ( psag.getAssembly_id().equals(assemblyId)) { 319 320 operatorResolver.parseOperatorExpressionString(psag.getOper_expression()); 321 List<String> operators = operatorResolver.getUnaryOperators(); 322 323 List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(",")); 324 325 // apply unary operators to the specified chains 326 for (String chainId : asymIds) { 327 for (String operator : operators) { 328 Matrix4d original = allTransformations.get(operator); 329 if (original == null) { 330 logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId); 331 continue; 332 } 333 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 334 transform.setChainId(chainId); 335 transform.setId(operator); 336 transform.setTransformationMatrix(original); 337 transformations.add(transform); 338 } 339 } 340 } 341 } 342 343 return transformations; 344 } 345 346 private void init(){ 347 operatorResolver= new OperatorResolver(); 348 allTransformations = new HashMap<>(); 349 } 350}