001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.nbio.structure.quaternary; 023 024import org.biojava.nbio.structure.Calc; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.EntityInfo; 027import org.biojava.nbio.structure.Structure; 028import org.rcsb.cif.schema.mm.PdbxStructAssembly; 029import org.rcsb.cif.schema.mm.PdbxStructAssemblyGen; 030import org.rcsb.cif.schema.mm.PdbxStructOperList; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import javax.vecmath.Matrix4d; 035import java.util.*; 036 037/** 038 * Reconstructs the quaternary structure of a protein from an asymmetric unit 039 * 040 * @author Peter Rose 041 * @author Andreas Prlic 042 * @author Jose Duarte 043 * 044 */ 045public class BiologicalAssemblyBuilder { 046 047 private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class); 048 049 /** 050 * The character separating the original chain identifier from the operator id. 051 */ 052 public static final String SYM_CHAIN_ID_SEPARATOR = "_"; 053 054 /** 055 * The character separating operator ids that are composed. 056 */ 057 public static final String COMPOSED_OPERATOR_SEPARATOR = "x"; 058 059 private OperatorResolver operatorResolver; 060 061 062 /** 063 * All matrix operators present in _pdbx_struct_oper_list. 064 * Identifiers (_pdbx_struct_oper_list.id) to matrix operators. 065 */ 066 private Map<String, Matrix4d> allTransformations; 067 068 private List<String> modelIndex = new ArrayList<>(); 069 070 public BiologicalAssemblyBuilder(){ 071 init(); 072 } 073 074 /** 075 * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations, 076 * by adding symmetry partners as new models. 077 * The output Structure will be different depending on the multiModel parameter: 078 * <ul> 079 * <li> 080 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 081 * the first one are discarded. 082 * </li> 083 * <li> 084 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 085 * originalAsymId_transformId and originalAuthId_transformId) 086 * </li> 087 * </ul> 088 * @param asymUnit 089 * @param transformations 090 * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF), 091 * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB). 092 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 093 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 094 * @return 095 */ 096 public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) { 097 098 // ensure that new chains are build in the same order as they appear in the asymmetric unit 099 orderTransformationsByChainId(asymUnit, transformations); 100 101 Structure s = asymUnit.clone(); 102 103 Map<Integer, EntityInfo> entityInfoMap = new HashMap<>(); 104 // this resets all models (not only the first one): this is important for NMR (multi-model) 105 // like that we can be sure we start with an empty structures and we add models or chains to it 106 s.resetModels(); 107 s.setEntityInfos(new ArrayList<>()); 108 109 for (BiologicalAssemblyTransformation transformation : transformations){ 110 111 List<Chain> chainsToTransform = new ArrayList<>(); 112 113 // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest 114 if (useAsymIds) { 115 Chain c = asymUnit.getChain(transformation.getChainId()); 116 chainsToTransform.add(c); 117 } else { 118 Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId()); 119 List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId()); 120 Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId()); 121 if (polyC!=null) 122 chainsToTransform.add(polyC); 123 if (!nonPolyCs.isEmpty()) 124 chainsToTransform.addAll(nonPolyCs); 125 if (waterC!=null) 126 chainsToTransform.add(waterC); 127 } 128 129 for (Chain c: chainsToTransform) { 130 131 Chain chain = (Chain)c.clone(); 132 133 Calc.transform(chain, transformation.getTransformationMatrix()); 134 135 String transformId = transformation.getId(); 136 137 // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure 138 139 if (multiModel) 140 addChainMultiModel(s, chain, transformId); 141 else 142 addChainFlattened(s, chain, transformId); 143 144 EntityInfo entityInfo; 145 if (!entityInfoMap.containsKey(chain.getEntityInfo().getMolId())) { 146 entityInfo = new EntityInfo(chain.getEntityInfo()); 147 entityInfoMap.put(chain.getEntityInfo().getMolId(), entityInfo); 148 s.addEntityInfo(entityInfo); 149 } else { 150 entityInfo = entityInfoMap.get(chain.getEntityInfo().getMolId()); 151 } 152 chain.setEntityInfo(entityInfo); 153 entityInfo.addChain(chain); 154 155 } 156 } 157 158 s.setBiologicalAssembly(true); 159 return s; 160 } 161 162 /** 163 * Orders model transformations by chain ids in the same order as in the asymmetric unit 164 * @param asymUnit 165 * @param transformations 166 */ 167 private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) { 168 final List<String> chainIds = getChainIds(asymUnit); 169 Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() { 170 @Override 171 public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) { 172 // set sort order only if the two ids are identical 173 if (t1.getId().equals(t2.getId())) { 174 return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId()); 175 } else { 176 return t1.getId().compareTo(t2.getId()); 177 } 178 } 179 }); 180 } 181 182 /** 183 * Returns a list of chain ids in the order they are specified in the ATOM 184 * records in the asymmetric unit 185 * @param asymUnit 186 * @return 187 */ 188 private List<String> getChainIds(Structure asymUnit) { 189 List<String> chainIds = new ArrayList<>(); 190 for ( Chain c : asymUnit.getChains()){ 191 String intChainID = c.getId(); 192 chainIds.add(intChainID); 193 } 194 return chainIds; 195 } 196 197 /** 198 * Adds a chain to the given structure to form a biological assembly, 199 * adding the symmetry expanded chains as new models per transformId. 200 * @param s 201 * @param newChain 202 * @param transformId 203 */ 204 private void addChainMultiModel(Structure s, Chain newChain, String transformId) { 205 206 // multi-model bioassembly 207 208 if ( modelIndex.size() == 0) 209 modelIndex.add("PLACEHOLDER FOR ASYM UNIT"); 210 211 int modelCount = modelIndex.indexOf(transformId); 212 if ( modelCount == -1) { 213 modelIndex.add(transformId); 214 modelCount = modelIndex.indexOf(transformId); 215 } 216 217 if (modelCount == 0) { 218 s.addChain(newChain); 219 } else if (modelCount > s.nrModels()) { 220 List<Chain> newModel = new ArrayList<>(); 221 newModel.add(newChain); 222 s.addModel(newModel); 223 } else { 224 s.addChain(newChain, modelCount-1); 225 } 226 227 } 228 229 /** 230 * Adds a chain to the given structure to form a biological assembly, 231 * adding the symmetry-expanded chains as new chains with renamed 232 * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId). 233 * @param s 234 * @param newChain 235 * @param transformId 236 */ 237 private void addChainFlattened(Structure s, Chain newChain, String transformId) { 238 newChain.setId(newChain.getId()+SYM_CHAIN_ID_SEPARATOR+transformId); 239 newChain.setName(newChain.getName()+SYM_CHAIN_ID_SEPARATOR+transformId); 240 s.addChain(newChain); 241 } 242 243 /** 244 * Returns a list of transformation matrices for the generation of a macromolecular 245 * assembly for the specified assembly Id. 246 * 247 * @param pdbxStructAssembly 248 * @param assemblyIndex 249 * @param pdbxStructAssemblyGen 250 * @param pdbxStructOperList 251 * @return list of transformation matrices to generate macromolecular assembly 252 */ 253 public List<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly pdbxStructAssembly, 254 int assemblyIndex, 255 PdbxStructAssemblyGen pdbxStructAssemblyGen, 256 PdbxStructOperList pdbxStructOperList) { 257 init(); 258 259 // first we populate the list of all operators from pdbx_struct_oper_list so that we can then 260 // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators() 261 for (int i = 0; i < pdbxStructOperList.getRowCount(); i++) { 262 try { 263 Matrix4d m = new Matrix4d(); 264 m.m00 = pdbxStructOperList.getMatrix11().get(i); 265 m.m01 = pdbxStructOperList.getMatrix12().get(i); 266 m.m02 = pdbxStructOperList.getMatrix13().get(i); 267 268 m.m10 = pdbxStructOperList.getMatrix21().get(i); 269 m.m11 = pdbxStructOperList.getMatrix22().get(i); 270 m.m12 = pdbxStructOperList.getMatrix23().get(i); 271 272 m.m20 = pdbxStructOperList.getMatrix31().get(i); 273 m.m21 = pdbxStructOperList.getMatrix32().get(i); 274 m.m22 = pdbxStructOperList.getMatrix33().get(i); 275 276 m.m03 = pdbxStructOperList.getVector1().get(i); 277 m.m13 = pdbxStructOperList.getVector2().get(i); 278 m.m23 = pdbxStructOperList.getVector3().get(i); 279 280 m.m30 = 0; 281 m.m31 = 0; 282 m.m32 = 0; 283 m.m33 = 1; 284 285 allTransformations.put(pdbxStructOperList.getId().get(i), m); 286 } catch (NumberFormatException e) { 287 logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", pdbxStructOperList.getId().get(i), e.getMessage()); 288 } 289 } 290 291 String assemblyId = pdbxStructAssembly.getId().get(assemblyIndex); 292 ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(assemblyId, pdbxStructAssemblyGen); 293 transformations.addAll(getBioUnitTransformationsListBinaryOperators(assemblyId, pdbxStructAssemblyGen)); 294 transformations.trimToSize(); 295 return transformations; 296 } 297 298 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) { 299 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>(); 300 List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators(); 301 302 for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) { 303 if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) { 304 continue; 305 } 306 307 String[] asymIds= pdbxStructAssemblyGen.getAsymIdList().get(i).split(","); 308 operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i)); 309 310 // apply binary operators to the specified chains 311 // Example 1M4X: generates all products of transformation matrices (1-60)(61-88) 312 for (String chainId : asymIds) { 313 for (OrderedPair<String> operator : operators) { 314 Matrix4d original1 = allTransformations.get(operator.getElement1()); 315 Matrix4d original2 = allTransformations.get(operator.getElement2()); 316 if (original1 == null || original2 == null) { 317 logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId); 318 continue; 319 } 320 Matrix4d composed = new Matrix4d(original1); 321 composed.mul(original2); 322 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 323 transform.setChainId(chainId); 324 transform.setId(operator.getElement1() + COMPOSED_OPERATOR_SEPARATOR + operator.getElement2()); 325 transform.setTransformationMatrix(composed); 326 transformations.add(transform); 327 } 328 } 329 } 330 331 return transformations; 332 } 333 334 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) { 335 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>(); 336 337 for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) { 338 if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) { 339 continue; 340 } 341 342 operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i)); 343 List<String> operators = operatorResolver.getUnaryOperators(); 344 String[] asymIds = pdbxStructAssemblyGen.getAsymIdList().get(i).split(","); 345 346 // apply unary operators to the specified chains 347 for (String chainId : asymIds) { 348 for (String operator : operators) { 349 Matrix4d original = allTransformations.get(operator); 350 if (original == null) { 351 logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId); 352 continue; 353 } 354 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 355 transform.setChainId(chainId); 356 transform.setId(operator); 357 transform.setTransformationMatrix(original); 358 transformations.add(transform); 359 } 360 } 361 } 362 363 return transformations; 364 } 365 366 private void init() { 367 operatorResolver = new OperatorResolver(); 368 allTransformations = new HashMap<>(); 369 } 370}