/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io;

import org.biojava.nbio.structure.jama.Matrix;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;

import java.util.*;

/**
 * Parses REMARK 350 records in a PDB file and creates transformations to
 * construct the quaternary structure of a protein from an asymmetric unit.
 * <p>
 * The parser is stateful: lines must be passed to
 * {@link #pdb_REMARK_350_Handler(String)} in file order, because each BIOMT
 * row is interpreted relative to the most recent BIOMOLECULE and chain-list
 * records.
 *
 * @author Peter Rose
 * @author Andreas Prlic
 *
 */
public class PDBBioAssemblyParser {

    //private static final Logger logger = LoggerFactory.getLogger(PDBBioAssemblyParser.class);

    /** Id of the bioassembly currently being parsed; null until a BIOMOLECULE record is seen. */
    private Integer currentBioMolecule = null;
    /** Chain ids that the next completed BIOMT matrix applies to. */
    private final List<String> currentChainIDs = new ArrayList<>();
    /** Rotation part of the operator currently being read (one row per BIOMT1/2/3 line). */
    private Matrix currentMatrix = null;
    /** Translation part of the operator currently being read. */
    private double[] shift = null;
    /** Parsed bioassemblies, keyed by bioassembly id. */
    private final Map<Integer, BioAssemblyInfo> transformationMap = new HashMap<>();
    /** Running operator counter, restarted at 1 for every BIOMOLECULE record. */
    private int modelNumber = 1;

    /** Transformations collected for the current bioassembly; created by {@link #initialize()}. */
    private List<BiologicalAssemblyTransformation> transformations;

    /**
     * Parses a single REMARK 350 line. See format description:
     * http://www.wwpdb.org/documentation/format33/remarks2.html
     * <p>
     * Lines that match none of the recognised REMARK 350 patterns are ignored.
     *
     * @param line one line of the PDB file
     */
    public void pdb_REMARK_350_Handler(String line) {

        if (line.startsWith("REMARK 350 BIOMOLECULE:")) {
            // a new bioassembly starts: reset all per-assembly state first
            initialize();
            currentBioMolecule = Integer.parseInt(line.substring(24).trim());

        }
        // not parsing anymore the size (from biojava 5.0), thus this is not needed anymore
        // eventually if needed this could be used to
        // infer if bioassembly is author or software determined
        //else if ( line.matches("REMARK 350 \\w+ DETERMINED BIOLOGICAL UNIT:.*" ) ||
        //		line.matches("REMARK 350 \\w+ DETERMINED QUATERNARY STRUCTURE:.*" )) {
        // text can be :
        // author determined biological unit
        // software determined quaternary structure
        //}
        else if (line.startsWith("REMARK 350 APPLY THE FOLLOWING TO CHAINS:")
                || line.startsWith("REMARK 350 IN ADDITION APPLY THE FOLLOWING TO CHAINS:")) {
            // both record types start a fresh chain list for the operators that follow
            currentChainIDs.clear();
            addToCurrentChainList(line);

        } else if (line.startsWith("REMARK 350") && line.contains("AND CHAINS:")) {
            // continuation line: extends the chain list started by a previous record
            addToCurrentChainList(line);

        } else if (line.startsWith("REMARK 350 BIOMT")) {
            // an operator is stored only once its third row has been read
            if (readMatrix(line)) {
                saveMatrix();
                modelNumber++;
            }
        }
    }

    /**
     * Returns a map of bioassembly transformations.
     *
     * @return the map from bioassembly id to the bioassembly parsed so far;
     *         this is the parser's internal map, not a copy
     */
    public Map<Integer, BioAssemblyInfo> getTransformationMap() {
        return transformationMap;
    }

    /**
     * Parses a row of a BIOMT matrix in a REMARK 350 record.
     * Example: REMARK 350 BIOMT1 2 1.000000 0.000000 0.000000 0.00000
     * <p>
     * After splitting on spaces, token 2 is {@code BIOMTx} (x = row number),
     * tokens 4-6 are the rotation row and token 7 is the translation component.
     *
     * @param line a REMARK 350 BIOMT line
     * @return true if 3rd line of matrix has been parsed (matrix is complete)
     */
    private boolean readMatrix(String line) {
        // split by one or more spaces
        String[] items = line.split("[ ]+");

        // parse BIOMTx, where x is the position in the matrix
        int row = Integer.parseInt(items[2].substring(5));
        if (row == 1) {
            // first row of a new operator: start from fresh storage so that
            // previously saved operators are not overwritten
            currentMatrix = Matrix.identity(3, 3);
            shift = new double[3];
        }

        // Values are stored as doubles, so parse them as doubles; going
        // through float would add rounding error to every matrix element.
        int rowIndex = row - 1;
        for (int col = 0; col < 3; col++) {
            currentMatrix.set(rowIndex, col, Double.parseDouble(items[4 + col]));
        }
        shift[rowIndex] = Double.parseDouble(items[7]);

        // return true if 3rd row of matrix has been processed
        return row == 3;
    }

    /**
     * Saves the current transformation matrix once for each chain in the
     * current chain list, and registers the current bioassembly in the
     * transformation map if it is not there yet.
     */
    private void saveMatrix() {

        for (String chainId : currentChainIDs) {
            BiologicalAssemblyTransformation transformation = new BiologicalAssemblyTransformation();
            transformation.setRotationMatrix(currentMatrix.getArray());
            transformation.setTranslation(shift);
            transformation.setId(String.valueOf(modelNumber));
            transformation.setChainId(chainId);
            transformations.add(transformation);
        }

        // The bioassembly is registered on its first operator only; it holds the
        // same list instance, so transformations added later are visible through it.
        if (!transformationMap.containsKey(currentBioMolecule)) {
            BioAssemblyInfo bioAssembly = new BioAssemblyInfo();
            bioAssembly.setId(currentBioMolecule);
            bioAssembly.setTransforms(transformations);
            transformationMap.put(currentBioMolecule, bioAssembly);
        }
    }

    /**
     * Parses list of chain ids (A, B, C, etc.) found after the first colon of
     * the line and appends them to the current chain list.
     *
     * @param line a REMARK 350 line containing a chain list
     */
    private void addToCurrentChainList(String line) {
        int index = line.indexOf(":");
        String chainList = line.substring(index + 1).trim();
        // split by spaces or commas
        for (String chainId : chainList.split("[ ,]+")) {
            // split() yields an empty token for an empty list or a leading
            // separator; an empty string is not a chain id, so skip it
            if (!chainId.isEmpty()) {
                currentChainIDs.add(chainId);
            }
        }
    }

    /**
     * Resets the per-bioassembly parsing state. Called whenever a new
     * BIOMOLECULE record is encountered.
     */
    private void initialize() {
        transformations = new ArrayList<>();
        currentMatrix = Matrix.identity(3, 3);
        currentBioMolecule = null;
        shift = new double[3];
        modelNumber = 1;
    }

    /**
     * Set the macromolecularSize fields of the parsed bioassemblies.
     * This can only be called after the full PDB file has been read so that
     * all the info for all bioassemblies has been gathered.
     * Note that an explicit method to set the field is necessary here because
     * in PDB files the transformations contain only the author chain ids, corresponding
     * to polymeric chains, whilst in mmCIF files the transformations
     * contain all asym ids of both polymers and non-polymers.
     */
    public void setMacromolecularSizes() {
        for (BioAssemblyInfo bioAssembly : transformationMap.values()) {
            bioAssembly.setMacromolecularSize(bioAssembly.getTransforms().size());
        }
    }
}