/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io;

import org.biojava.nbio.structure.jama.Matrix;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses REMARK 350 records in a PDB file and creates transformations to
 * construct the quaternary structure of a protein from an asymmetric unit.
 *
 * <p>The parser is stateful: feed it the REMARK 350 lines one by one, in file
 * order, through {@link #pdb_REMARK_350_Handler(String)} and then read the
 * result from {@link #getTransformationMap()}.
 *
 * @author Peter Rose
 * @author Andreas Prlic
 *
 */
public class PDBBioAssemblyParser {

    private static final Logger logger = LoggerFactory.getLogger(PDBBioAssemblyParser.class);

    /** Any run of whitespace; used to normalise column-aligned records. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Matches (on a whitespace-normalised line) both
     * "AUTHOR DETERMINED BIOLOGICAL UNIT: ..." and
     * "SOFTWARE DETERMINED QUATERNARY STRUCTURE: ..." style records.
     */
    private static final Pattern DETERMINED_ASSEMBLY = Pattern.compile(
            "REMARK 350 \\w+ DETERMINED (?:BIOLOGICAL UNIT|QUATERNARY STRUCTURE):.*");

    /** Leading decimal digits of an oligomeric state such as "24-meric". */
    private static final Pattern LEADING_DIGITS = Pattern.compile("\\d+");

    private static final String BIOMOLECULE_PREFIX = "REMARK 350 BIOMOLECULE:";

    /** Lower-case oligomeric state name to number of subunits (1..20). */
    private static final Map<String, Integer> OLIGOMER_SIZES = createOligomerSizes();

    private Integer currentBioMolecule = null;
    private final List<String> currentChainIDs = new ArrayList<String>();
    // The three fields below are initialised eagerly so that a BIOMT record
    // appearing before any BIOMOLECULE record cannot hit a null reference.
    private Matrix currentMatrix = Matrix.identity(3, 3);
    private double[] shift = new double[3];
    private final Map<Integer, BioAssemblyInfo> transformationMap = new HashMap<Integer, BioAssemblyInfo>();
    private int modelNumber = 1;
    private int currentMmSize;

    private List<BiologicalAssemblyTransformation> transformations =
            new ArrayList<BiologicalAssemblyTransformation>();

    /**
     * Parses a single REMARK 350 line and updates the parser state.
     * See format description:
     * http://www.wwpdb.org/documentation/format33/remarks2.html
     *
     * <p>Runs of whitespace in the line are collapsed to a single space before
     * the record type is recognised, so column-aligned records such as
     * {@code "REMARK 350   BIOMT1   1 ..."} are matched as well as
     * single-spaced ones. Lines that match none of the known record types are
     * ignored.
     *
     * @param line one line of the PDB file; {@code null} is ignored
     * @throws NumberFormatException if a biomolecule id, BIOMT row index or
     *         matrix value cannot be parsed as a number
     */
    public void pdb_REMARK_350_Handler(String line) {

        if (line == null) {
            return;
        }

        // Record keywords are matched on the normalised form; the payload after
        // the colon is whitespace-insensitive anyway (trimmed / split on blanks).
        String record = WHITESPACE_RUN.matcher(line).replaceAll(" ");

        if (record.startsWith(BIOMOLECULE_PREFIX)) {
            initialize();
            // Take everything after the colon; do not assume a blank follows it.
            currentBioMolecule = Integer.parseInt(record.substring(BIOMOLECULE_PREFIX.length()).trim());

        } else if (DETERMINED_ASSEMBLY.matcher(record).matches()) {
            // text can be :
            // author determined biological unit
            // software determined quaternary structure
            currentMmSize = getMmSize(record);

        } else if (record.startsWith("REMARK 350 APPLY THE FOLLOWING TO CHAINS:")
                || record.startsWith("REMARK 350 IN ADDITION APPLY THE FOLLOWING TO CHAINS:")) {
            // a new chain list replaces the previous one
            currentChainIDs.clear();
            addToCurrentChainList(record);

        } else if (record.startsWith("REMARK 350") && record.contains("AND CHAINS:")) {
            // continuation line: extends the current chain list
            addToCurrentChainList(record);

        } else if (record.startsWith("REMARK 350 BIOMT")) {
            if (readMatrix(record)) {
                saveMatrix();
                modelNumber++;
            }
        }
    }

    /**
     * Returns the bioassembly information collected so far, keyed by
     * biomolecule id. The returned map is the parser's internal map, not a copy.
     *
     * @return map of biomolecule id to bioassembly information
     */
    public Map<Integer, BioAssemblyInfo> getTransformationMap() {
        return transformationMap;
    }

    /**
     * Parses a row of a BIOMT matrix in a REMARK 350 record.
     * Example: REMARK 350   BIOMT1   2  1.000000  0.000000  0.000000        0.00000
     *
     * @param line the (whitespace-separated) BIOMT record
     * @return true if 3rd line of matrix has been parsed (matrix is complete)
     */
    private boolean readMatrix(String line) {
        // split by one or more spaces
        String[] items = line.split("[ ]+");

        // parse BIOMTx, where x is the position in the matrix
        int row = Integer.parseInt(items[2].substring(5));
        if (row == 1) {
            // first row starts a new operator: fresh matrix and translation
            currentMatrix = Matrix.identity(3, 3);
            shift = new double[3];
        }

        // Parse as double: the values are stored as doubles, so going through
        // float would only throw away precision.
        int r = row - 1;
        currentMatrix.set(r, 0, Double.parseDouble(items[4]));
        currentMatrix.set(r, 1, Double.parseDouble(items[5]));
        currentMatrix.set(r, 2, Double.parseDouble(items[6]));
        shift[r] = Double.parseDouble(items[7]);

        // return true if 3rd row of matrix has been processed
        return row == 3;
    }

    /**
     * Saves the current transformation matrix once for every chain in the
     * current chain list, and registers the biological assembly in the
     * transformation map the first time its id is seen.
     */
    private void saveMatrix() {

        if (currentBioMolecule == null) {
            // BIOMT records without a preceding (parsable) BIOMOLECULE record
            // cannot be attributed to an assembly.
            logger.warn("Found BIOMT records before any BIOMOLECULE record, ignoring transformation {}", modelNumber);
            return;
        }

        for (String chainId : currentChainIDs) {
            BiologicalAssemblyTransformation transformation = new BiologicalAssemblyTransformation();
            transformation.setRotationMatrix(currentMatrix.getArray());
            transformation.setTranslation(shift);
            transformation.setId(String.valueOf(modelNumber));
            transformation.setChainId(chainId);
            transformations.add(transformation);
        }

        if (!transformationMap.containsKey(currentBioMolecule)) {
            BioAssemblyInfo bioAssembly = new BioAssemblyInfo();
            bioAssembly.setId(currentBioMolecule);
            if (currentMmSize == 0) {
                logger.warn("No macromolecular size could be parsed for biological assembly {}", currentBioMolecule);
            }
            bioAssembly.setMacromolecularSize(currentMmSize);
            // The same list instance keeps receiving the transformations of
            // later operators of this biomolecule.
            // NOTE(review): presumably BioAssemblyInfo keeps this reference
            // rather than copying the list - confirm.
            bioAssembly.setTransforms(transformations);
            transformationMap.put(currentBioMolecule, bioAssembly);
        }
    }

    /**
     * Extracts the macromolecular size from a
     * "... DETERMINED BIOLOGICAL UNIT: DIMERIC" style line.
     *
     * @param line the record; everything after the first ':' is interpreted
     * @return the number of subunits, or 0 if it could not be determined
     */
    private int getMmSize(String line) {
        int index = line.indexOf(':');
        // Take the whole remainder: the line is not guaranteed to be padded,
        // so the last character may belong to the oligomeric state.
        String mmString = line.substring(index + 1).trim().toLowerCase(Locale.ROOT);
        return getSizefromString(mmString);
    }

    /**
     * Converts an oligomeric state to a number of subunits. Named states from
     * "monomeric" (1) to "eicosameric" (20) are recognised case-insensitively;
     * otherwise a leading decimal number (as in "24-meric") is used.
     *
     * @param oligomer the oligomeric state
     * @return the number of subunits, or 0 if the state is not recognised
     */
    private static int getSizefromString(String oligomer) {
        // Locale.ROOT: the default locale may lower-case 'I' to a dotless i
        String key = oligomer.toLowerCase(Locale.ROOT);

        Integer named = OLIGOMER_SIZES.get(key);
        if (named != null) {
            return named.intValue();
        }

        Matcher digits = LEADING_DIGITS.matcher(key);
        if (digits.lookingAt()) {
            try {
                return Integer.parseInt(digits.group());
            } catch (NumberFormatException e) {
                // too many digits for an int: treat as unknown
                logger.warn("Could not parse macromolecular size from '{}'", oligomer);
            }
        }
        return 0;
    }

    /**
     * Builds the lookup table from oligomeric state name to subunit count.
     */
    private static Map<String, Integer> createOligomerSizes() {
        // position in this array + 1 is the number of subunits
        String[] names = {
            "monomeric", "dimeric", "trimeric", "tetrameric", "pentameric",
            "hexameric", "heptameric", "octameric", "nonameric", "decameric",
            "undecameric", "dodecameric", "tridecameric", "tetradecameric", "pentadecameric",
            "hexadecameric", "heptadecameric", "octadecameric", "nonadecameric", "eicosameric"
        };
        Map<String, Integer> sizes = new HashMap<String, Integer>();
        for (int i = 0; i < names.length; i++) {
            sizes.put(names[i], Integer.valueOf(i + 1));
        }
        return Collections.unmodifiableMap(sizes);
    }

    /**
     * Parses list of chain ids (A, B, C, etc.) found after the first ':' of
     * the line and appends them to the current chain list.
     *
     * @param line the record containing the chain list
     */
    private void addToCurrentChainList(String line) {
        int index = line.indexOf(":");
        String chainList = line.substring(index + 1).trim();
        // split by spaces or commas
        for (String chainId : chainList.split("[ ,]+")) {
            // splitting an empty list yields one empty token; skip it
            if (!chainId.isEmpty()) {
                currentChainIDs.add(chainId);
            }
        }
    }

    /**
     * Resets the per-biomolecule state; called when a new BIOMOLECULE record
     * starts.
     */
    private void initialize() {
        transformations = new ArrayList<BiologicalAssemblyTransformation>();
        currentMatrix = Matrix.identity(3, 3);
        currentBioMolecule = null;
        // chains of the previous biomolecule must not leak into this one
        currentChainIDs.clear();
        shift = new double[3];
        modelNumber = 1;
        currentMmSize = 0;
    }
}