001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.symmetry.analysis; 022 023import java.io.FileWriter; 024import java.io.IOException; 025import java.io.PrintWriter; 026import java.text.SimpleDateFormat; 027import java.util.Calendar; 028import java.util.List; 029import java.util.Set; 030 031import org.biojava.nbio.structure.PDBCrystallographicInfo; 032import org.biojava.nbio.structure.Structure; 033import org.biojava.nbio.structure.StructureException; 034import org.biojava.nbio.structure.StructureIO; 035import org.biojava.nbio.structure.align.util.AtomCache; 036import org.biojava.nbio.structure.io.FileParsingParameters; 037import org.biojava.nbio.structure.io.mmcif.AllChemCompProvider; 038import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; 039import org.biojava.nbio.structure.rcsb.GetRepresentatives; 040import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector; 041import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters; 042import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; 043import org.biojava.nbio.structure.symmetry.core.Subunits; 044import org.biojava.nbio.structure.symmetry.misc.ProteinComplexSignature; 045import org.biojava.nbio.structure.symmetry.utils.BlastClustReader; 046import org.biojava.nbio.structure.xtal.SpaceGroup; 047 048 049public class ScanSymmetry implements Runnable { 050 private AtomCache cache = null; 051 private static String RESULT_DIR = "/Users/peter/Results/ScanSymmetry/"; 052 053 054 public ScanSymmetry () { 055 initializeCache(); 056 } 057 058 public static void main(String[] args) { 059 new ScanSymmetry().run(); 060 } 061 062 @Override 063 public void run() { 064 String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime()); 065 066 System.out.println("Reading blastclust files"); 067 068 BlastClustReader reader95 = new BlastClustReader(95); 069 BlastClustReader reader30 = new BlastClustReader(30); 070 071 072 PrintWriter out = null; 073 PrintWriter error = null; 074 075 try { 076 out = new PrintWriter(new FileWriter(RESULT_DIR + timeStamp + "_symm.csv")); 077 error = new PrintWriter(new FileWriter(RESULT_DIR + timeStamp + "_error.txt")); 078 } catch (IOException e1) { 079 e1.printStackTrace(); 080 System.exit(-1); 081 } 082 083 084 long t1 = System.nanoTime(); 085 086 int success = 0; 087 int proteins = 0; 088 int failure = 0; 089 090 String header = "pdbId,bioassembly,local,pseudostoichiometric,stoichiometry,pseudosymmetric,pointgroup,order," + 091 "lowSymmetry,minidentity,maxidentity,subunitrmsd,rmsd,tm,minrmsd,maxrmsd,mintm,maxtm,rmsdintra,tmintra,symdeviation,subunits,nucleiacids,cacount,time,signature95,stoich95,signature30,stoich30,spacegroup"; 092 out.println(header); 093 094 QuatSymmetryParameters parameters = new QuatSymmetryParameters(); 095 096 Set<String> set = GetRepresentatives.getAll(); 097 // pr testing 098 set.clear(); 099 set.add("4HHB"); 100 101 // set skip to true to restart calculation with a specified PDB ID 102 boolean skip = false; 103 String restartId = "10MH"; 104 105 for (String pdbId: set) { 106 if (skip && pdbId.equals(restartId)) { 107 skip = false; 108 } 109 if (skip) { 110 continue; 111 } 112 113 System.out.println("------------- " + pdbId + "-------------"); 114 115 StructureIO.setAtomCache(cache); 116 117 // get number of biological assemblies. If value is -1, the original PDB file is used (bio assembly id = 0) 118 int bioAssemblyCount = StructureIO.getNrBiologicalAssemblies(pdbId); 119 120 int first = 0; 121 int last = 1; 122 if (bioAssemblyCount != -1) { 123 first = 1; 124 last = bioAssemblyCount + 1; 125 } 126 127 for (int i = first; i < last; i++) { 128 Structure structure = null; 129 try { 130 structure = StructureIO.getBiologicalAssembly(pdbId, i); 131 } catch (IOException e) { 132 e.printStackTrace(); 133 error.println(pdbId + "[" + i + "]: " + e.getMessage()); 134 error.flush(); 135 } catch (StructureException e) { 136 e.printStackTrace(); 137 error.println(pdbId + "[" + i + "]: " + e.getMessage()); 138 error.flush(); 139 } 140 141 long ts1 = System.nanoTime(); 142 143 try { 144 SpaceGroup spaceGroup =null; 145 //float resolution = 0.0f; 146 if (structure != null) { 147 PDBCrystallographicInfo info = structure.getCrystallographicInfo(); 148 if (info != null) { 149 spaceGroup = info.getSpaceGroup(); 150 } 151 //PDBHeader pdbHeader = structure.getPDBHeader(); 152 //resolution = pdbHeader.getResolution(); 153 } 154 QuatSymmetryDetector detector = new QuatSymmetryDetector(structure, parameters); 155 156 if (detector.hasProteinSubunits()) { 157 long ts2 = System.nanoTime(); 158 159 int time = Math.round((ts2-ts1)/1000000.0f); 160 161 // save global symmetry results 162 List<QuatSymmetryResults> globalResults = detector.getGlobalSymmetry(); 163 printToCsv(reader95, reader30, out, pdbId, 164 i, time, globalResults, spaceGroup); 165 166 // save local symmetry results 167 for (List<QuatSymmetryResults> localResults: detector.getLocalSymmetries()) { 168 printToCsv(reader95, reader30, out, pdbId, 169 i, time, localResults, spaceGroup); 170 } 171 proteins++; 172 } 173 success++; 174 out.flush(); 175 } catch (Exception e) { 176 failure++; 177 e.printStackTrace(); 178 error.println(pdbId + "[" + i + "]: " + e.getMessage()); 179 error.flush(); 180 } 181 } 182 } 183 long t2 = System.nanoTime(); 184 185 System.out.println("PDBs succeeded: " + success); 186 System.out.println("PDBs failed : " + failure); 187 System.out.println("Proteins : " + proteins); 188 System.out.println("Total structure: " + set.size()); 189 System.out.println("Cpu time: " + (t2-t1)/1000000 + " ms."); 190 191 out.close(); 192 error.close(); 193 } 194 195 private void printToCsv(BlastClustReader reader95, 196 BlastClustReader reader30, PrintWriter out, String pdbId, 197 int bioAssemblyId, int time, List<QuatSymmetryResults> resultsList, SpaceGroup spaceGroup) { 198 199 for (QuatSymmetryResults results: resultsList) { 200 ProteinComplexSignature s95 = new ProteinComplexSignature(pdbId, results.getSubunits().getChainIds(), reader95); 201 String signature95 = s95.getComplexSignature(); 202 String stoich95 = s95.getComplexStoichiometry(); 203 ProteinComplexSignature s30 = new ProteinComplexSignature(pdbId, results.getSubunits().getChainIds(), reader30); 204 String signature30 = s30.getComplexSignature(); 205 String stoich30 = s30.getComplexStoichiometry(); 206 int order = 1; 207 if (!results.getSymmetry().equals("H")) { 208 order = results.getRotationGroup().getOrder(); 209 } 210 211 out.println("PDB" + pdbId +"," + bioAssemblyId + "," + results.isLocal() + 212 "," + results.getSubunits().isPseudoStoichiometric() + 213 "," + results.getSubunits().getStoichiometry() + 214 "," + results.getSubunits().isPseudoSymmetric() + 215 "," + results.getSymmetry() + 216 "," + order + 217 "," + isLowSymmetry(results) + 218 "," + Math.round(results.getSubunits().getMinSequenceIdentity()*100.0) + 219 "," + Math.round(results.getSubunits().getMaxSequenceIdentity()*100.0) + 220 "," + (float) results.getScores().getRmsdCenters() + 221 "," + (float) results.getScores().getRmsd() + 222 "," + (float) results.getScores().getTm() + 223 "," + (float) results.getScores().getMinRmsd() + 224 "," + (float) results.getScores().getMaxRmsd() + 225 "," + (float) results.getScores().getMinTm() + 226 "," + (float) results.getScores().getMaxTm() + 227 "," + (float) results.getScores().getRmsdIntra() + 228 "," + (float) results.getScores().getTmIntra() + 229 "," + (float) results.getScores().getSymDeviation() + 230 "," + results.getSubunits().getSubunitCount() + 231 "," + results.getNucleicAcidChainCount() + 232 "," + results.getSubunits().getCalphaCount() + 233 "," + time + 234 "," + signature95 + 235 "," + stoich95 + 236 "," + signature30 + 237 "," + stoich30 + 238 "," + spaceGroup); 239 } 240 } 241 242 private boolean isLowSymmetry(QuatSymmetryResults results) { 243 return getMinFold(results.getSubunits()) > 1 && results.getRotationGroup() != null && results.getRotationGroup().getPointGroup().equals("C1"); 244 } 245 246 private int getMinFold(Subunits subunits) { 247 if (subunits.getFolds().size() > 1) { 248 return subunits.getFolds().get(1); 249 } 250 return subunits.getFolds().get(0); 251 } 252 253 private void initializeCache() { 254 cache = new AtomCache(); 255 FileParsingParameters params = cache.getFileParsingParams(); 256 cache.setUseMmCif(true); 257 params.setParseCAOnly(true); 258// MmCifBiolAssemblyProvider mmcifProvider = new MmCifBiolAssemblyProvider(); 259// BioUnitDataProviderFactory.setBioUnitDataProvider(mmcifProvider.getClass().getCanonicalName()); 260 ChemCompGroupFactory.setChemCompProvider(new AllChemCompProvider()); 261// ChemCompGroupFactory.setChemCompProvider(new DownloadChemCompProvider()); 262 } 263}