001package demo; 002 003import org.biojava.nbio.structure.Structure; 004import org.biojava.nbio.structure.io.StructureFiletype; 005import org.biojava.nbio.structure.StructureTools; 006import org.biojava.nbio.structure.align.util.AtomCache; 007import org.biojava.nbio.structure.io.FileParsingParameters; 008import org.biojava.nbio.structure.StructureIO; 009 010/* 011 * BioJava development code 012 * 013 * This code may be freely distributed and modified under the 014 * terms of the GNU Lesser General Public Licence. This should 015 * be distributed with the code. If you do not have a copy, 016 * see: 017 * 018 * http://www.gnu.org/copyleft/lesser.html 019 * 020 * Copyright for this code is held jointly by the individual 021 * authors. These should be listed in @author doc comments. 022 * 023 * For more information on the BioJava project and its aims, 024 * or to join the biojava-l mailing list, visit the home page 025 * at: 026 * 027 * http://www.biojava.org/ 028 * 029 * created at Sep 19, 2013 030 * Author: Andreas Prlic 031 */ 032 033public class DemoShowLargeAssembly { 034 035 public static void main(String[] args){ 036 037 // This loads the PBCV-1 virus capsid, one of, if not the biggest biological assembly in terms on nr. of atoms. 038 // The 1m4x.pdb1.gz file has 313 MB (compressed) 039 // This Structure requires a minimum of 9 GB of memory to be able to be loaded in memory. 040 041 String pdbId = "1M4X"; 042 043 Structure bigStructure = readStructure(pdbId,1); 044 045 // let's take a look how much memory this consumes currently 046 047 Runtime r = Runtime.getRuntime(); 048 049 // let's try to trigger the Java Garbage collector 050 r.gc(); 051 052 System.out.println("Memory consumption after " + pdbId + 053 " structure has been loaded into memory:"); 054 055 String mem = String.format("Total %dMB, Used %dMB, Free %dMB, Max %dMB", 056 r.totalMemory() / 1048576, 057 (r.totalMemory() - r.freeMemory()) / 1048576, 058 r.freeMemory() / 1048576, 059 r.maxMemory() / 1048576); 060 061 System.out.println(mem); 062 063 // 9693 atoms in the asymmetric unit * 1680 copies per assembly = 16284240 atoms 064 System.out.println("# atoms: " + StructureTools.getNrAtoms(bigStructure)); 065 066 } 067 /** Load a specific biological assembly for a PDB entry 068 * 069 * @param pdbId .. the PDB ID 070 * @param bioAssemblyId .. the first assembly has the bioAssemblyId 1 071 * @return a Structure object or null if something went wrong. 072 */ 073 public static Structure readStructure(String pdbId, int bioAssemblyId) { 074 075 // pre-computed files use lower case PDB IDs 076 pdbId = pdbId.toLowerCase(); 077 078 // we just need this to track where to store PDB files 079 // this checks the PDB_DIR property (and uses a tmp location if not set) 080 AtomCache cache = new AtomCache(); 081 cache.setFiletype(StructureFiletype.CIF); 082 FileParsingParameters p = cache.getFileParsingParams(); 083 084 // some bio assemblies are large, we want an all atom representation and avoid 085 // switching to a Calpha-only representation for large molecules 086 // note, this requires several GB of memory for some of the largest assemblies, such a 1MX4 087 p.setAtomCaThreshold(Integer.MAX_VALUE); 088 089 // parse remark 350 090 p.setParseBioAssembly(true); 091 092 // download missing files 093 094 Structure structure = null; 095 try { 096 structure = StructureIO.getBiologicalAssembly(pdbId,bioAssemblyId); 097 } catch (Exception e){ 098 e.printStackTrace(); 099 return null; 100 } 101 return structure; 102 } 103}