001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.xtal; 022 023import org.biojava.nbio.structure.xtal.io.SpaceGroupMapRoot; 024import org.slf4j.Logger; 025import org.slf4j.LoggerFactory; 026 027import jakarta.xml.bind.JAXBException; 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.InputStreamReader; 032import java.util.HashMap; 033import java.util.TreeMap; 034import java.util.regex.Matcher; 035import java.util.regex.Pattern; 036 037 038/** 039 * A class containing static methods to parse the symop.lib file from the 040 * CCP4 package. The file contains the transformations belonging to all 041 * protein crystallography space groups. 042 * 043 * See http://structure.usc.edu/ccp4/symlib.html for documentation 044 * 045 * @author duarte_j 046 * 047 */ 048public class SymoplibParser { 049 050 private static final Logger logger = LoggerFactory.getLogger(SymoplibParser.class); 051 052 private static final String newline = System.getProperty("line.separator"); 053 054 private static final String SPACE_GROUPS_FILE = "org/biojava/nbio/structure/xtal/spacegroups.xml"; 055 056 private static final Pattern namePat = Pattern.compile(".*\\s([A-Z]+)(\\s'.+')?\\s+'(.+)'.*"); 057 058 private static TreeMap<Integer, SpaceGroup> sgs = parseSpaceGroupsXML(); 059 060 061 private static HashMap<String, SpaceGroup> name2sgs; // map for lookups based on short names 062 063 /** 064 * Gets the space group for the given standard identifier. 065 * See for example http://en.wikipedia.org/wiki/Space_group 066 * @param id 067 * @return 068 */ 069 public static SpaceGroup getSpaceGroup(int id) { 070 return sgs.get(id); 071 } 072 073 074 /** 075 * Load all SpaceGroup information from the file spacegroups.xml 076 * 077 * @return a map providing information for all spacegroups 078 */ 079 private static TreeMap<Integer, SpaceGroup> parseSpaceGroupsXML() { 080 081 // NOTE: if the space group file is requested by some part of the code (i.e. this method is called) and 082 // there is a problem in reading it, then that's truly a FATAL problem, since this is not a user file 083 // but a file that's part of the distribution: it MUST be there and MUST have the right format. A failure 084 // to read it is more of a "compilation" error than a runtime error. That's the reason that System.exit 085 // is called (which otherwise usually is not a good idea). 086 // 087 // The rest of the application will simply not work: there are 3 options to handle it 088 // a) returning null and then a NullPointer will happen down the line and thus a not very clear 089 // error message will be printed 090 // b) throw the exception forward and catch it in the final main but that would also be bad because 091 // this is a file that the user didn't input but that should be part of the distribution 092 // c) call System.exit(1) and "crash" the application with a human-understandable error message 093 094 InputStream spaceGroupIS = SymoplibParser.class.getClassLoader().getResourceAsStream(SPACE_GROUPS_FILE); 095 096 if ( spaceGroupIS == null) { 097 logger.error("Fatal error! Could not find resource: " + SPACE_GROUPS_FILE + ". This probably means that your biojava jar file is corrupt or incorrectly built."); 098 System.exit(1); 099 } 100 101 TreeMap<Integer, SpaceGroup> map = new TreeMap<>(); 102 103 try { 104 map = parseSpaceGroupsXML(spaceGroupIS); 105 } catch (IOException e) { 106 logger.error("Fatal error! Could not parse resource: "+SPACE_GROUPS_FILE+". Error: "+e.getMessage()); 107 System.exit(1); 108 } catch (JAXBException e) { 109 logger.error("Fatal error! Could not parse resource: "+SPACE_GROUPS_FILE+". Problem in xml formatting: "+e.getMessage()); 110 System.exit(1); 111 } 112 113 name2sgs = new HashMap<>(); 114 115 for (SpaceGroup sg:map.values()) { 116 117 sg.initializeCellTranslations(); 118 name2sgs.put(sg.getShortSymbol(), sg); 119 if (sg.getAltShortSymbol()!=null) { 120 // we add also alternative name to map so we can look it up 121 name2sgs.put(sg.getAltShortSymbol(), sg); 122 } 123 } 124 125 return map; 126 127 } 128 129 130 /** 131 * Load all SpaceGroup information from the file spacegroups.xml 132 * 133 * @return a map providing information for all spacegroups 134 */ 135 public static TreeMap<Integer, SpaceGroup> parseSpaceGroupsXML( 136 InputStream spaceGroupIS) throws IOException, JAXBException { 137 138 String xml = convertStreamToString(spaceGroupIS); 139 140 SpaceGroupMapRoot spaceGroups = SpaceGroupMapRoot.fromXML(xml); 141 return spaceGroups.getMapProperty(); 142 143 } 144 145 146 private static String convertStreamToString(InputStream stream) throws IOException { 147 BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); 148 StringBuilder sb = new StringBuilder(); 149 150 String line = null; 151 152 while ((line = reader.readLine()) != null) { 153 sb.append(line).append(newline); 154 } 155 156 return sb.toString(); 157 } 158 159 /** 160 * Get the space group for the given international short name, using 161 * the PDB format, e.g. 'P 21 21 21' or 'C 1 c 1' 162 * @param shortName 163 * @return the SpaceGroup or null if the shortName is not valid 164 */ 165 public static SpaceGroup getSpaceGroup(String shortName) { 166 if (shortName==null || shortName.length()<=2) return null; 167 168 // PDB uses group "P 1-" for 13 racemic mixture entries (as of Sep2011), e.g. 3e7r 169 // they call the space group "P 1-" unusually (symop.lib and everyone else call it "P -1") 170 if ("P 1-".equals(shortName)) shortName="P -1"; 171 172 // enantiomorphic space groups contain sometime letters indicating glide planes which should always be lower case 173 // in some PDB entries like 4gwv they are in upper case, we fix that here: convert any non-first letter to lower case 174 shortName = shortName.substring(0, 1)+shortName.substring(1).toLowerCase(); 175 176 return name2sgs.get(shortName); 177 } 178 179 public static TreeMap<Integer,SpaceGroup> getAllSpaceGroups() { 180 return sgs; 181 } 182 183 184 /** 185 * A parser for the symop.lib file provided by CCP4. Note: this file is not getting re-distributed by BioJava. 186 * It can be downloaded from: 187 * 188 * <a href="http://www.ccp4.ac.uk/cvs/viewvc.cgi/libccp4/data/symop.lib?revision=1.10&view=markup">http://www.ccp4.ac.uk/cvs/viewvc.cgi/libccp4/data/symop.lib?revision=1.10&view=markup</a> 189 * <p> 190 * Note: this file is not needed by BioJava. BioJava loads equivalent information from the file spacegroups.xml 191 * 192 * @param symoplibIS 193 * @return 194 */ 195 public static TreeMap<Integer,SpaceGroup> parseSymopLib(InputStream symoplibIS) { 196 TreeMap<Integer, SpaceGroup> map = new TreeMap<>(); 197 name2sgs = new HashMap<>(); 198 try { 199 BufferedReader br = new BufferedReader(new InputStreamReader(symoplibIS)); 200 String line; 201 SpaceGroup currentSG = null; 202 while ((line=br.readLine())!=null) { 203 if (!line.startsWith(" ")) { 204 if (currentSG!=null) { 205 map.put(currentSG.getId(),currentSG); 206 name2sgs.put(currentSG.getShortSymbol(), currentSG); 207 if (currentSG.getAltShortSymbol()!=null) { 208 // we add also alternative name to map so we can look it up 209 name2sgs.put(currentSG.getAltShortSymbol(), currentSG); 210 } 211 } 212 213 int idxFirstSpace = line.indexOf(' '); 214 int idxSecondSpace = line.indexOf(' ',idxFirstSpace+1); 215 int idxThirdSpace = line.indexOf(' ',idxSecondSpace+1); 216 int id = Integer.parseInt(line.substring(0, idxFirstSpace)); 217 int multiplicity = Integer.parseInt(line.substring(idxFirstSpace+1, idxSecondSpace)); 218 int primitiveMultiplicity = Integer.parseInt(line.substring(idxSecondSpace+1, idxThirdSpace)); 219 Matcher m = namePat.matcher(line); 220 String shortSymbol = null; 221 String altShortSymbol = null; 222 String brav = null; 223 if (m.matches()) { 224 brav = m.group(1); 225 altShortSymbol = m.group(2); // null if there is no match 226 if (altShortSymbol!=null) altShortSymbol = altShortSymbol.trim().replaceAll("'", ""); 227 shortSymbol = m.group(3); 228 } 229 currentSG = new SpaceGroup(id, multiplicity, primitiveMultiplicity, shortSymbol, altShortSymbol, BravaisLattice.getByName(brav)); 230 } else { 231 currentSG.addTransformation(line.trim()); 232 } 233 } 234 br.close(); 235 // and we add the last SG 236 map.put(currentSG.getId(), currentSG); 237 name2sgs.put(currentSG.getShortSymbol(), currentSG); 238 if (currentSG.getAltShortSymbol()!=null) { 239 // we add also alternative name to map so we can look it up 240 name2sgs.put(currentSG.getAltShortSymbol(), currentSG); 241 } 242 243 } catch (IOException e) { 244 logger.error("Fatal error! Can't read symop.lib file. Error: "+e.getMessage()+". "); 245 System.exit(1); 246 } 247 248 for (SpaceGroup sg:map.values()) { 249 sg.initializeCellTranslations(); 250 } 251 return map; 252 } 253 254 255}