001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.xtal;
022
023import org.biojava.nbio.structure.xtal.io.SpaceGroupMapRoot;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import jakarta.xml.bind.JAXBException;
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.InputStreamReader;
032import java.util.HashMap;
033import java.util.TreeMap;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036
037
038/**
039 * A class containing static methods to parse the symop.lib file from the
040 * CCP4 package. The file contains the transformations belonging to all
041 * protein crystallography space groups.
042 *
043 * See http://structure.usc.edu/ccp4/symlib.html for documentation
044 *
045 * @author duarte_j
046 *
047 */
048public class SymoplibParser {
049
050        private static final Logger logger = LoggerFactory.getLogger(SymoplibParser.class);
051
052        private static final String newline = System.getProperty("line.separator");
053
054        private static final String SPACE_GROUPS_FILE = "org/biojava/nbio/structure/xtal/spacegroups.xml";
055
056        private static final Pattern namePat = Pattern.compile(".*\\s([A-Z]+)(\\s'.+')?\\s+'(.+)'.*");
057
058        private static  TreeMap<Integer, SpaceGroup> sgs = parseSpaceGroupsXML();
059
060
061        private static HashMap<String, SpaceGroup> name2sgs; // map for lookups based on short names
062
063        /**
064         * Gets the space group for the given standard identifier.
065         * See for example http://en.wikipedia.org/wiki/Space_group
066         * @param id
067         * @return
068         */
069        public static SpaceGroup getSpaceGroup(int id) {
070                return sgs.get(id);
071        }
072
073
074        /**
075         * Load all SpaceGroup information from the file spacegroups.xml
076         *
077         * @return a map providing information for all spacegroups
078         */
079        private static TreeMap<Integer, SpaceGroup> parseSpaceGroupsXML() {
080
081                // NOTE: if the space group file is requested by some part of the code (i.e. this method is called) and
082                //       there is a problem in reading it, then that's truly a FATAL problem, since this is not a user file
083                //       but a file that's part of the distribution: it MUST be there and MUST have the right format. A failure
084                //       to read it is more of a "compilation" error than a runtime error. That's the reason that System.exit
085                //       is called (which otherwise usually is not a good idea).
086                //
087                //       The rest of the application will simply not work: there are 3 options to handle it
088                //           a) returning null and then a NullPointer will happen down the line and thus a not very clear
089                //          error message will be printed
090                //       b) throw the exception forward and catch it in the final main but that would also be bad because
091                //          this is a file that the user didn't input but that should be part of the distribution
092                //               c) call System.exit(1) and "crash" the application with a human-understandable error message
093
094                InputStream spaceGroupIS = SymoplibParser.class.getClassLoader().getResourceAsStream(SPACE_GROUPS_FILE);
095
096                if ( spaceGroupIS == null) {
097                        logger.error("Fatal error! Could not find resource: " + SPACE_GROUPS_FILE + ". This probably means that your biojava jar file is corrupt or incorrectly built.");
098                        System.exit(1);
099                }
100
101                TreeMap<Integer, SpaceGroup> map = new TreeMap<>();
102
103                try {
104                        map = parseSpaceGroupsXML(spaceGroupIS);
105                } catch (IOException e) {
106                        logger.error("Fatal error! Could not parse resource: "+SPACE_GROUPS_FILE+". Error: "+e.getMessage());
107                        System.exit(1);
108                } catch (JAXBException e) {
109                        logger.error("Fatal error! Could not parse resource: "+SPACE_GROUPS_FILE+". Problem in xml formatting: "+e.getMessage());
110                        System.exit(1);
111                }
112
113                name2sgs = new HashMap<>();
114
115                for (SpaceGroup sg:map.values()) {
116
117                        sg.initializeCellTranslations();
118                        name2sgs.put(sg.getShortSymbol(), sg);
119                        if (sg.getAltShortSymbol()!=null) {
120                                // we add also alternative name to map so we can look it up
121                                name2sgs.put(sg.getAltShortSymbol(), sg);
122                        }
123                }
124
125                return map;
126
127        }
128
129
130        /**
131         * Load all SpaceGroup information from the file spacegroups.xml
132         *
133         * @return a map providing information for all spacegroups
134         */
135        public static TreeMap<Integer, SpaceGroup> parseSpaceGroupsXML(
136                        InputStream spaceGroupIS) throws IOException, JAXBException {
137
138                String xml = convertStreamToString(spaceGroupIS);
139
140                SpaceGroupMapRoot spaceGroups = SpaceGroupMapRoot.fromXML(xml);
141                return spaceGroups.getMapProperty();
142
143        }
144
145
146        private static String convertStreamToString(InputStream stream) throws IOException {
147                BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
148                StringBuilder sb = new StringBuilder();
149
150                String line = null;
151
152                while ((line = reader.readLine()) != null) {
153                        sb.append(line).append(newline);
154                }
155
156                return sb.toString();
157        }
158
159        /**
160         * Get the space group for the given international short name, using
161         * the PDB format, e.g. 'P 21 21 21' or 'C 1 c 1'
162         * @param shortName
163         * @return the SpaceGroup or null if the shortName is not valid
164         */
165        public static SpaceGroup getSpaceGroup(String shortName) {
166                if (shortName==null || shortName.length()<=2) return null;
167
168                // PDB uses group "P 1-" for 13 racemic mixture entries (as of Sep2011), e.g. 3e7r
169                // they call the space group "P 1-" unusually (symop.lib and everyone else call it "P -1")
170                if ("P 1-".equals(shortName)) shortName="P -1";
171
172                // enantiomorphic space groups contain sometime letters indicating glide planes which should always be lower case
173                // in some PDB entries like 4gwv they are in upper case, we fix that here: convert any non-first letter to lower case
174                shortName = shortName.substring(0, 1)+shortName.substring(1).toLowerCase();
175
176                return name2sgs.get(shortName);
177        }
178
179        public static TreeMap<Integer,SpaceGroup> getAllSpaceGroups() {
180                return sgs;
181        }
182
183
184        /**
185         * A parser for the symop.lib file provided by CCP4. Note: this file is not getting re-distributed by BioJava.
186         * It can be downloaded from:
187         *
188         * <a href="http://www.ccp4.ac.uk/cvs/viewvc.cgi/libccp4/data/symop.lib?revision=1.10&view=markup">http://www.ccp4.ac.uk/cvs/viewvc.cgi/libccp4/data/symop.lib?revision=1.10&amp;view=markup</a>
189         * <p>
190         * Note: this file is not needed by BioJava. BioJava loads equivalent information from the file spacegroups.xml
191         *
192         * @param symoplibIS
193         * @return
194         */
195        public static TreeMap<Integer,SpaceGroup> parseSymopLib(InputStream symoplibIS) {
196                TreeMap<Integer, SpaceGroup> map = new TreeMap<>();
197                name2sgs = new HashMap<>();
198                try {
199                        BufferedReader br = new BufferedReader(new InputStreamReader(symoplibIS));
200                        String line;
201                        SpaceGroup currentSG = null;
202                        while ((line=br.readLine())!=null) {
203                                if (!line.startsWith(" ")) {
204                                        if (currentSG!=null) {
205                                                map.put(currentSG.getId(),currentSG);
206                                                name2sgs.put(currentSG.getShortSymbol(), currentSG);
207                                                if (currentSG.getAltShortSymbol()!=null) {
208                                                        // we add also alternative name to map so we can look it up
209                                                        name2sgs.put(currentSG.getAltShortSymbol(), currentSG);
210                                                }
211                                        }
212
213                                        int idxFirstSpace = line.indexOf(' ');
214                                        int idxSecondSpace = line.indexOf(' ',idxFirstSpace+1);
215                                        int idxThirdSpace = line.indexOf(' ',idxSecondSpace+1);
216                                        int id = Integer.parseInt(line.substring(0, idxFirstSpace));
217                                        int multiplicity = Integer.parseInt(line.substring(idxFirstSpace+1, idxSecondSpace));
218                                        int primitiveMultiplicity = Integer.parseInt(line.substring(idxSecondSpace+1, idxThirdSpace));
219                                        Matcher m = namePat.matcher(line);
220                                        String shortSymbol = null;
221                                        String altShortSymbol = null;
222                                        String brav = null;
223                                        if (m.matches()) {
224                                                brav = m.group(1);
225                                                altShortSymbol = m.group(2); // null if there is no match
226                                                if (altShortSymbol!=null) altShortSymbol = altShortSymbol.trim().replaceAll("'", "");
227                                                shortSymbol = m.group(3);
228                                        }
229                                        currentSG = new SpaceGroup(id, multiplicity, primitiveMultiplicity, shortSymbol, altShortSymbol, BravaisLattice.getByName(brav));
230                                } else {
231                                        currentSG.addTransformation(line.trim());
232                                }
233                        }
234                        br.close();
235                        // and we add the last SG
236                        map.put(currentSG.getId(), currentSG);
237                        name2sgs.put(currentSG.getShortSymbol(), currentSG);
238                        if (currentSG.getAltShortSymbol()!=null) {
239                                // we add also alternative name to map so we can look it up
240                                name2sgs.put(currentSG.getAltShortSymbol(), currentSG);
241                        }
242
243                } catch (IOException e) {
244                        logger.error("Fatal error! Can't read symop.lib file. Error: "+e.getMessage()+". ");
245                        System.exit(1);
246                }
247
248                for (SpaceGroup sg:map.values()) {
249                        sg.initializeCellTranslations();
250                }
251                return map;
252        }
253
254
255}