001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
023
024import java.lang.reflect.Field;
025import java.util.*;
026
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.Chain;
029import org.biojava.nbio.structure.Element;
030import org.biojava.nbio.structure.Group;
031import org.biojava.nbio.structure.GroupType;
032import org.biojava.nbio.structure.Structure;
033import org.biojava.nbio.structure.io.FileConvert;
034import org.biojava.nbio.structure.io.mmcif.model.AbstractBean;
035import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
036import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
037import org.biojava.nbio.structure.io.mmcif.model.Cell;
038import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
039import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
040import org.biojava.nbio.structure.xtal.CrystalCell;
041import org.biojava.nbio.structure.xtal.SpaceGroup;
042import org.slf4j.Logger;
043import org.slf4j.LoggerFactory;
044
045/**
046 * Some tools for mmCIF file writing.
047 *
048 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
049 *
050 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}.
051 * By default, all fields from the bean are taken as the CIF labels. Fields
052 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}.
053 * The CIF label for a field may be changed (for instance, for fields that
054 * are not valid Java identifiers) by annotating the field with
055 * {@link CIFLabel @CIFLabel}; the annotation's <tt>label</tt> value is then
056 * used instead of the field's name (see {@link #getFieldNames(Field[])}).
057 *
058 * @author Jose Duarte
059 * @author Spencer Bliven
060 */
061public class MMCIFFileTools {
062
063        private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class);
064
065        private static final String newline = System.getProperty("line.separator");
066
067        /**
068         * The character to be printed out in cases where a value is not assigned in mmCIF files
069         */
070        public static final String MMCIF_MISSING_VALUE = "?";
071
072        /**
073         * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs
074         */
075        public static final String MMCIF_DEFAULT_VALUE = ".";
076
077
078        /**
079         * Produces a mmCIF loop header string for the given categoryName and className.
080         * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package
081         * @param categoryName
082         * @param className
083         * @return
084         * @throws ClassNotFoundException if the given className can not be found
085         */
086        public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException {
087                StringBuilder str = new StringBuilder();
088
089                str.append(SimpleMMcifParser.LOOP_START+newline);
090
091                Class<?> c = Class.forName(className);
092
093                for (Field f : getFields(c)) {
094                        str.append(categoryName+"."+f.getName()+newline);
095                }
096
097                return str.toString();
098        }
099
100        /**
101         * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to
102         * a String representing it in mmCIF (single-record) format.
103         * @param categoryName
104         * @param o
105         * @return
106         */
107        public static String toMMCIF(String categoryName, Object o) {
108
109                StringBuilder sb = new StringBuilder();
110
111                Class<?> c = o.getClass();
112
113
114                Field[] fields = getFields(c);
115                String[] names = getFieldNames(fields);
116
117                int maxFieldNameLength = getMaxStringLength(names);
118
119                for (int i=0;i<fields.length;i++) {
120                        Field f = fields[i];
121                        String name = names[i];
122
123                        sb.append(categoryName).append(".").append(name);
124
125                        int spacing = maxFieldNameLength - name.length() + 3;
126
127                        try {
128                                Object obj = f.get(o);
129                                String val;
130                                if (obj==null) {
131                                        logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE);
132                                        val = MMCIF_MISSING_VALUE;
133                                } else {
134                                        val = (String) obj;
135                                }
136                                for (int j=0;j<spacing;j++) sb.append(' ');
137                                sb.append(addMmCifQuoting(val));
138                                sb.append(newline);
139
140                        } catch (IllegalAccessException e) {
141                                logger.warn("Field {} is inaccessible", name);
142                                continue;
143                        } catch (ClassCastException e) {
144                                logger.warn("Could not cast value to String for field {}",name);
145                                continue;
146                        }
147
148                }
149
150                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
151
152                return sb.toString();
153        }
154
155        /**
156         * Gets all fields for a particular class, filtering fields annotated
157         * with {@link IgnoreField @IgnoreField}.
158         *
159         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
160         * on all fields.
161         * @param c
162         * @return
163         */
164        public static Field[] getFields(Class<?> c) {
165                Field[] allFields = c.getDeclaredFields();
166                Field[] fields = new Field[allFields.length];
167                int n = 0;
168                for(Field f : allFields) {
169                        f.setAccessible(true);
170                        IgnoreField anno = f.getAnnotation(IgnoreField.class);
171                        if(anno == null) {
172                                fields[n] = f;
173                                n++;
174                        }
175                }
176                return Arrays.copyOf(fields, n);
177        }
178
179        /**
180         * Gets the mmCIF record name for each field. This is generally just
181         * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation.
182         *
183         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
184         * on all fields.
185         * @param fields
186         * @return
187         */
188        public static String[] getFieldNames(Field[] fields) {
189                String[] names = new String[fields.length];
190                for(int i=0;i<fields.length;i++) {
191                        Field f = fields[i];
192                        f.setAccessible(true);
193                        String rawName = fields[i].getName();
194                        CIFLabel cifLabel = f.getAnnotation(CIFLabel.class);
195                        if(cifLabel != null) {
196                                names[i] = cifLabel.label();
197                        } else {
198                                names[i] = rawName;
199                        }
200                }
201                return names;
202        }
203
204        /**
205         * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to
206         * a String representing them in mmCIF loop format with one record per line.
207         * @param list
208         * @return
209         */
210        public static <T> String toMMCIF(List<T> list, Class<T> klass) {
211                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
212
213                Field[] fields = getFields(klass);
214                int[] sizes = getFieldSizes(list,fields);
215
216                StringBuilder sb = new StringBuilder();
217
218                for (T o:list) {
219                        sb.append(toSingleLoopLineMmCifString(o, fields, sizes));
220                }
221
222                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
223
224                return sb.toString();
225        }
226
227        /**
228         * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line
229         * @param record
230         * @param fields Set of fields for the record. If null, will be calculated from the class of the record
231         * @param sizes the size of each of the fields
232         * @return
233         */
234        private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) {
235
236                StringBuilder str = new StringBuilder();
237
238                Class<?> c = record.getClass();
239
240                if(fields == null)
241                        fields = getFields(c);
242
243                if (sizes.length!=fields.length)
244                        throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields");
245
246                int i = -1;
247                for (Field f : fields) {
248                        i++;
249                        f.setAccessible(true);
250
251                        try {
252                                Object obj = f.get(record);
253                                String val;
254                                if (obj==null) {
255                                        logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE);
256                                        val = MMCIF_MISSING_VALUE;
257                                } else {
258                                        val = (String) obj;
259                                }
260
261                                str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val)));
262
263
264                        } catch (IllegalAccessException e) {
265                                logger.warn("Field {} is inaccessible", f.getName());
266                                continue;
267                        } catch (ClassCastException e) {
268                                logger.warn("Could not cast value to String for field {}",f.getName());
269                                continue;
270                        }
271                }
272
273                str.append(newline);
274
275                return str.toString();
276
277        }
278
279        /**
280         * Adds quoting to a String according to the STAR format (mmCIF) rules
281         * @param val
282         * @return
283         */
284        private static String addMmCifQuoting(String val) {
285                String newval;
286
287                if (val.contains("'")) {
288                        // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does)
289                        newval = "\""+val+"\"";
290                } else if (val.contains(" ")) {
291                        // single quoting for stings containing spaces
292                        newval = "'"+val+"'";
293                } else {
294                        if (val.contains(" ") && val.contains("'")) {
295                                // TODO deal with this case
296                                logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val);
297                        }
298                        newval = val;
299                }
300                // TODO deal with all the other cases: e.g. multi-line quoting with ;;
301
302                return newval;
303        }
304
305        /**
306         * Converts a SpaceGroup object to a {@link Symmetry} object.
307         * @param sg
308         * @return
309         */
310        public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) {
311                Symmetry sym = new Symmetry();
312                sym.setSpace_group_name_H_M(sg.getShortSymbol());
313                // TODO do we need to fill any of the other values?
314                return sym;
315        }
316
317        /**
318         * Converts a CrystalCell object to a {@link Cell} object.
319         * @param c
320         * @return
321         */
322        public static Cell convertCrystalCellToCell(CrystalCell c) {
323                Cell cell = new Cell();
324                cell.setLength_a(String.format("%.3f",c.getA()));
325                cell.setLength_b(String.format("%.3f",c.getB()));
326                cell.setLength_c(String.format("%.3f",c.getC()));
327                cell.setAngle_alpha(String.format("%.3f",c.getAlpha()));
328                cell.setAngle_beta(String.format("%.3f",c.getBeta()));
329                cell.setAngle_gamma(String.format("%.3f",c.getGamma()));
330
331                return cell;
332        }
333
334        /**
335         * Converts an Atom object to an {@link AtomSite} object.
336         * @param a
337         * @param model the model number for the output AtomSites
338         * @param chainName the chain identifier (author id) for the output AtomSites
339         * @param chainId the internal chain identifier (asym id) for the output AtomSites
340         * @return
341         */
342        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId) {
343                return convertAtomToAtomSite(a, model, chainName, chainId, a.getPDBserial());
344        }
345
346        /**
347         * Converts an Atom object to an {@link AtomSite} object.
348         * @param a the atom
349         * @param model the model number for the output AtomSites
350         * @param chainName the chain identifier (author id) for the output AtomSites
351         * @param chainId the internal chain identifier (asym id) for the output AtomSites
352         * @param atomId the atom id to be written to AtomSite
353         * @return
354         */
355        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId, int atomId) {
356
357                /*
358                ATOM 7    C CD  . GLU A 1 24  ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24  GLU A CD  1
359                ATOM 8    O OE1 . GLU A 1 24  ? -9.659  14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24  GLU A OE1 1
360                ATOM 9    O OE2 . GLU A 1 24  ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24  GLU A OE2 1
361                ATOM 10   N N   . LEU A 1 25  ? -5.907  18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25  LEU A N   1
362                ATOM 11   C CA  . LEU A 1 25  ? -5.168  19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25  LEU A CA  1
363                */
364
365                Group g = a.getGroup();
366
367                String record ;
368                if ( g.getType().equals(GroupType.HETATM) ) {
369                        record = "HETATM";
370                } else {
371                        record = "ATOM";
372                }
373
374                String entityId = "0";
375                String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum());
376                if (g.getChain()!=null && g.getChain().getEntityInfo()!=null) {
377                        entityId = Integer.toString(g.getChain().getEntityInfo().getMolId());
378                        labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain()));
379                }
380
381                Character  altLoc = a.getAltLoc()           ;
382                String altLocStr;
383                if (altLoc==null || altLoc == ' ') {
384                        altLocStr = MMCIF_DEFAULT_VALUE;
385                } else {
386                        altLocStr = altLoc.toString();
387                }
388
389                Element e = a.getElement();
390                String eString = e.toString().toUpperCase();
391                if ( e.equals(Element.R)) {
392                        eString = "X";
393                }
394
395                String insCode = MMCIF_MISSING_VALUE;
396                if (g.getResidueNumber().getInsCode()!=null ) {
397                        insCode = Character.toString(g.getResidueNumber().getInsCode());
398                }
399
400                AtomSite atomSite = new AtomSite();
401                atomSite.setGroup_PDB(record);
402                atomSite.setId(Integer.toString(atomId));
403                atomSite.setType_symbol(eString);
404                atomSite.setLabel_atom_id(a.getName());
405                atomSite.setLabel_alt_id(altLocStr);
406                atomSite.setLabel_comp_id(g.getPDBName());
407                atomSite.setLabel_asym_id(chainId);
408                atomSite.setLabel_entity_id(entityId);
409                atomSite.setLabel_seq_id(labelSeqId);
410                atomSite.setPdbx_PDB_ins_code(insCode);
411                atomSite.setCartn_x(FileConvert.d3.format(a.getX()));
412                atomSite.setCartn_y(FileConvert.d3.format(a.getY()));
413                atomSite.setCartn_z(FileConvert.d3.format(a.getZ()));
414                atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy()));
415                atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor()));
416                atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum()));
417                atomSite.setAuth_comp_id(g.getPDBName());
418                atomSite.setAuth_asym_id(chainName);
419                atomSite.setAuth_atom_id(a.getName());
420                atomSite.setPdbx_PDB_model_num(Integer.toString(model));
421
422                return atomSite;
423        }
424
425        /**
426         * Converts a Group into a List of {@link AtomSite} objects.
427         * Atoms in other altloc groups (different from the main group) are also included, removing possible duplicates
428         * via using the atom identifier to assess uniqueness.
429         * @param g the group
430         * @param model the model number for the output AtomSites
431         * @param chainName the chain identifier (author id) for the output AtomSites
432         * @param chainId the internal chain identifier (asym id) for the output AtomSites
433         * @return
434         */
435        public static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainName, String chainId) {
436
437                // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have
438                // all atoms (see StructureTools#cleanUpAltLocs)
439                // Thus we have to remove duplicates here by using the atom id
440                // See issue https://github.com/biojava/biojava/issues/778 and TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs
441                Map<Integer, AtomSite> uniqueAtomSites = new LinkedHashMap<>();
442
443                int groupsize  = g.size();
444
445                for ( int atompos = 0 ; atompos < groupsize; atompos++) {
446                        Atom a = g.getAtom(atompos);
447                        if ( a == null)
448                                continue ;
449
450                        uniqueAtomSites.put(a.getPDBserial(), convertAtomToAtomSite(a, model, chainName, chainId));
451                }
452
453                if ( g.hasAltLoc()){
454                        for (Group alt : g.getAltLocs() ) {
455                                for (AtomSite atomSite : convertGroupToAtomSites(alt, model, chainName, chainId)) {
456                                        uniqueAtomSites.put(Integer.parseInt(atomSite.getId()), atomSite);
457                                }
458                        }
459                }
460                return new ArrayList<>(uniqueAtomSites.values());
461        }
462
463        /**
464         * Converts a Chain into a List of {@link AtomSite} objects
465         * @param c the chain
466         * @param model the model number for the output AtomSites
467         * @param chainName the chain identifier (author id) for the output AtomSites
468         * @param chainId the internal chain identifier (asym id) for the output AtomSites
469         * @return
470         */
471        public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String chainName, String chainId) {
472
473                List<AtomSite> list = new ArrayList<>();
474
475                if (c.getEntityInfo()==null) {
476                        logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName());
477                }
478
479                for ( int h=0; h<c.getAtomLength();h++){
480
481                        Group g= c.getAtomGroup(h);
482
483                        list.addAll(convertGroupToAtomSites(g, model, chainName, chainId));
484
485                }
486
487                return list;
488        }
489
490        /**
491         * Converts a Structure into a List of {@link AtomSite} objects
492         * @param s
493         * @return
494         */
495        public static List<AtomSite> convertStructureToAtomSites(Structure s) {
496                List<AtomSite> list = new ArrayList<AtomSite>();
497
498                for (int m=0;m<s.nrModels();m++) {
499                        for (Chain c:s.getChains(m)) {
500                                list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId()));
501                        }
502                }
503                return list;
504        }
505
506        /**
507         * Finds the max length of each of the String values contained in each of the fields of the given list of beans.
508         * Useful for producing mmCIF loop data that is aligned for all columns.
509         * @param list list of objects. All objects should have the same class.
510         * @param fields Set of fields for the record. If null, will be calculated from the class of the first record
511         * @return
512         * @see #toMMCIF(List, Class)
513         */
514        private static <T> int[] getFieldSizes(List<T> list, Field[] fields) {
515
516                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
517
518                if(fields == null)
519                        fields = getFields(list.get(0).getClass());
520
521                int[] sizes = new int [fields.length];
522
523
524                for (T a:list) {
525                        int i = -1;
526                        for (Field f : fields) {
527                                i++;
528
529                                f.setAccessible(true);
530
531                                try {
532                                        Object obj = f.get(a);
533                                        int length;
534                                        if (obj==null) {
535                                                length = MMCIF_MISSING_VALUE.length();
536                                        } else {
537                                                String val = (String) obj;
538                                                length = addMmCifQuoting(val).length();
539                                        }
540
541                                        if (length>sizes[i]) sizes[i] = length;
542
543                                } catch (IllegalAccessException e) {
544                                        logger.warn("Field {} is inaccessible", f.getName());
545                                        continue;
546                                } catch (ClassCastException e) {
547                                        logger.warn("Could not cast value to String for field {}",f.getName());
548                                        continue;
549                                }
550                        }
551                }
552                return sizes;
553        }
554
555        /**
556         * Finds the max length of a list of strings
557         * Useful for producing mmCIF single-record data that is aligned for all values.
558         * @param names
559         * @return
560         * @see #toMMCIF(String, Object)
561         */
562        private static int getMaxStringLength(String[] names) {
563                int size = 0;
564                for(String s : names) {
565                        if(s.length()>size) {
566                                size = s.length();
567                        }
568                }
569                return size;
570        }
571}