001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * created at Mar 4, 2008
021 */
022package org.biojava.nbio.structure.io.mmcif;
023
024import java.io.BufferedReader;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.InputStreamReader;
028import java.lang.reflect.Field;
029import java.lang.reflect.InvocationTargetException;
030import java.lang.reflect.Method;
031import java.util.ArrayList;
032import java.util.HashMap;
033import java.util.HashSet;
034import java.util.List;
035import java.util.Map;
036import java.util.Set;
037
038
039import org.biojava.nbio.structure.Structure;
040import org.biojava.nbio.structure.io.MMCIFFileReader;
041import org.biojava.nbio.structure.io.StructureIOFile;
042import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
043import org.biojava.nbio.structure.io.mmcif.model.AtomSites;
044import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor;
045import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
046import org.biojava.nbio.structure.io.mmcif.model.Cell;
047import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
048import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom;
049import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
050import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor;
051import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark;
052import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev;
053import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord;
054import org.biojava.nbio.structure.io.mmcif.model.Entity;
055import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq;
056import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen;
057import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat;
058import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn;
059import org.biojava.nbio.structure.io.mmcif.model.Exptl;
060import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
061import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor;
062import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier;
063import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly;
064import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme;
065import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme;
066import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
067import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
068import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
069import org.biojava.nbio.structure.io.mmcif.model.Refine;
070import org.biojava.nbio.structure.io.mmcif.model.Struct;
071import org.biojava.nbio.structure.io.mmcif.model.StructAsym;
072import org.biojava.nbio.structure.io.mmcif.model.StructConn;
073import org.biojava.nbio.structure.io.mmcif.model.StructKeywords;
074import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper;
075import org.biojava.nbio.structure.io.mmcif.model.StructRef;
076import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq;
077import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif;
078import org.biojava.nbio.structure.io.mmcif.model.StructSite;
079import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen;
080import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
081import org.slf4j.Logger;
082import org.slf4j.LoggerFactory;
083
084/**
085 * A simple mmCif file parser
086 *
087 *
088 * Usage:
089 * <pre>
090String file = "path/to/mmcif/file";
091StructureIOFile pdbreader = new MMCIFFileReader();
092
093Structure s = pdbreader.getStructure(file);
094System.out.println(s);
095
096// you can convert it to a PDB file...
097System.out.println(s.toPDB());
098
099 * </pre>
100 * For more documentation see <a href="http://biojava.org/wiki/BioJava:CookBook#Protein_Structure">http://biojava.org/wiki/BioJava:CookBook#Protein_Structure</a>.
101 *
102 * @author Andreas Prlic
103 * @author Jose Duarte
104 * @since 1.7
105 */
106public class SimpleMMcifParser implements MMcifParser {
107
108
109
        /**
         * The header appearing at the beginning of a mmCIF file.
         * A "block code" can be added to it of no more than 32 chars.
         * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
         */
        public static final String MMCIF_TOP_HEADER = "data_";

        // Lines starting with COMMENT_CHAR are skipped by the parser.
        public static final String COMMENT_CHAR = "#";
        // A line starting with LOOP_START switches the parser into loop mode.
        public static final String LOOP_START = "loop_";
        // Prefix used to recognise field lines (e.g. _category.attribute).
        public static final String FIELD_LINE = "_";

        // the following are the 3 valid quoting characters in CIF
        /**
         * Quoting character '
         */
        private static final char S1 = '\'';

        /**
         * Quoting character "
         */
        private static final char S2 = '\"';

        /**
         * Quoting character ; (multi-line quoting)
         */
        public static final String STRING_LIMIT = ";";


        // Consumers registered through addMMcifConsumer / removeMMcifConsumer / clearConsumers.
        private List<MMcifConsumer> consumers ;

        // Re-created at the start of every parse and replaced when a _struct category is read.
        private Struct struct ;

        private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifParser.class);
143
144        public SimpleMMcifParser(){
145                consumers = new ArrayList<MMcifConsumer>();
146                struct = null;
147        }
148
149        @Override
150        public void addMMcifConsumer(MMcifConsumer consumer) {
151                consumers.add(consumer);
152
153        }
154
155        @Override
156        public void clearConsumers() {
157                consumers.clear();
158
159        }
160
161        @Override
162        public void removeMMcifConsumer(MMcifConsumer consumer) {
163                consumers.remove(consumer);
164        }
165
166        public static void main(String[] args){
167                String file = "/Users/andreas/WORK/PDB/mmCif/a9/1a9n.cif.gz";
168                //String file = "/Users/andreas/WORK/PDB/MMCIF/1gav.mmcif";
169                //String file = "/Users/andreas/WORK/PDB/MMCIF/100d.cif";
170                //String file = "/Users/andreas/WORK/PDB/MMCIF/1a4a.mmcif";
171                System.out.println("parsing " + file);
172
173                StructureIOFile pdbreader = new MMCIFFileReader();
174                try {
175                        Structure s = pdbreader.getStructure(file);
176                        System.out.println(s);
177                        // convert it to a PDB file...
178                        System.out.println(s.toPDB());
179                } catch (IOException e) {
180                        e.printStackTrace();
181                }
182
183        }
184
185        @Override
186        public void parse(InputStream inStream) throws IOException {
187                parse(new BufferedReader(new InputStreamReader(inStream)));
188
189        }
190
191        @Override
192        public void parse(BufferedReader buf)
193                        throws IOException {
194
195                triggerDocumentStart();
196
197
198                // init container objects...
199                struct = new Struct();
200                String line = null;
201
202                boolean inLoop = false;
203                boolean inLoopData = false;
204
205
206                List<String> loopFields = new ArrayList<String>();
207                List<String> lineData   = new ArrayList<String>();
208                Set<String> loopWarnings = new HashSet<String>(); // used only to reduce logging statements
209
210                String category = null;
211
212
213                // the first line is a data_PDBCODE line, test if this looks like a mmcif file
214                line = buf.readLine();
215                if (line == null || !line.startsWith(MMCIF_TOP_HEADER)){
216                        logger.error("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'");
217                        triggerDocumentEnd();
218                        return;
219                }
220
221                while ( (line = buf.readLine ()) != null ){
222
223                        if (line.isEmpty() || line.startsWith(COMMENT_CHAR)) continue;
224
225                        logger.debug(inLoop + " " + line);
226
227                        if (line.startsWith(MMCIF_TOP_HEADER)){
228                                // either first line in file, or beginning of new section
229                                if ( inLoop) {
230                                        //System.out.println("new data and in loop: " + line);
231                                        inLoop = false;
232                                        inLoopData = false;
233                                        lineData.clear();
234                                        loopFields.clear();
235                                }
236
237                        }
238
239
240                        if ( inLoop) {
241
242
243                                if ( line.startsWith(LOOP_START)){
244                                        loopFields.clear();
245                                        inLoop = true;
246                                        inLoopData = false;
247                                        continue;
248                                }
249
250                                if ( line.matches("\\s*"+FIELD_LINE+"\\w+.*")) {
251
252                                        if (inLoopData && line.startsWith(FIELD_LINE)) {
253                                                logger.debug("Found a field line after reading loop data. Toggling to inLoop=false");
254                                                inLoop = false;
255                                                inLoopData = false;
256                                                loopFields.clear();
257
258
259                                                // a boring normal line
260                                                List<String> data = processLine(line, buf, 2);
261
262                                                if ( data.size() < 1){
263                                                        // this can happen if empty lines at end of file
264                                                        lineData.clear();
265                                                        continue;
266                                                }
267                                                String key = data.get(0);
268                                                int pos = key.indexOf(".");
269                                                if ( pos < 0 ) {
270                                                        // looks like a chem_comp file
271                                                        // line should start with data, otherwise something is wrong!
272                                                        if (! line.startsWith(MMCIF_TOP_HEADER)){
273                                                                logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line+"'");
274                                                                triggerDocumentEnd();
275                                                                return;
276                                                        }
277                                                        // ignore the first line...
278                                                        category=null;
279                                                        lineData.clear();
280                                                        continue;
281                                                }
282                                                category = key.substring(0,pos);
283                                                String value = data.get(1);
284                                                loopFields.add(key.substring(pos+1,key.length()));
285                                                lineData.add(value);
286
287                                                logger.debug("Found data for category {}: {}", key, value);
288                                                continue;
289                                        }
290
291                                        // found another field.
292                                        String txt = line.trim();
293                                        if ( txt.indexOf('.') > -1){
294
295                                                String[] spl = txt.split("\\.");
296                                                category = spl[0];
297                                                String attribute = spl[1];
298                                                loopFields.add(attribute);
299                                                logger.debug("Found category: {}, attribute: {}",category, attribute);
300                                                if ( spl.length > 2){
301                                                        logger.warn("Found nested attribute in {}, not supported yet!",txt);
302                                                }
303
304                                        } else {
305                                                category = txt;
306                                                logger.debug("Found category without attribute: {}",category);
307                                        }
308
309
310                                } else {
311
312                                        // in loop and we found a data line
313                                        lineData = processLine(line, buf, loopFields.size());
314                                        logger.debug("Found a loop data line with {} data fields", lineData.size());
315                                        logger.debug("Data fields: {}", lineData.toString());
316                                        if ( lineData.size() != loopFields.size()){
317                                                logger.warn("Expected {} data fields, but found {} in line: {}",loopFields.size(),lineData.size(),line);
318
319                                        }
320
321                                        endLineChecks(category, loopFields, lineData, loopWarnings);
322
323                                        lineData.clear();
324
325                                        inLoopData = true;
326                                }
327
328                        } else {
329                                // not in loop
330
331                                if ( line.startsWith(LOOP_START)){
332                                        if ( category != null)
333                                                endLineChecks(category, loopFields, lineData, loopWarnings);
334
335                                        resetBuffers(loopFields, lineData, loopWarnings);
336                                        category = null;
337                                        inLoop = true;
338                                        inLoopData = false;
339                                        logger.debug("Detected LOOP_START: '{}'. Toggling to inLoop=true", LOOP_START);
340                                        continue;
341                                } else {
342                                        logger.debug("Normal line ");
343                                        inLoop = false;
344
345                                        // a boring normal line
346                                        List<String> data = processLine(line, buf, 2);
347
348                                        if ( data.size() < 1){
349                                                // this can happen if empty lines at end of file
350                                                lineData.clear();
351                                                continue;
352                                        }
353                                        String key = data.get(0);
354                                        int pos = key.indexOf(".");
355                                        if ( pos < 0 ) {
356                                                // looks like a chem_comp file
357                                                // line should start with data, otherwise something is wrong!
358                                                if (! line.startsWith(MMCIF_TOP_HEADER)){
359                                                        logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line+"'");
360                                                        triggerDocumentEnd();
361                                                        return;
362                                                }
363                                                // ignore the first line...
364                                                category=null;
365                                                lineData.clear();
366                                                continue;
367                                        }
368
369                                        if (category!=null && !key.substring(0,pos).equals(category)) {
370                                                // we've changed category: need to flush the previous one
371                                                endLineChecks(category, loopFields, lineData, loopWarnings);
372                                                resetBuffers(loopFields, lineData, loopWarnings);
373                                        }
374
375                                        category = key.substring(0,pos);
376
377                                        String value = data.get(1);
378                                        loopFields.add(key.substring(pos+1,key.length()));
379                                        lineData.add(value);
380
381                                        logger.debug("Found data for category {}: {}", key, value);
382
383                                }
384                        }
385                }
386
387                if (category!=null && lineData.size()>0 && lineData.size()==loopFields.size()) {
388                        // the last category in the file will still be missing, we add it now
389                        endLineChecks(category, loopFields, lineData, loopWarnings);
390                        resetBuffers(loopFields, lineData, loopWarnings);
391                }
392
393                if (struct != null){
394                        triggerStructData(struct);
395                }
396
397                triggerDocumentEnd();
398
399        }
400
401        private void resetBuffers(List<String> loopFields, List<String> lineData, Set<String> loopWarnings) {
402                loopFields.clear();
403                lineData.clear();
404                loopWarnings.clear();
405        }
406
407        private List<String> processSingleLine(String line){
408
409                List<String> data = new ArrayList<String>();
410
411                if ( line.trim().length() == 0){
412                        return data;
413                }
414
415                if ( line.trim().length() == 1){
416                        if ( line.startsWith(STRING_LIMIT))
417                                return data;
418                }
419                boolean inString = false; // semicolon (;) quoting
420                boolean inS1     = false; // single quote (') quoting
421                boolean inS2     = false; // double quote (") quoting
422                String word      = "";
423
424                for (int i=0; i< line.length(); i++ ){
425
426                        Character c = line.charAt(i);
427
428                        Character nextC = null;
429                        if (i < line.length() - 1)
430                                nextC = line.charAt(i+1);
431
432                        Character prevC = null;
433                        if (i>0)
434                                prevC = line.charAt(i-1);
435
436                        if  (c == ' ') {
437
438                                if ( ! inString){
439                                        if ( ! word.equals(""))
440                                                data.add(word.trim());
441                                        word = "";
442                                } else {
443                                        // we are in a string, add the space
444                                        word += c;
445                                }
446
447                        } else if (c == S1 )  {
448
449                                if ( inString){
450
451                                        boolean wordEnd = false;
452                                        if (! inS2) {
453                                                if (nextC==null || Character.isWhitespace(nextC)){
454                                                        i++;
455                                                        wordEnd = true;
456                                                }
457                                        }
458
459
460                                        if ( wordEnd ) {
461
462                                                // at end of string
463                                                if ( ! word.equals(""))
464                                                        data.add(word.trim());
465                                                word     = "";
466                                                inString = false;
467                                                inS1     = false;
468                                        } else {
469                                                word += c;
470                                        }
471
472                                } else if (prevC==null || prevC==' ') {
473                                        // the beginning of a new string
474                                        inString = true;
475                                        inS1     = true;
476                                } else {
477                                        word += c;
478                                }
479                        } else if ( c == S2 ){
480                                if ( inString){
481
482                                        boolean wordEnd = false;
483                                        if (! inS1) {
484                                                if (nextC==null || Character.isWhitespace(nextC)){
485                                                        i++;
486                                                        wordEnd = true;
487                                                }
488                                        }
489
490                                        if ( wordEnd ) {
491
492                                                // at end of string
493                                                if ( ! word.equals(""))
494                                                        data.add(word.trim());
495                                                word     = "";
496                                                inString = false;
497                                                inS2     = false;
498                                        } else {
499                                                word += c;
500                                        }
501                                }  else if (prevC==null || prevC==' ') {
502                                        // the beginning of a new string
503                                        inString = true;
504                                        inS2     = true;
505                                } else {
506                                        word += c;
507                                }
508                        } else {
509                                word += c;
510                        }
511
512                }
513                if ( ! word.trim().equals(""))
514                        data.add(word);
515
516
517                return data;
518
519        }
520
521        /**
522         * Get the content of a cif entry
523         *
524         * @param line
525         * @param buf
526         * @return
527         */
528        private List<String> processLine(String line,
529                        BufferedReader buf,
530                        int fieldLength)
531                                        throws IOException{
532
533                //System.out.println("XX processLine " + fieldLength + " " + line);
534                // go through the line and process each character
535                List<String> lineData = new ArrayList<String>();
536
537                boolean inString = false;
538
539                StringBuilder bigWord = null;
540
541                while ( true ){
542
543                        if ( line.startsWith(STRING_LIMIT)){
544                                if (! inString){
545
546                                        inString = true;
547                                        if ( line.length() > 1)
548                                                bigWord = new StringBuilder(line.substring(1));
549                                        else
550                                                bigWord = new StringBuilder("");
551
552
553                                } else {
554                                        // the end of a word
555                                        lineData.add(bigWord.toString());
556                                        bigWord = null;
557                                        inString = false;
558
559                                }
560                        } else {
561                                if ( inString )
562                                        bigWord.append(line);
563                                else {
564
565                                        List<String> dat = processSingleLine(line);
566
567                                        for (String d : dat){
568                                                lineData.add(d);
569                                        }
570                                }
571                        }
572
573                        //System.out.println("in process line : " + lineData.size() + " " + fieldLength);
574
575                        if ( lineData.size() > fieldLength){
576
577                                logger.warn("wrong data length ("+lineData.size()+
578                                                ") should be ("+fieldLength+") at line " + line + " got lineData: " + lineData);
579                                return lineData;
580                        }
581
582                        if ( lineData.size() == fieldLength)
583                                return lineData;
584
585
586                        line = buf.readLine();
587                        if ( line == null)
588                                break;
589                }
590                return lineData;
591
592        }
593
594
595
596        private void endLineChecks(String category,List<String> loopFields, List<String> lineData, Set<String> loopWarnings ) throws IOException{
597
598                logger.debug("Processing category {}, with fields: {}",category,loopFields.toString());
599                //              System.out.println("parsed the following data: " +category + " fields: "+
600                //                              loopFields + " DATA: " +
601                //                              lineData);
602
603                if ( loopFields.size() != lineData.size()){
604                        logger.warn("looks like we got a problem with nested string quote characters:");
605                        throw new IOException("data length ("+ lineData.size() +
606                                        ") != fields length ("+loopFields.size()+
607                                        ") category: " +category + " fields: "+
608                                        loopFields + " DATA: " +
609                                        lineData );
610                }
611
612                if ( category.equals("_entity")){
613
614                        Entity e =  (Entity) buildObject(
615                                        Entity.class.getName(),
616                                        loopFields,lineData, loopWarnings);
617                        triggerNewEntity(e);
618
619                } else if ( category.equals("_struct")){
620
621                        struct =  (Struct) buildObject(
622                                        Struct.class.getName(),
623                                        loopFields, lineData, loopWarnings);
624
625                } else if ( category.equals("_atom_site")){
626
627                        AtomSite a = (AtomSite) buildObject(
628                                        AtomSite.class.getName(),
629                                        loopFields, lineData, loopWarnings);
630                        triggerNewAtomSite(a);
631
632                } else if ( category.equals("_database_PDB_rev")){
633                        DatabasePDBrev dbrev = (DatabasePDBrev) buildObject(
634                                        DatabasePDBrev.class.getName(),
635                                        loopFields, lineData, loopWarnings);
636
637                        triggerNewDatabasePDBrev(dbrev);
638
639                } else if ( category.equals("_database_PDB_rev_record")){
640                        DatabasePdbrevRecord dbrev = (DatabasePdbrevRecord) buildObject(
641                                        DatabasePdbrevRecord.class.getName(),
642                                        loopFields, lineData, loopWarnings);
643
644                        triggerNewDatabasePDBrevRecord(dbrev);
645
646                }else if (  category.equals("_database_PDB_remark")){
647                        DatabasePDBremark remark = (DatabasePDBremark) buildObject(
648                                        DatabasePDBremark.class.getName(),
649                                        loopFields, lineData, loopWarnings);
650
651                        triggerNewDatabasePDBremark(remark);
652
653                } else if ( category.equals("_exptl")){
654                        Exptl exptl  = (Exptl) buildObject(
655                                        Exptl.class.getName(),
656                                        loopFields,lineData, loopWarnings);
657
658                        triggerExptl(exptl);
659
660                } else if ( category.equals("_cell")){
661                        Cell cell  = (Cell) buildObject(
662                                        Cell.class.getName(),
663                                        loopFields,lineData, loopWarnings);
664
665                        triggerNewCell(cell);
666
667                } else if ( category.equals("_symmetry")){
668                        Symmetry symmetry  = (Symmetry) buildObject(
669                                        Symmetry.class.getName(),
670                                        loopFields,lineData, loopWarnings);
671
672                        triggerNewSymmetry(symmetry);
673                } else if ( category.equals("_struct_ncs_oper")) {
674
675                        StructNcsOper sNcsOper = (StructNcsOper) buildObject(
676                                        StructNcsOper.class.getName(), 
677                                        loopFields, lineData, loopWarnings);
678                        triggerNewStructNcsOper(sNcsOper);
679                } else if ( category.equals("_atom_sites")) {
680                        
681                        AtomSites atomSites = (AtomSites) buildObject(
682                                        AtomSites.class.getName(),
683                                        loopFields, lineData, loopWarnings);
684                        triggerNewAtomSites(atomSites);
685
686                } else if ( category.equals("_struct_ref")){
687                        StructRef sref  = (StructRef) buildObject(
688                                        StructRef.class.getName(),
689                                        loopFields,lineData, loopWarnings);
690
691                        triggerNewStrucRef(sref);
692
693                } else if ( category.equals("_struct_ref_seq")){
694                        StructRefSeq sref  = (StructRefSeq) buildObject(
695                                        StructRefSeq.class.getName(),
696                                        loopFields,lineData, loopWarnings);
697
698                        triggerNewStrucRefSeq(sref);
699                } else if ( category.equals("_struct_ref_seq_dif")) {
700                        StructRefSeqDif sref = (StructRefSeqDif) buildObject(
701                                        StructRefSeqDif.class.getName(),
702                                        loopFields, lineData, loopWarnings);
703
704                        triggerNewStrucRefSeqDif(sref);
705                } else if ( category.equals("_struct_site_gen")) {
706                        StructSiteGen sref = (StructSiteGen) buildObject(
707                                        StructSiteGen.class.getName(),
708                                        loopFields, lineData, loopWarnings);
709
710                        triggerNewStructSiteGen(sref);
711                } else if ( category.equals("_struct_site")) {
712                        StructSite sref = (StructSite) buildObject(
713                                        StructSite.class.getName(),
714                                        loopFields, lineData, loopWarnings);
715                        triggerNewStructSite(sref);
716                } else if ( category.equals("_entity_poly_seq")){
717                        EntityPolySeq exptl  = (EntityPolySeq) buildObject(
718                                        EntityPolySeq.class.getName(),
719                                        loopFields,lineData, loopWarnings);
720
721                        triggerNewEntityPolySeq(exptl);
722                } else if ( category.equals("_entity_src_gen")){
723                        EntitySrcGen entitySrcGen = (EntitySrcGen) buildObject(
724                                        EntitySrcGen.class.getName(),
725                                        loopFields,lineData, loopWarnings);
726                        triggerNewEntitySrcGen(entitySrcGen);
727                } else if ( category.equals("_entity_src_nat")){
728                        EntitySrcNat entitySrcNat = (EntitySrcNat) buildObject(
729                                        EntitySrcNat.class.getName(),
730                                        loopFields,lineData, loopWarnings);
731                        triggerNewEntitySrcNat(entitySrcNat);
732                } else if ( category.equals("_pdbx_entity_src_syn")){
733                        EntitySrcSyn entitySrcSyn = (EntitySrcSyn) buildObject(
734                                        EntitySrcSyn.class.getName(),
735                                        loopFields,lineData, loopWarnings);
736                        triggerNewEntitySrcSyn(entitySrcSyn);
737                } else if ( category.equals("_struct_asym")){
738                        StructAsym sasym  = (StructAsym) buildObject(
739                                        StructAsym.class.getName(),
740                                        loopFields,lineData, loopWarnings);
741
742                        triggerNewStructAsym(sasym);
743
744                } else if ( category.equals("_pdbx_poly_seq_scheme")){
745                        PdbxPolySeqScheme ppss  = (PdbxPolySeqScheme) buildObject(
746                                        PdbxPolySeqScheme.class.getName(),
747                                        loopFields,lineData, loopWarnings);
748
749                        triggerNewPdbxPolySeqScheme(ppss);
750
751                } else if ( category.equals("_pdbx_nonpoly_scheme")){
752                        PdbxNonPolyScheme ppss  = (PdbxNonPolyScheme) buildObject(
753                                        PdbxNonPolyScheme.class.getName(),
754                                        loopFields,lineData, loopWarnings);
755
756                        triggerNewPdbxNonPolyScheme(ppss);
757
758                } else if ( category.equals("_pdbx_entity_nonpoly")){
759                        PdbxEntityNonPoly pen = (PdbxEntityNonPoly) buildObject(
760                                        PdbxEntityNonPoly.class.getName(),
761                                        loopFields,lineData, loopWarnings
762                                        );
763                        triggerNewPdbxEntityNonPoly(pen);
764                } else if ( category.equals("_struct_keywords")){
765                        StructKeywords kw = (StructKeywords)buildObject(
766                                        StructKeywords.class.getName(),
767                                        loopFields,lineData, loopWarnings
768                                        );
769                        triggerNewStructKeywords(kw);
770                } else if (category.equals("_refine")){
771                        Refine r = (Refine)buildObject(
772                                        Refine.class.getName(),
773                                        loopFields,lineData, loopWarnings
774                                        );
775                        triggerNewRefine(r);
776                } else if (category.equals("_chem_comp")){
777                        ChemComp c = (ChemComp)buildObject(
778                                        ChemComp.class.getName(),
779                                        loopFields, lineData, loopWarnings
780                                        );
781                        triggerNewChemComp(c);
782                } else if (category.equals("_audit_author")) {
783                        AuditAuthor aa = (AuditAuthor)buildObject(
784                                        AuditAuthor.class.getName(),
785                                        loopFields, lineData, loopWarnings);
786                        triggerNewAuditAuthor(aa);
787                } else if (category.equals("_pdbx_chem_comp_descriptor")) {
788                        ChemCompDescriptor ccd = (ChemCompDescriptor) buildObject(
789                                        ChemCompDescriptor.class.getName(),
790                                        loopFields, lineData, loopWarnings);
791                        triggerNewChemCompDescriptor(ccd);
792                } else if (category.equals("_pdbx_struct_oper_list")) {
793
794                        PdbxStructOperList structOper = (PdbxStructOperList) buildObject(
795                                        PdbxStructOperList.class.getName(),
796                                        loopFields, lineData, loopWarnings
797                                        );
798                        triggerNewPdbxStructOper(structOper);
799
800                } else if (category.equals("_pdbx_struct_assembly")) {
801                        PdbxStructAssembly sa = (PdbxStructAssembly) buildObject(
802                                        PdbxStructAssembly.class.getName(),
803                                        loopFields, lineData, loopWarnings);
804                        triggerNewPdbxStructAssembly(sa);
805
806                } else if (category.equals("_pdbx_struct_assembly_gen")) {
807                        PdbxStructAssemblyGen sa = (PdbxStructAssemblyGen) buildObject(
808                                        PdbxStructAssemblyGen.class.getName(),
809                                        loopFields, lineData, loopWarnings);
810                        triggerNewPdbxStructAssemblyGen(sa);
811                } else if ( category.equals("_chem_comp_atom")){
812                        ChemCompAtom atom = (ChemCompAtom)buildObject(
813                                        ChemCompAtom.class.getName(),
814                                        loopFields,lineData, loopWarnings);
815                        triggerNewChemCompAtom(atom);
816
817                }else if ( category.equals("_chem_comp_bond")){
818                        ChemCompBond bond = (ChemCompBond)buildObject(
819                                        ChemCompBond.class.getName(),
820                                        loopFields,lineData, loopWarnings);
821                        triggerNewChemCompBond(bond);
822                } else if ( category.equals("_pdbx_chem_comp_identifier")){
823                        PdbxChemCompIdentifier id = (PdbxChemCompIdentifier)buildObject(
824                                        PdbxChemCompIdentifier.class.getName(),
825                                        loopFields,lineData, loopWarnings);
826                        triggerNewPdbxChemCompIdentifier(id);
827                } else if ( category.equals("_pdbx_chem_comp_descriptor")){
828                        PdbxChemCompDescriptor id = (PdbxChemCompDescriptor)buildObject(
829                                        PdbxChemCompDescriptor.class.getName(),
830                                        loopFields,lineData, loopWarnings);
831                        triggerNewPdbxChemCompDescriptor(id);
832                } else if ( category.equals("_struct_conn")){
833                        StructConn id = (StructConn)buildObject(
834                                        StructConn.class.getName(),
835                                        loopFields,lineData, loopWarnings);
836                        triggerNewStructConn(id);
837
838                } else {
839
840                        logger.debug("Using a generic bean for category {}",category);
841
842                        // trigger a generic bean that can deal with all missing data types...
843                        triggerGeneric(category,loopFields,lineData);
844                }
845
846
847        }
848
849
850//      private PdbxStructOperList getPdbxStructOperList(List<String> loopFields,
851//                      List<String> lineData) {
852//              PdbxStructOperList so = new PdbxStructOperList();
853//
854//              //System.out.println(loopFields);
855//              //System.out.println(lineData);
856//
857//              String id = lineData.get(loopFields.indexOf("id"));
858//              so.setId(id);
859//              so.setType(lineData.get(loopFields.indexOf("type")));
860//              Matrix matrix = new Matrix(3,3);
861//              for (int i = 1 ; i <=3 ; i++){
862//                      for (int j =1 ; j <= 3 ; j++){
863//                              String max = String.format("matrix[%d][%d]",j,i);
864//
865//                              String val = lineData.get(loopFields.indexOf(max));
866//                              Double d = Double.parseDouble(val);
867//                              matrix.set(j-1,i-1,d);
868//                              //                              matrix.set(i-1,j-1,d);
869//                      }
870//              }
871//
872//              double[] coords =new double[3];
873//
874//              for ( int i = 1; i <=3 ; i++){
875//                      String v = String.format("vector[%d]",i);
876//                      String val = lineData.get(loopFields.indexOf(v));
877//                      Double d = Double.parseDouble(val);
878//                      coords[i-1] = d;
879//              }
880//
881//              so.setMatrix(matrix);
882//              so.setVector(coords);
883//
884//
885//
886//              return so;
887//      }
888
889        public void triggerNewPdbxStructOper(PdbxStructOperList structOper) {
890                for(MMcifConsumer c : consumers){
891                        c.newPdbxStructOperList(structOper);
892                }
893
894        }
895
896        public void triggerNewStructNcsOper(StructNcsOper sNcsOper) {
897                for(MMcifConsumer c : consumers){
898                        c.newStructNcsOper(sNcsOper);
899                }
900
901        }
902        
903        public void triggerNewAtomSites(AtomSites atomSites) {
904                for(MMcifConsumer c : consumers){
905                        c.newAtomSites(atomSites);
906                }
907        }
908
909        /**
910         * Populates a bean object from  the {@link org.biojava.nbio.structure.io.mmcif.model} package, 
911         * from the data read from a CIF file.
912         * It uses reflection to lookup the field and setter method names given the category 
913         * found in the CIF file. 
914         * <p>
915         * Due to limitations in variable names in java, not all fields can have names 
916         * exactly as defined in the CIF categories. In those cases the {@link CIFLabel} tag
917         * can be used in the field names to give the appropriate name that corresponds to the
918         * CIF category, which is the name that will be then looked up here.
919         * The {@link IgnoreField} tag can also be used to exclude fields from being looked up.
920         * @param className
921         * @param loopFields
922         * @param lineData
923         * @param warnings
924         * @return
925         */
926        private Object buildObject(String className, List<String> loopFields, List<String> lineData, Set<String> warnings) {
927
928                Object o = null;
929                Class<?> c = null;
930
931                try {
932                        // build up the Entity object from the line data...
933                        c = Class.forName(className);
934
935                        o = c.newInstance();
936
937                } catch (InstantiationException|ClassNotFoundException|IllegalAccessException e){
938                        logger.error( "Error while constructing {}: {}", className, e.getMessage());
939                        return null;
940                } 
941
942                // these methods get the fields but also looking at the IgnoreField and CIFLabel annotations 
943                Field[] fields = MMCIFFileTools.getFields(c);
944                String[] names = MMCIFFileTools.getFieldNames(fields);
945
946                // let's build a map of all methods so that we can look up the setter methods later
947                Method[] methods = c.getMethods();
948
949                Map<String,Method> methodMap = new HashMap<String, Method>();
950                for (Method m : methods) {
951                        methodMap.put(m.getName(),m);
952                }
953
954                // and a map of all the fields so that we can lookup them up later
955                Map<String, Field> names2fields = new HashMap<>();
956                for (int i=0;i<fields.length;i++) {
957                        names2fields.put(names[i], fields[i]);
958                }
959                
960                int pos = -1 ;
961                for (String key: loopFields){
962                        pos++;
963
964                        String val = lineData.get(pos);
965                        
966                        // we first start looking up the field which can be annotated with a CIFLabel if they 
967                        // need alternative names (e.g. for field _symmetry.space_group_name_H-M, since hyphen is not allowed in var names in java)
968                        Field field = names2fields.get(key);
969                        
970                        if (field == null) {
971                                produceWarning(key, val, c, warnings);
972                                continue;
973                        }
974                        // now we need to find the corresponding setter
975                        // note that we can't use the field directly and then call Field.set() because many setters 
976                        // have more functionality than just setting the value (e.g. some setters in ChemComp)
977
978                        // building up the setter method name: need to upper case the first letter, leave the rest untouched
979                        String setterMethodName = "set" + field.getName().substring(0,1).toUpperCase() + field.getName().substring(1, field.getName().length());
980
981                        Method setter = methodMap.get(setterMethodName);
982                        
983                        if (setter==null) {
984                                produceWarning(key, val, c, warnings);
985                                continue;
986                        }
987                        
988                        
989
990                        // now we populate the object with the values by invoking the corresponding setter method,                      
991                        // note that all of the mmCif container classes have only one argument (they are beans)
992                        Class<?>[] pType  = setter.getParameterTypes();
993                        
994
995                        try {
996                                if ( pType[0].getName().equals(Integer.class.getName())) {
997                                        if ( val != null && ! val.equals("?") && !val.equals(".")) {
998
999                                                Integer intVal = Integer.parseInt(val);
1000                                                setter.invoke(o, intVal);
1001                                                
1002                                        }
1003                                } else {
1004                                        // default val is a String                                      
1005                                        setter.invoke(o, val);
1006                                }
1007                        } catch (IllegalAccessException|InvocationTargetException e) {
1008                                logger.error("Could not invoke setter {} with value {} for class {}", setterMethodName, val, className);
1009                        } 
1010
1011                }
1012
1013                return o;
1014        }
1015        
1016        private void produceWarning(String key, String val, Class<?> c, Set<String> warnings) {
1017
1018                String warning = "Trying to set field " + key + " in "+ c.getName() +" found in file, but no corresponding field could be found in model class (value:" + val + ")";
1019                String warnkey = key+"-"+c.getName();
1020                // Suppress duplicate warnings or attempts to store empty data
1021                if( val.equals("?") || val.equals(".") || ( warnings != null && warnings.contains(warnkey)) ) {
1022                        logger.debug(warning);
1023                } else {
1024                        logger.warn(warning);
1025                }
1026
1027                if(warnings != null) {
1028                        warnings.add(warnkey);
1029                }
1030
1031        }
1032
1033        public void triggerGeneric(String category, List<String> loopFields, List<String> lineData){
1034                for(MMcifConsumer c : consumers){
1035                        c.newGenericData(category, loopFields, lineData);
1036                }
1037        }
1038
1039        public void triggerNewEntity(Entity entity){
1040                for(MMcifConsumer c : consumers){
1041                        c.newEntity(entity);
1042                }
1043        }
1044
1045        public void triggerNewEntityPolySeq(EntityPolySeq epolseq){
1046                for(MMcifConsumer c : consumers){
1047                        c.newEntityPolySeq(epolseq);
1048                }
1049        }
1050        public void triggerNewEntitySrcGen(EntitySrcGen entitySrcGen){
1051                for(MMcifConsumer c : consumers){
1052                        c.newEntitySrcGen(entitySrcGen);
1053                }
1054        }
1055        public void triggerNewEntitySrcNat(EntitySrcNat entitySrcNat){
1056                for(MMcifConsumer c : consumers){
1057                        c.newEntitySrcNat(entitySrcNat);
1058                }
1059        }
1060        public void triggerNewEntitySrcSyn(EntitySrcSyn entitySrcSyn){
1061                for(MMcifConsumer c : consumers){
1062                        c.newEntitySrcSyn(entitySrcSyn);
1063                }
1064        }
1065        public void triggerNewChemComp(ChemComp cc){
1066
1067                for(MMcifConsumer c : consumers){
1068                        c.newChemComp(cc);
1069                }
1070        }
1071        public void triggerNewStructAsym(StructAsym sasym){
1072                for(MMcifConsumer c : consumers){
1073                        c.newStructAsym(sasym);
1074                }
1075        }
1076
1077        private void triggerStructData(Struct struct){
1078                for(MMcifConsumer c : consumers){
1079                        c.setStruct(struct);
1080                }
1081        }
1082
1083        private void triggerNewAtomSite(AtomSite atom){
1084                for(MMcifConsumer c : consumers){
1085                        c.newAtomSite(atom);
1086                }
1087        }
1088
1089        private void triggerNewAuditAuthor(AuditAuthor aa){
1090                for(MMcifConsumer c : consumers){
1091                        c.newAuditAuthor(aa);
1092                }
1093        }
1094        private void triggerNewDatabasePDBrev(DatabasePDBrev dbrev){
1095                for(MMcifConsumer c : consumers){
1096                        c.newDatabasePDBrev(dbrev);
1097                }
1098        }
1099        private void triggerNewDatabasePDBrevRecord(DatabasePdbrevRecord dbrev){
1100                for(MMcifConsumer c : consumers){
1101                        c.newDatabasePDBrevRecord(dbrev);
1102                }
1103        }
1104
1105        private void triggerNewDatabasePDBremark(DatabasePDBremark remark){
1106                for(MMcifConsumer c : consumers){
1107                        c.newDatabasePDBremark(remark);
1108                }
1109        }
1110
1111        private void triggerExptl(Exptl exptl){
1112                for(MMcifConsumer c : consumers){
1113                        c.newExptl(exptl);
1114                }
1115        }
1116
1117        private void triggerNewCell(Cell cell) {
1118                for(MMcifConsumer c : consumers){
1119                        c.newCell(cell);
1120                }
1121        }
1122
1123        private void triggerNewSymmetry(Symmetry symmetry) {
1124                for(MMcifConsumer c : consumers){
1125                        c.newSymmetry(symmetry);
1126                }
1127        }
1128
1129        private void triggerNewStrucRef(StructRef sref){
1130                for(MMcifConsumer c : consumers){
1131                        c.newStructRef(sref);
1132                }
1133        }
1134
1135        private void triggerNewStrucRefSeq(StructRefSeq sref){
1136                for(MMcifConsumer c : consumers){
1137                        c.newStructRefSeq(sref);
1138                }
1139        }
1140
1141        private void triggerNewStrucRefSeqDif(StructRefSeqDif sref){
1142                for(MMcifConsumer c : consumers){
1143                        c.newStructRefSeqDif(sref);
1144                }
1145        }
1146
1147        private void triggerNewPdbxPolySeqScheme(PdbxPolySeqScheme ppss){
1148                for(MMcifConsumer c : consumers){
1149                        c.newPdbxPolySeqScheme(ppss);
1150                }
1151        }
1152        private void triggerNewPdbxNonPolyScheme(PdbxNonPolyScheme ppss){
1153                for(MMcifConsumer c : consumers){
1154                        c.newPdbxNonPolyScheme(ppss);
1155                }
1156        }
1157        public void triggerNewPdbxEntityNonPoly(PdbxEntityNonPoly pen){
1158                for (MMcifConsumer c: consumers){
1159                        c.newPdbxEntityNonPoly(pen);
1160                }
1161        }
1162        public void triggerNewStructKeywords(StructKeywords kw){
1163                for (MMcifConsumer c: consumers){
1164                        c.newStructKeywords(kw);
1165                }
1166        }
1167        public void triggerNewRefine(Refine r){
1168                for (MMcifConsumer c: consumers){
1169                        c.newRefine(r);
1170                }
1171        }
1172        public void triggerDocumentStart(){
1173                for(MMcifConsumer c : consumers){
1174                        c.documentStart();
1175                }
1176        }
1177        public void triggerDocumentEnd(){
1178                for(MMcifConsumer c : consumers){
1179                        c.documentEnd();
1180                }
1181        }
1182        public void triggerNewChemCompDescriptor(ChemCompDescriptor ccd) {
1183                for(MMcifConsumer c : consumers){
1184                        c.newChemCompDescriptor(ccd);
1185                }
1186        }
1187        private void triggerNewPdbxStructAssembly(PdbxStructAssembly sa) {
1188                for(MMcifConsumer c : consumers){
1189                        c.newPdbxStrucAssembly(sa);
1190                }
1191        }
1192        private void triggerNewPdbxStructAssemblyGen(PdbxStructAssemblyGen sa) {
1193                for(MMcifConsumer c : consumers){
1194                        c.newPdbxStrucAssemblyGen(sa);
1195                }
1196        }
1197
1198        private void triggerNewChemCompAtom(ChemCompAtom atom) {
1199                for(MMcifConsumer c : consumers){
1200                        c.newChemCompAtom(atom);
1201                }
1202        }
1203
1204        private void triggerNewChemCompBond(ChemCompBond bond) {
1205                for(MMcifConsumer c : consumers){
1206                        c.newChemCompBond(bond);
1207                }
1208        }
1209
1210        private void triggerNewPdbxChemCompIdentifier(PdbxChemCompIdentifier id) {
1211                for(MMcifConsumer c : consumers){
1212                        c.newPdbxChemCompIndentifier(id);
1213                }
1214        }
1215        private void triggerNewPdbxChemCompDescriptor(PdbxChemCompDescriptor id) {
1216                for(MMcifConsumer c : consumers){
1217                        c.newPdbxChemCompDescriptor(id);
1218                }
1219        }
1220        private void triggerNewStructConn(StructConn id) {
1221                for(MMcifConsumer c : consumers){
1222                        c.newStructConn(id);
1223                }
1224        }
1225        private void triggerNewStructSiteGen(StructSiteGen id) {
1226                for (MMcifConsumer c : consumers) {
1227                        c.newStructSiteGen(id);
1228                }
1229        }
1230        private void triggerNewStructSite(StructSite id) {
1231                for (MMcifConsumer c : consumers) {
1232                        c.newStructSite(id);
1233                }
1234        }
1235}