001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * created at Mar 4, 2008
021 */
022package org.biojava.nbio.structure.io.mmcif;
023
024import java.io.BufferedReader;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.InputStreamReader;
028import java.lang.reflect.Field;
029import java.lang.reflect.InvocationTargetException;
030import java.lang.reflect.Method;
031import java.util.ArrayList;
032import java.util.HashMap;
033import java.util.HashSet;
034import java.util.List;
035import java.util.Map;
036import java.util.Set;
037
038
039import org.biojava.nbio.structure.Structure;
040import org.biojava.nbio.structure.io.MMCIFFileReader;
041import org.biojava.nbio.structure.io.StructureIOFile;
042import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
043import org.biojava.nbio.structure.io.mmcif.model.AtomSites;
044import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor;
045import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
046import org.biojava.nbio.structure.io.mmcif.model.Cell;
047import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
048import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom;
049import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
050import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor;
051import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark;
052import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev;
053import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord;
054import org.biojava.nbio.structure.io.mmcif.model.Entity;
055import org.biojava.nbio.structure.io.mmcif.model.EntityPoly;
056import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq;
057import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen;
058import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat;
059import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn;
060import org.biojava.nbio.structure.io.mmcif.model.Exptl;
061import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
062import org.biojava.nbio.structure.io.mmcif.model.PdbxAuditRevisionHistory;
063import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor;
064import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier;
065import org.biojava.nbio.structure.io.mmcif.model.PdbxDatabaseStatus;
066import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly;
067import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme;
068import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme;
069import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
070import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
071import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
072import org.biojava.nbio.structure.io.mmcif.model.Refine;
073import org.biojava.nbio.structure.io.mmcif.model.Struct;
074import org.biojava.nbio.structure.io.mmcif.model.StructAsym;
075import org.biojava.nbio.structure.io.mmcif.model.StructConn;
076import org.biojava.nbio.structure.io.mmcif.model.StructKeywords;
077import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper;
078import org.biojava.nbio.structure.io.mmcif.model.StructRef;
079import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq;
080import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif;
081import org.biojava.nbio.structure.io.mmcif.model.StructSite;
082import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen;
083import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
084import org.slf4j.Logger;
085import org.slf4j.LoggerFactory;
086
087/**
088 * A simple mmCif file parser
089 *
090 *
091 * Usage:
092 * <pre>
093String file = "path/to/mmcif/file";
094StructureIOFile pdbreader = new MMCIFFileReader();
095
096Structure s = pdbreader.getStructure(file);
097System.out.println(s);
098
099// you can convert it to a PDB file...
100System.out.println(s.toPDB());
101
102 * </pre>
103 * For more documentation see <a href="http://biojava.org/wiki/BioJava:CookBook#Protein_Structure">http://biojava.org/wiki/BioJava:CookBook#Protein_Structure</a>.
104 *
105 * @author Andreas Prlic
106 * @author Jose Duarte
107 * @since 1.7
108 */
109public class SimpleMMcifParser implements MMcifParser {
110
111
112
113        /**
114         * The header appearing at the beginning of a mmCIF file.
115         * A "block code" can be added to it of no more than 32 chars.
116         * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
117         */
118        public static final String MMCIF_TOP_HEADER = "data_";
119
120        public static final String COMMENT_CHAR = "#";
121        public static final String LOOP_START = "loop_";
122        public static final String FIELD_LINE = "_";
123
124        // the following are the 3 valid quoting characters in CIF
125        /**
126         * Quoting character '
127         */
128        private static final char S1 = '\'';
129
130        /**
131         * Quoting character "
132         */
133        private static final char S2 = '\"';
134
135        /**
136         * Quoting character ; (multi-line quoting)
137         */
138        public static final String STRING_LIMIT = ";";
139
140
141        private List<MMcifConsumer> consumers ;
142
143        private Struct struct ;
144
145        private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifParser.class);
146
147        public SimpleMMcifParser(){
148                consumers = new ArrayList<MMcifConsumer>();
149                struct = null;
150        }
151
152        @Override
153        public void addMMcifConsumer(MMcifConsumer consumer) {
154                consumers.add(consumer);
155
156        }
157
158        @Override
159        public void clearConsumers() {
160                consumers.clear();
161
162        }
163
164        @Override
165        public void removeMMcifConsumer(MMcifConsumer consumer) {
166                consumers.remove(consumer);
167        }
168
169        public static void main(String[] args){
170                String file = "/Users/andreas/WORK/PDB/mmCif/a9/1a9n.cif.gz";
171                //String file = "/Users/andreas/WORK/PDB/MMCIF/1gav.mmcif";
172                //String file = "/Users/andreas/WORK/PDB/MMCIF/100d.cif";
173                //String file = "/Users/andreas/WORK/PDB/MMCIF/1a4a.mmcif";
174                System.out.println("parsing " + file);
175
176                StructureIOFile pdbreader = new MMCIFFileReader();
177                try {
178                        Structure s = pdbreader.getStructure(file);
179                        System.out.println(s);
180                        // convert it to a PDB file...
181                        System.out.println(s.toPDB());
182                } catch (IOException e) {
183                        e.printStackTrace();
184                }
185
186        }
187
188        @Override
189        public void parse(InputStream inStream) throws IOException {
190                parse(new BufferedReader(new InputStreamReader(inStream)));
191
192        }
193
194        @Override
195        public void parse(BufferedReader buf)
196                        throws IOException {
197
198                triggerDocumentStart();
199
200
201                // init container objects...
202                struct = new Struct();
203                String line = null;
204
205                boolean inLoop = false;
206                boolean inLoopData = false;
207
208
209                List<String> loopFields = new ArrayList<String>();
210                List<String> lineData   = new ArrayList<String>();
211                Set<String> loopWarnings = new HashSet<String>(); // used only to reduce logging statements
212
213                String category = null;
214                
215                boolean foundHeader = false;
216
217                while ( (line = buf.readLine ()) != null ){
218
219                        if (line.isEmpty() || line.startsWith(COMMENT_CHAR)) continue;
220
221                        if (!foundHeader) {
222                                // the first non-comment line is a data_PDBCODE line, test if this looks like a mmcif file
223                                if (line.startsWith(MMCIF_TOP_HEADER)){
224                                        foundHeader = true;
225                                        continue;
226                                } else {
227                                        triggerDocumentEnd();
228                                        throw new IOException("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'");
229                                }
230                        }
231
232                        logger.debug(inLoop + " " + line);
233
234                        if (line.startsWith(MMCIF_TOP_HEADER)){
235                                // either first line in file, or beginning of new section (data block in CIF parlance)
236                                if ( inLoop) {
237                                        //System.out.println("new data and in loop: " + line);
238                                        inLoop = false;
239                                        inLoopData = false;
240                                        lineData.clear();
241                                        loopFields.clear();
242                                }
243
244                        }
245
246
247                        if ( inLoop) {
248
249
250                                if ( line.startsWith(LOOP_START)){
251                                        loopFields.clear();
252                                        inLoop = true;
253                                        inLoopData = false;
254                                        continue;
255                                }
256
257                                if ( line.matches("\\s*"+FIELD_LINE+"\\w+.*")) {
258
259                                        if (inLoopData && line.startsWith(FIELD_LINE)) {
260                                                logger.debug("Found a field line after reading loop data. Toggling to inLoop=false");
261                                                inLoop = false;
262                                                inLoopData = false;
263                                                loopFields.clear();
264
265
266                                                // a boring normal line
267                                                List<String> data = processLine(line, buf, 2);
268
269                                                if ( data.size() < 1){
270                                                        // this can happen if empty lines at end of file
271                                                        lineData.clear();
272                                                        continue;
273                                                }
274                                                String key = data.get(0);
275                                                int pos = key.indexOf(".");
276                                                if ( pos < 0 ) {
277                                                        // looks like a chem_comp file
278                                                        // line should start with data, otherwise something is wrong!
279                                                        if (! line.startsWith(MMCIF_TOP_HEADER)){
280                                                                logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line+"'");
281                                                                triggerDocumentEnd();
282                                                                return;
283                                                        }
284                                                        // ignore the first line...
285                                                        category=null;
286                                                        lineData.clear();
287                                                        continue;
288                                                }
289                                                category = key.substring(0,pos);
290                                                String value = data.get(1);
291                                                loopFields.add(key.substring(pos+1,key.length()));
292                                                lineData.add(value);
293
294                                                logger.debug("Found data for category {}: {}", key, value);
295                                                continue;
296                                        }
297
298                                        // found another field.
299                                        String txt = line.trim();
300                                        if ( txt.indexOf('.') > -1){
301
302                                                String[] spl = txt.split("\\.");
303                                                category = spl[0];
304                                                String attribute = spl[1];
305                                                loopFields.add(attribute);
306                                                logger.debug("Found category: {}, attribute: {}",category, attribute);
307                                                if ( spl.length > 2){
308                                                        logger.warn("Found nested attribute in {}, not supported yet!",txt);
309                                                }
310
311                                        } else {
312                                                category = txt;
313                                                logger.debug("Found category without attribute: {}",category);
314                                        }
315
316
317                                } else {
318
319                                        // in loop and we found a data line
320                                        lineData = processLine(line, buf, loopFields.size());
321                                        logger.debug("Found a loop data line with {} data fields", lineData.size());
322                                        logger.debug("Data fields: {}", lineData.toString());
323                                        if ( lineData.size() != loopFields.size()){
324                                                logger.warn("Expected {} data fields, but found {} in line: {}",loopFields.size(),lineData.size(),line);
325
326                                        }
327
328                                        endLineChecks(category, loopFields, lineData, loopWarnings);
329
330                                        lineData.clear();
331
332                                        inLoopData = true;
333                                }
334
335                        } else {
336                                // not in loop
337
338                                if ( line.startsWith(LOOP_START)){
339                                        if ( category != null)
340                                                endLineChecks(category, loopFields, lineData, loopWarnings);
341
342                                        resetBuffers(loopFields, lineData, loopWarnings);
343                                        category = null;
344                                        inLoop = true;
345                                        inLoopData = false;
346                                        logger.debug("Detected LOOP_START: '{}'. Toggling to inLoop=true", LOOP_START);
347                                        continue;
348                                } else {
349                                        logger.debug("Normal line ");
350                                        inLoop = false;
351
352                                        // a boring normal line
353                                        List<String> data = processLine(line, buf, 2);
354
355                                        if ( data.size() < 1){
356                                                // this can happen if empty lines at end of file
357                                                lineData.clear();
358                                                continue;
359                                        }
360                                        String key = data.get(0);
361                                        int pos = key.indexOf(".");
362                                        if ( pos < 0 ) {
363                                                // looks like a chem_comp file
364                                                // line should start with data, otherwise something is wrong!
365                                                if (! line.startsWith(MMCIF_TOP_HEADER)){
366                                                        logger.warn("This does not look like a valid mmCIF file! The first line should start with 'data_', but is '" + line+"'");
367                                                        triggerDocumentEnd();
368                                                        return;
369                                                }
370                                                // ignore the first line...
371                                                category=null;
372                                                lineData.clear();
373                                                continue;
374                                        }
375
376                                        if (category!=null && !key.substring(0,pos).equals(category)) {
377                                                // we've changed category: need to flush the previous one
378                                                endLineChecks(category, loopFields, lineData, loopWarnings);
379                                                resetBuffers(loopFields, lineData, loopWarnings);
380                                        }
381
382                                        category = key.substring(0,pos);
383
384                                        String value = data.get(1);
385                                        loopFields.add(key.substring(pos+1,key.length()));
386                                        lineData.add(value);
387
388                                        logger.debug("Found data for category {}: {}", key, value);
389
390                                }
391                        }
392                }
393
394                if (category!=null && lineData.size()>0 && lineData.size()==loopFields.size()) {
395                        // the last category in the file will still be missing, we add it now
396                        endLineChecks(category, loopFields, lineData, loopWarnings);
397                        resetBuffers(loopFields, lineData, loopWarnings);
398                }
399
400                if (struct != null){
401                        triggerStructData(struct);
402                }
403
404                triggerDocumentEnd();
405
406        }
407
408        private void resetBuffers(List<String> loopFields, List<String> lineData, Set<String> loopWarnings) {
409                loopFields.clear();
410                lineData.clear();
411                loopWarnings.clear();
412        }
413
414        private List<String> processSingleLine(String line){
415
416                List<String> data = new ArrayList<String>();
417
418                if ( line.trim().length() == 0){
419                        return data;
420                }
421
422                if ( line.trim().length() == 1){
423                        if ( line.startsWith(STRING_LIMIT))
424                                return data;
425                }
426                boolean inString = false; // semicolon (;) quoting
427                boolean inS1     = false; // single quote (') quoting
428                boolean inS2     = false; // double quote (") quoting
429                String word      = "";
430
431                for (int i=0; i< line.length(); i++ ){
432
433                        Character c = line.charAt(i);
434
435                        Character nextC = null;
436                        if (i < line.length() - 1)
437                                nextC = line.charAt(i+1);
438
439                        Character prevC = null;
440                        if (i>0)
441                                prevC = line.charAt(i-1);
442
443                        if  (c == ' ') {
444
445                                if ( ! inString){
446                                        if ( ! word.equals(""))
447                                                data.add(word.trim());
448                                        word = "";
449                                } else {
450                                        // we are in a string, add the space
451                                        word += c;
452                                }
453
454                        } else if (c == S1 )  {
455
456                                if ( inString){
457
458                                        boolean wordEnd = false;
459                                        if (! inS2) {
460                                                if (nextC==null || Character.isWhitespace(nextC)){
461                                                        i++;
462                                                        wordEnd = true;
463                                                }
464                                        }
465
466
467                                        if ( wordEnd ) {
468
469                                                // at end of string
470                                                if ( ! word.equals(""))
471                                                        data.add(word.trim());
472                                                word     = "";
473                                                inString = false;
474                                                inS1     = false;
475                                        } else {
476                                                word += c;
477                                        }
478
479                                } else if (prevC==null || prevC==' ') {
480                                        // the beginning of a new string
481                                        inString = true;
482                                        inS1     = true;
483                                } else {
484                                        word += c;
485                                }
486                        } else if ( c == S2 ){
487                                if ( inString){
488
489                                        boolean wordEnd = false;
490                                        if (! inS1) {
491                                                if (nextC==null || Character.isWhitespace(nextC)){
492                                                        i++;
493                                                        wordEnd = true;
494                                                }
495                                        }
496
497                                        if ( wordEnd ) {
498
499                                                // at end of string
500                                                if ( ! word.equals(""))
501                                                        data.add(word.trim());
502                                                word     = "";
503                                                inString = false;
504                                                inS2     = false;
505                                        } else {
506                                                word += c;
507                                        }
508                                }  else if (prevC==null || prevC==' ') {
509                                        // the beginning of a new string
510                                        inString = true;
511                                        inS2     = true;
512                                } else {
513                                        word += c;
514                                }
515                        } else {
516                                word += c;
517                        }
518
519                }
520                if ( ! word.trim().equals(""))
521                        data.add(word);
522
523
524                return data;
525
526        }
527
528        /**
529         * Get the content of a cif entry
530         *
531         * @param line
532         * @param buf
533         * @return
534         */
535        private List<String> processLine(String line,
536                        BufferedReader buf,
537                        int fieldLength)
538                                        throws IOException{
539
540                //System.out.println("XX processLine " + fieldLength + " " + line);
541                // go through the line and process each character
542                List<String> lineData = new ArrayList<String>();
543
544                boolean inString = false;
545
546                StringBuilder bigWord = null;
547
548                while ( true ){
549
550                        if ( line.startsWith(STRING_LIMIT)){
551                                if (! inString){
552
553                                        inString = true;
554                                        if ( line.length() > 1)
555                                                bigWord = new StringBuilder(line.substring(1));
556                                        else
557                                                bigWord = new StringBuilder("");
558
559
560                                } else {
561                                        // the end of a word
562                                        lineData.add(bigWord.toString());
563                                        bigWord = null;
564                                        inString = false;
565
566                                }
567                        } else {
568                                if ( inString )
569                                        bigWord.append(line);
570                                else {
571
572                                        List<String> dat = processSingleLine(line);
573
574                                        for (String d : dat){
575                                                lineData.add(d);
576                                        }
577                                }
578                        }
579
580                        //System.out.println("in process line : " + lineData.size() + " " + fieldLength);
581
582                        if ( lineData.size() > fieldLength){
583
584                                logger.warn("wrong data length ("+lineData.size()+
585                                                ") should be ("+fieldLength+") at line " + line + " got lineData: " + lineData);
586                                return lineData;
587                        }
588
589                        if ( lineData.size() == fieldLength)
590                                return lineData;
591
592
593                        line = buf.readLine();
594                        if ( line == null)
595                                break;
596                }
597                return lineData;
598
599        }
600
601
602
603        private void endLineChecks(String category,List<String> loopFields, List<String> lineData, Set<String> loopWarnings ) throws IOException{
604
605                logger.debug("Processing category {}, with fields: {}",category,loopFields.toString());
606                //              System.out.println("parsed the following data: " +category + " fields: "+
607                //                              loopFields + " DATA: " +
608                //                              lineData);
609
610                if ( loopFields.size() != lineData.size()){
611                        logger.warn("looks like we got a problem with nested string quote characters:");
612                        throw new IOException("data length ("+ lineData.size() +
613                                        ") != fields length ("+loopFields.size()+
614                                        ") category: " +category + " fields: "+
615                                        loopFields + " DATA: " +
616                                        lineData );
617                }
618
619                if ( category.equals("_entity")){
620
621                        Entity e =  (Entity) buildObject(
622                                        Entity.class.getName(),
623                                        loopFields,lineData, loopWarnings);
624                        triggerNewEntity(e);
625
626                } else if (category.equals("_entity_poly")) {
627                        EntityPoly ep = (EntityPoly) buildObject(EntityPoly.class.getName(), loopFields, lineData, loopWarnings);
628                        triggerNewEntityPoly(ep);
629                        
630                } else if ( category.equals("_struct")){
631
632                        struct =  (Struct) buildObject(
633                                        Struct.class.getName(),
634                                        loopFields, lineData, loopWarnings);
635
636                } else if ( category.equals("_atom_site")){
637
638                        AtomSite a = (AtomSite) buildObject(
639                                        AtomSite.class.getName(),
640                                        loopFields, lineData, loopWarnings);
641                        triggerNewAtomSite(a);
642
643                } else if ( category.equals("_database_PDB_rev")){
644                        DatabasePDBrev dbrev = (DatabasePDBrev) buildObject(
645                                        DatabasePDBrev.class.getName(),
646                                        loopFields, lineData, loopWarnings);
647
648                        triggerNewDatabasePDBrev(dbrev);
649
650                } else if ( category.equals("_database_PDB_rev_record")) {
651                        DatabasePdbrevRecord dbrev = (DatabasePdbrevRecord) buildObject(
652                                        DatabasePdbrevRecord.class.getName(),
653                                        loopFields, lineData, loopWarnings);
654
655                        triggerNewDatabasePDBrevRecord(dbrev);
656                        
657    // MMCIF version 5 dates  
658                } else if ( category.equals("_pdbx_audit_revision_history")) {
659                        PdbxAuditRevisionHistory history = (PdbxAuditRevisionHistory) buildObject(
660                                        PdbxAuditRevisionHistory.class.getName(),
661                                        loopFields, lineData, loopWarnings);
662
663                        triggerNewPdbxAuditRevisionHistory(history);
664    
665    // MMCIF version 5 dates
666                } else if ( category.equals("_pdbx_database_status")) {
667                        PdbxDatabaseStatus status = (PdbxDatabaseStatus) buildObject(
668                                        PdbxDatabaseStatus.class.getName(),
669                                        loopFields, lineData, loopWarnings);
670
671                        triggerNewPdbxDatabaseStatus(status);
672
673                }else if (  category.equals("_database_PDB_remark")) {
674                        DatabasePDBremark remark = (DatabasePDBremark) buildObject(
675                                        DatabasePDBremark.class.getName(),
676                                        loopFields, lineData, loopWarnings);
677
678                        triggerNewDatabasePDBremark(remark);
679
680                } else if ( category.equals("_exptl")){
681                        Exptl exptl  = (Exptl) buildObject(
682                                        Exptl.class.getName(),
683                                        loopFields,lineData, loopWarnings);
684
685                        triggerExptl(exptl);
686
687                } else if ( category.equals("_cell")){
688                        Cell cell  = (Cell) buildObject(
689                                        Cell.class.getName(),
690                                        loopFields,lineData, loopWarnings);
691
692                        triggerNewCell(cell);
693
694                } else if ( category.equals("_symmetry")){
695                        Symmetry symmetry  = (Symmetry) buildObject(
696                                        Symmetry.class.getName(),
697                                        loopFields,lineData, loopWarnings);
698
699                        triggerNewSymmetry(symmetry);
700                } else if ( category.equals("_struct_ncs_oper")) {
701
702                        StructNcsOper sNcsOper = (StructNcsOper) buildObject(
703                                        StructNcsOper.class.getName(), 
704                                        loopFields, lineData, loopWarnings);
705                        triggerNewStructNcsOper(sNcsOper);
706                } else if ( category.equals("_atom_sites")) {
707                        
708                        AtomSites atomSites = (AtomSites) buildObject(
709                                        AtomSites.class.getName(),
710                                        loopFields, lineData, loopWarnings);
711                        triggerNewAtomSites(atomSites);
712
713                } else if ( category.equals("_struct_ref")){
714                        StructRef sref  = (StructRef) buildObject(
715                                        StructRef.class.getName(),
716                                        loopFields,lineData, loopWarnings);
717
718                        triggerNewStrucRef(sref);
719
720                } else if ( category.equals("_struct_ref_seq")){
721                        StructRefSeq sref  = (StructRefSeq) buildObject(
722                                        StructRefSeq.class.getName(),
723                                        loopFields,lineData, loopWarnings);
724
725                        triggerNewStrucRefSeq(sref);
726                } else if ( category.equals("_struct_ref_seq_dif")) {
727                        StructRefSeqDif sref = (StructRefSeqDif) buildObject(
728                                        StructRefSeqDif.class.getName(),
729                                        loopFields, lineData, loopWarnings);
730
731                        triggerNewStrucRefSeqDif(sref);
732                } else if ( category.equals("_struct_site_gen")) {
733                        StructSiteGen sref = (StructSiteGen) buildObject(
734                                        StructSiteGen.class.getName(),
735                                        loopFields, lineData, loopWarnings);
736
737                        triggerNewStructSiteGen(sref);
738                } else if ( category.equals("_struct_site")) {
739                        StructSite sref = (StructSite) buildObject(
740                                        StructSite.class.getName(),
741                                        loopFields, lineData, loopWarnings);
742                        triggerNewStructSite(sref);
743                } else if ( category.equals("_entity_poly_seq")){
744                        EntityPolySeq exptl  = (EntityPolySeq) buildObject(
745                                        EntityPolySeq.class.getName(),
746                                        loopFields,lineData, loopWarnings);
747
748                        triggerNewEntityPolySeq(exptl);
749                } else if ( category.equals("_entity_src_gen")){
750                        EntitySrcGen entitySrcGen = (EntitySrcGen) buildObject(
751                                        EntitySrcGen.class.getName(),
752                                        loopFields,lineData, loopWarnings);
753                        triggerNewEntitySrcGen(entitySrcGen);
754                } else if ( category.equals("_entity_src_nat")){
755                        EntitySrcNat entitySrcNat = (EntitySrcNat) buildObject(
756                                        EntitySrcNat.class.getName(),
757                                        loopFields,lineData, loopWarnings);
758                        triggerNewEntitySrcNat(entitySrcNat);
759                } else if ( category.equals("_pdbx_entity_src_syn")){
760                        EntitySrcSyn entitySrcSyn = (EntitySrcSyn) buildObject(
761                                        EntitySrcSyn.class.getName(),
762                                        loopFields,lineData, loopWarnings);
763                        triggerNewEntitySrcSyn(entitySrcSyn);
764                } else if ( category.equals("_struct_asym")){
765                        StructAsym sasym  = (StructAsym) buildObject(
766                                        StructAsym.class.getName(),
767                                        loopFields,lineData, loopWarnings);
768
769                        triggerNewStructAsym(sasym);
770
771                } else if ( category.equals("_pdbx_poly_seq_scheme")){
772                        PdbxPolySeqScheme ppss  = (PdbxPolySeqScheme) buildObject(
773                                        PdbxPolySeqScheme.class.getName(),
774                                        loopFields,lineData, loopWarnings);
775
776                        triggerNewPdbxPolySeqScheme(ppss);
777
778                } else if ( category.equals("_pdbx_nonpoly_scheme")){
779                        PdbxNonPolyScheme ppss  = (PdbxNonPolyScheme) buildObject(
780                                        PdbxNonPolyScheme.class.getName(),
781                                        loopFields,lineData, loopWarnings);
782
783                        triggerNewPdbxNonPolyScheme(ppss);
784
785                } else if ( category.equals("_pdbx_entity_nonpoly")){
786                        PdbxEntityNonPoly pen = (PdbxEntityNonPoly) buildObject(
787                                        PdbxEntityNonPoly.class.getName(),
788                                        loopFields,lineData, loopWarnings
789                                        );
790                        triggerNewPdbxEntityNonPoly(pen);
791                } else if ( category.equals("_struct_keywords")){
792                        StructKeywords kw = (StructKeywords)buildObject(
793                                        StructKeywords.class.getName(),
794                                        loopFields,lineData, loopWarnings
795                                        );
796                        triggerNewStructKeywords(kw);
797                } else if (category.equals("_refine")){
798                        Refine r = (Refine)buildObject(
799                                        Refine.class.getName(),
800                                        loopFields,lineData, loopWarnings
801                                        );
802                        triggerNewRefine(r);
803                } else if (category.equals("_chem_comp")){
804                        ChemComp c = (ChemComp)buildObject(
805                                        ChemComp.class.getName(),
806                                        loopFields, lineData, loopWarnings
807                                        );
808                        triggerNewChemComp(c);
809                } else if (category.equals("_audit_author")) {
810                        AuditAuthor aa = (AuditAuthor)buildObject(
811                                        AuditAuthor.class.getName(),
812                                        loopFields, lineData, loopWarnings);
813                        triggerNewAuditAuthor(aa);
814                } else if (category.equals("_pdbx_chem_comp_descriptor")) {
815                        ChemCompDescriptor ccd = (ChemCompDescriptor) buildObject(
816                                        ChemCompDescriptor.class.getName(),
817                                        loopFields, lineData, loopWarnings);
818                        triggerNewChemCompDescriptor(ccd);
819                } else if (category.equals("_pdbx_struct_oper_list")) {
820
821                        PdbxStructOperList structOper = (PdbxStructOperList) buildObject(
822                                        PdbxStructOperList.class.getName(),
823                                        loopFields, lineData, loopWarnings
824                                        );
825                        triggerNewPdbxStructOper(structOper);
826
827                } else if (category.equals("_pdbx_struct_assembly")) {
828                        PdbxStructAssembly sa = (PdbxStructAssembly) buildObject(
829                                        PdbxStructAssembly.class.getName(),
830                                        loopFields, lineData, loopWarnings);
831                        triggerNewPdbxStructAssembly(sa);
832
833                } else if (category.equals("_pdbx_struct_assembly_gen")) {
834                        PdbxStructAssemblyGen sa = (PdbxStructAssemblyGen) buildObject(
835                                        PdbxStructAssemblyGen.class.getName(),
836                                        loopFields, lineData, loopWarnings);
837                        triggerNewPdbxStructAssemblyGen(sa);
838                } else if ( category.equals("_chem_comp_atom")){
839                        ChemCompAtom atom = (ChemCompAtom)buildObject(
840                                        ChemCompAtom.class.getName(),
841                                        loopFields,lineData, loopWarnings);
842                        triggerNewChemCompAtom(atom);
843
844                }else if ( category.equals("_chem_comp_bond")){
845                        ChemCompBond bond = (ChemCompBond)buildObject(
846                                        ChemCompBond.class.getName(),
847                                        loopFields,lineData, loopWarnings);
848                        triggerNewChemCompBond(bond);
849                } else if ( category.equals("_pdbx_chem_comp_identifier")){
850                        PdbxChemCompIdentifier id = (PdbxChemCompIdentifier)buildObject(
851                                        PdbxChemCompIdentifier.class.getName(),
852                                        loopFields,lineData, loopWarnings);
853                        triggerNewPdbxChemCompIdentifier(id);
854                } else if ( category.equals("_pdbx_chem_comp_descriptor")){
855                        PdbxChemCompDescriptor id = (PdbxChemCompDescriptor)buildObject(
856                                        PdbxChemCompDescriptor.class.getName(),
857                                        loopFields,lineData, loopWarnings);
858                        triggerNewPdbxChemCompDescriptor(id);
859                } else if ( category.equals("_struct_conn")){
860                        StructConn id = (StructConn)buildObject(
861                                        StructConn.class.getName(),
862                                        loopFields,lineData, loopWarnings);
863                        triggerNewStructConn(id);
864
865                } else {
866
867                        logger.debug("Using a generic bean for category {}",category);
868
869                        // trigger a generic bean that can deal with all missing data types...
870                        triggerGeneric(category,loopFields,lineData);
871                }
872
873
874        }
875
876
877//      private PdbxStructOperList getPdbxStructOperList(List<String> loopFields,
878//                      List<String> lineData) {
879//              PdbxStructOperList so = new PdbxStructOperList();
880//
881//              //System.out.println(loopFields);
882//              //System.out.println(lineData);
883//
884//              String id = lineData.get(loopFields.indexOf("id"));
885//              so.setId(id);
886//              so.setType(lineData.get(loopFields.indexOf("type")));
887//              Matrix matrix = new Matrix(3,3);
888//              for (int i = 1 ; i <=3 ; i++){
889//                      for (int j =1 ; j <= 3 ; j++){
890//                              String max = String.format("matrix[%d][%d]",j,i);
891//
892//                              String val = lineData.get(loopFields.indexOf(max));
893//                              Double d = Double.parseDouble(val);
894//                              matrix.set(j-1,i-1,d);
895//                              //                              matrix.set(i-1,j-1,d);
896//                      }
897//              }
898//
899//              double[] coords =new double[3];
900//
901//              for ( int i = 1; i <=3 ; i++){
902//                      String v = String.format("vector[%d]",i);
903//                      String val = lineData.get(loopFields.indexOf(v));
904//                      Double d = Double.parseDouble(val);
905//                      coords[i-1] = d;
906//              }
907//
908//              so.setMatrix(matrix);
909//              so.setVector(coords);
910//
911//
912//
913//              return so;
914//      }
915
916        public void triggerNewPdbxStructOper(PdbxStructOperList structOper) {
917                for(MMcifConsumer c : consumers){
918                        c.newPdbxStructOperList(structOper);
919                }
920
921        }
922
923        public void triggerNewStructNcsOper(StructNcsOper sNcsOper) {
924                for(MMcifConsumer c : consumers){
925                        c.newStructNcsOper(sNcsOper);
926                }
927
928        }
929        
930        public void triggerNewAtomSites(AtomSites atomSites) {
931                for(MMcifConsumer c : consumers){
932                        c.newAtomSites(atomSites);
933                }
934        }
935
936        /**
937         * Populates a bean object from  the {@link org.biojava.nbio.structure.io.mmcif.model} package, 
938         * from the data read from a CIF file.
939         * It uses reflection to lookup the field and setter method names given the category 
940         * found in the CIF file. 
941         * <p>
942         * Due to limitations in variable names in java, not all fields can have names 
943         * exactly as defined in the CIF categories. In those cases the {@link CIFLabel} tag
944         * can be used in the field names to give the appropriate name that corresponds to the
945         * CIF category, which is the name that will be then looked up here.
946         * The {@link IgnoreField} tag can also be used to exclude fields from being looked up.
947         * @param className
948         * @param loopFields
949         * @param lineData
950         * @param warnings
951         * @return
952         */
953        private Object buildObject(String className, List<String> loopFields, List<String> lineData, Set<String> warnings) {
954
955                Object o = null;
956                Class<?> c = null;
957
958                try {
959                        // build up the Entity object from the line data...
960                        c = Class.forName(className);
961
962                        o = c.newInstance();
963
964                } catch (InstantiationException|ClassNotFoundException|IllegalAccessException e){
965                        logger.error( "Error while constructing {}: {}", className, e.getMessage());
966                        return null;
967                } 
968
969                // these methods get the fields but also looking at the IgnoreField and CIFLabel annotations 
970                Field[] fields = MMCIFFileTools.getFields(c);
971                String[] names = MMCIFFileTools.getFieldNames(fields);
972
973                // let's build a map of all methods so that we can look up the setter methods later
974                Method[] methods = c.getMethods();
975
976                Map<String,Method> methodMap = new HashMap<String, Method>();
977                for (Method m : methods) {
978                        methodMap.put(m.getName(),m);
979                }
980
981                // and a map of all the fields so that we can lookup them up later
982                Map<String, Field> names2fields = new HashMap<>();
983                for (int i=0;i<fields.length;i++) {
984                        names2fields.put(names[i], fields[i]);
985                }
986                
987                int pos = -1 ;
988                for (String key: loopFields){
989                        pos++;
990
991                        String val = lineData.get(pos);
992                        
993                        // we first start looking up the field which can be annotated with a CIFLabel if they 
994                        // need alternative names (e.g. for field _symmetry.space_group_name_H-M, since hyphen is not allowed in var names in java)
995                        Field field = names2fields.get(key);
996                        
997                        if (field == null) {
998                                produceWarning(key, val, c, warnings);
999                                continue;
1000                        }
1001                        // now we need to find the corresponding setter
1002                        // note that we can't use the field directly and then call Field.set() because many setters 
1003                        // have more functionality than just setting the value (e.g. some setters in ChemComp)
1004
1005                        // building up the setter method name: need to upper case the first letter, leave the rest untouched
1006                        String setterMethodName = "set" + field.getName().substring(0,1).toUpperCase() + field.getName().substring(1, field.getName().length());
1007
1008                        Method setter = methodMap.get(setterMethodName);
1009                        
1010                        if (setter==null) {
1011                                produceWarning(key, val, c, warnings);
1012                                continue;
1013                        }
1014                        
1015                        
1016
1017                        // now we populate the object with the values by invoking the corresponding setter method,                      
1018                        // note that all of the mmCif container classes have only one argument (they are beans)
1019                        Class<?>[] pType  = setter.getParameterTypes();
1020                        
1021
1022                        try {
1023                                if ( pType[0].getName().equals(Integer.class.getName())) {
1024                                        if ( val != null && ! val.equals("?") && !val.equals(".")) {
1025
1026                                                Integer intVal = Integer.parseInt(val);
1027                                                setter.invoke(o, intVal);
1028                                                
1029                                        }
1030                                } else {
1031                                        // default val is a String                                      
1032                                        setter.invoke(o, val);
1033                                }
1034                        } catch (IllegalAccessException|InvocationTargetException e) {
1035                                logger.error("Could not invoke setter {} with value {} for class {}", setterMethodName, val, className);
1036                        } 
1037
1038                }
1039
1040                return o;
1041        }
1042        
1043        private void produceWarning(String key, String val, Class<?> c, Set<String> warnings) {
1044
1045                String warning = "Trying to set field " + key + " in "+ c.getName() +" found in file, but no corresponding field could be found in model class (value:" + val + ")";
1046                String warnkey = key+"-"+c.getName();
1047                // Suppress duplicate warnings or attempts to store empty data
1048                if( val.equals("?") || val.equals(".") || ( warnings != null && warnings.contains(warnkey)) ) {
1049                        logger.debug(warning);
1050                } else {
1051                        logger.info(warning);
1052                }
1053
1054                if(warnings != null) {
1055                        warnings.add(warnkey);
1056                }
1057
1058        }
1059
1060        public void triggerGeneric(String category, List<String> loopFields, List<String> lineData){
1061                for(MMcifConsumer c : consumers){
1062                        c.newGenericData(category, loopFields, lineData);
1063                }
1064        }
1065
1066        public void triggerNewEntity(Entity entity){
1067                for(MMcifConsumer c : consumers){
1068                        c.newEntity(entity);
1069                }
1070        }
1071        
1072        public void triggerNewEntityPoly(EntityPoly entityPoly) {
1073                for(MMcifConsumer c : consumers){
1074                        c.newEntityPoly(entityPoly);
1075                }               
1076        }
1077
1078        public void triggerNewEntityPolySeq(EntityPolySeq epolseq){
1079                for(MMcifConsumer c : consumers){
1080                        c.newEntityPolySeq(epolseq);
1081                }
1082        }
1083        public void triggerNewEntitySrcGen(EntitySrcGen entitySrcGen){
1084                for(MMcifConsumer c : consumers){
1085                        c.newEntitySrcGen(entitySrcGen);
1086                }
1087        }
1088        public void triggerNewEntitySrcNat(EntitySrcNat entitySrcNat){
1089                for(MMcifConsumer c : consumers){
1090                        c.newEntitySrcNat(entitySrcNat);
1091                }
1092        }
1093        public void triggerNewEntitySrcSyn(EntitySrcSyn entitySrcSyn){
1094                for(MMcifConsumer c : consumers){
1095                        c.newEntitySrcSyn(entitySrcSyn);
1096                }
1097        }
1098        public void triggerNewChemComp(ChemComp cc){
1099
1100                for(MMcifConsumer c : consumers){
1101                        c.newChemComp(cc);
1102                }
1103        }
1104        public void triggerNewStructAsym(StructAsym sasym){
1105                for(MMcifConsumer c : consumers){
1106                        c.newStructAsym(sasym);
1107                }
1108        }
1109
1110        private void triggerStructData(Struct struct){
1111                for(MMcifConsumer c : consumers){
1112                        c.setStruct(struct);
1113                }
1114        }
1115
1116        private void triggerNewAtomSite(AtomSite atom){
1117                for(MMcifConsumer c : consumers){
1118                        c.newAtomSite(atom);
1119                }
1120        }
1121
1122        private void triggerNewAuditAuthor(AuditAuthor aa){
1123                for(MMcifConsumer c : consumers){
1124                        c.newAuditAuthor(aa);
1125                }
1126        }
1127        
1128        private void triggerNewPdbxAuditRevisionHistory(PdbxAuditRevisionHistory history) {
1129                for(MMcifConsumer c : consumers){
1130                        c.newPdbxAuditRevisionHistory(history);
1131                }
1132        }
1133        
1134        private void triggerNewPdbxDatabaseStatus(PdbxDatabaseStatus status) {
1135                for(MMcifConsumer c : consumers){
1136                        c.newPdbxDatabaseStatus(status);
1137                }
1138        }
1139        
1140        private void triggerNewDatabasePDBrev(DatabasePDBrev dbrev){
1141                for(MMcifConsumer c : consumers){
1142                        c.newDatabasePDBrev(dbrev);
1143                }
1144        }
1145        private void triggerNewDatabasePDBrevRecord(DatabasePdbrevRecord dbrev){
1146                for(MMcifConsumer c : consumers){
1147                        c.newDatabasePDBrevRecord(dbrev);
1148                }
1149        }
1150
1151        private void triggerNewDatabasePDBremark(DatabasePDBremark remark){
1152                for(MMcifConsumer c : consumers){
1153                        c.newDatabasePDBremark(remark);
1154                }
1155        }
1156
1157        private void triggerExptl(Exptl exptl){
1158                for(MMcifConsumer c : consumers){
1159                        c.newExptl(exptl);
1160                }
1161        }
1162
1163        private void triggerNewCell(Cell cell) {
1164                for(MMcifConsumer c : consumers){
1165                        c.newCell(cell);
1166                }
1167        }
1168
1169        private void triggerNewSymmetry(Symmetry symmetry) {
1170                for(MMcifConsumer c : consumers){
1171                        c.newSymmetry(symmetry);
1172                }
1173        }
1174
1175        private void triggerNewStrucRef(StructRef sref){
1176                for(MMcifConsumer c : consumers){
1177                        c.newStructRef(sref);
1178                }
1179        }
1180
1181        private void triggerNewStrucRefSeq(StructRefSeq sref){
1182                for(MMcifConsumer c : consumers){
1183                        c.newStructRefSeq(sref);
1184                }
1185        }
1186
1187        private void triggerNewStrucRefSeqDif(StructRefSeqDif sref){
1188                for(MMcifConsumer c : consumers){
1189                        c.newStructRefSeqDif(sref);
1190                }
1191        }
1192
1193        private void triggerNewPdbxPolySeqScheme(PdbxPolySeqScheme ppss){
1194                for(MMcifConsumer c : consumers){
1195                        c.newPdbxPolySeqScheme(ppss);
1196                }
1197        }
1198        private void triggerNewPdbxNonPolyScheme(PdbxNonPolyScheme ppss){
1199                for(MMcifConsumer c : consumers){
1200                        c.newPdbxNonPolyScheme(ppss);
1201                }
1202        }
1203        public void triggerNewPdbxEntityNonPoly(PdbxEntityNonPoly pen){
1204                for (MMcifConsumer c: consumers){
1205                        c.newPdbxEntityNonPoly(pen);
1206                }
1207        }
1208        public void triggerNewStructKeywords(StructKeywords kw){
1209                for (MMcifConsumer c: consumers){
1210                        c.newStructKeywords(kw);
1211                }
1212        }
1213        public void triggerNewRefine(Refine r){
1214                for (MMcifConsumer c: consumers){
1215                        c.newRefine(r);
1216                }
1217        }
1218        public void triggerDocumentStart(){
1219                for(MMcifConsumer c : consumers){
1220                        c.documentStart();
1221                }
1222        }
1223        public void triggerDocumentEnd(){
1224                for(MMcifConsumer c : consumers){
1225                        c.documentEnd();
1226                }
1227        }
1228        public void triggerNewChemCompDescriptor(ChemCompDescriptor ccd) {
1229                for(MMcifConsumer c : consumers){
1230                        c.newChemCompDescriptor(ccd);
1231                }
1232        }
1233        private void triggerNewPdbxStructAssembly(PdbxStructAssembly sa) {
1234                for(MMcifConsumer c : consumers){
1235                        c.newPdbxStrucAssembly(sa);
1236                }
1237        }
1238        private void triggerNewPdbxStructAssemblyGen(PdbxStructAssemblyGen sa) {
1239                for(MMcifConsumer c : consumers){
1240                        c.newPdbxStrucAssemblyGen(sa);
1241                }
1242        }
1243
1244        private void triggerNewChemCompAtom(ChemCompAtom atom) {
1245                for(MMcifConsumer c : consumers){
1246                        c.newChemCompAtom(atom);
1247                }
1248        }
1249
1250        private void triggerNewChemCompBond(ChemCompBond bond) {
1251                for(MMcifConsumer c : consumers){
1252                        c.newChemCompBond(bond);
1253                }
1254        }
1255
1256        private void triggerNewPdbxChemCompIdentifier(PdbxChemCompIdentifier id) {
1257                for(MMcifConsumer c : consumers){
1258                        c.newPdbxChemCompIndentifier(id);
1259                }
1260        }
1261        private void triggerNewPdbxChemCompDescriptor(PdbxChemCompDescriptor id) {
1262                for(MMcifConsumer c : consumers){
1263                        c.newPdbxChemCompDescriptor(id);
1264                }
1265        }
1266        private void triggerNewStructConn(StructConn id) {
1267                for(MMcifConsumer c : consumers){
1268                        c.newStructConn(id);
1269                }
1270        }
1271        private void triggerNewStructSiteGen(StructSiteGen id) {
1272                for (MMcifConsumer c : consumers) {
1273                        c.newStructSiteGen(id);
1274                }
1275        }
1276        private void triggerNewStructSite(StructSite id) {
1277                for (MMcifConsumer c : consumers) {
1278                        c.newStructSite(id);
1279                }
1280        }
1281}