001package org.biojava.bio.program.formats;
002
003import java.util.regex.Pattern;
004
005import org.biojava.bio.AnnotationType;
006import org.biojava.bio.CardinalityConstraint;
007import org.biojava.bio.EcNumber;
008import org.biojava.bio.PropertyConstraint;
009import org.biojava.bio.program.tagvalue.BoundaryFinder;
010import org.biojava.bio.program.tagvalue.ChangeTable;
011import org.biojava.bio.program.tagvalue.LineSplitParser;
012import org.biojava.bio.program.tagvalue.MultiTagger;
013import org.biojava.bio.program.tagvalue.ParserListener;
014import org.biojava.bio.program.tagvalue.RegexFieldFinder;
015import org.biojava.bio.program.tagvalue.RegexSplitter;
016import org.biojava.bio.program.tagvalue.TagDelegator;
017import org.biojava.bio.program.tagvalue.TagValueListener;
018import org.biojava.bio.program.tagvalue.ValueChanger;
019import org.biojava.bio.symbol.Location;
020import org.biojava.utils.lsid.LifeScienceIdentifier;
021
022public class Enzyme
023implements Format {
024  private static final AnnotationType ANNO_TYPE;
025  private static final LineSplitParser PARSER;
026  private static final LifeScienceIdentifier LSID;
027
028  static {
029    LSID = LifeScienceIdentifier.valueOf("open-bio.org", "format", "enzyme");
030
031    Location NONE = CardinalityConstraint.NONE;
032    Location ANY = CardinalityConstraint.ANY;
033    Location ONE = CardinalityConstraint.ONE;
034
035    PARSER = new LineSplitParser(LineSplitParser.EMBL);
036
037    PropertyConstraint c_string = new PropertyConstraint.ByClass(String.class);
038    PropertyConstraint c_ecNumber = new PropertyConstraint.ByClass(EcNumber.class);
039
040    AnnotationType DI = new AnnotationType.Impl();
041    DI.setDefaultConstraints(PropertyConstraint.NONE, NONE);
042    DI.setConstraints("Disease_name", c_string, ONE);
043    DI.setConstraints("MIM:Number", c_string, ONE);
044    PropertyConstraint c_diType = new PropertyConstraint.ByAnnotationType(DI);
045
046    ANNO_TYPE = new AnnotationType.Impl();
047    ANNO_TYPE.setDefaultConstraints(PropertyConstraint.NONE, NONE);
048    ANNO_TYPE.setConstraints("ID", c_ecNumber, ONE);
049    ANNO_TYPE.setConstraints("DE", c_string, ONE);
050    ANNO_TYPE.setConstraints("AN", c_string, ANY);
051    ANNO_TYPE.setConstraints("CA", c_string, ANY);
052    ANNO_TYPE.setConstraints("CF", c_string, ANY);
053    ANNO_TYPE.setConstraints("CC", c_string, ANY);
054    ANNO_TYPE.setConstraints("DI", c_diType, ANY);
055    ANNO_TYPE.setConstraints("PR", c_string, ANY);
056    ANNO_TYPE.setConstraints("DR", c_string, ANY);
057  }
058
059  public ParserListener getParserListener(TagValueListener listener) {
060    ChangeTable.Changer trailingDotStripper = new ChangeTable.Changer() {
061      public Object change(Object value) {
062        String val = (String) value;
063        if(val.endsWith(".")) {
064          return val.substring(0, val.length() - 1);
065        } else {
066          return val;
067        }
068      }
069    };
070
071    ChangeTable changeTable = new ChangeTable();
072
073    changeTable.setChanger("ID", new ChangeTable.Changer() {
074      public Object change(Object value) {
075        return EcNumber.Impl.valueOf((String) value);
076      }
077    });
078    changeTable.setChanger("AN", trailingDotStripper);
079    changeTable.setChanger("DE", trailingDotStripper);
080    changeTable.setChanger("CA", trailingDotStripper);
081    changeTable.setChanger("CF", trailingDotStripper);
082    changeTable.setSplitter("DR", new RegexSplitter(
083      Pattern.compile("\\S+,\\s*\\S+;"),
084      0 ));
085
086    ValueChanger valueChanger = new ValueChanger(listener, changeTable);
087
088    MultiTagger dotMultiTag = new MultiTagger(
089      valueChanger,
090      new BoundaryFinder() {
091        public boolean dropBoundaryValues() { return false; }
092        public boolean isBoundaryStart(Object value) { return false; }
093        public boolean isBoundaryEnd(Object value) {
094          return ((String) value).endsWith(".");
095        }
096      }
097    );
098
099    MultiTagger commentMultiTag = new MultiTagger(
100      valueChanger,
101      new BoundaryFinder() {
102        public boolean dropBoundaryValues() { return false; }
103        public boolean isBoundaryStart(Object value) {
104          return ((String) value).startsWith("-!-");
105        }
106        public boolean isBoundaryEnd(Object value) { return false; }
107      }
108    );
109
110    TagDelegator tagDelegator = new TagDelegator(valueChanger);
111    tagDelegator.setListener("AN", dotMultiTag);
112    tagDelegator.setListener("CA", dotMultiTag);
113    tagDelegator.setListener("CC", commentMultiTag);
114    tagDelegator.setListener("DI", new RegexFieldFinder(
115      valueChanger,
116      Pattern.compile("([^;]+);\\s*MIM:\\s*(\\S+)\\."),
117      new String[] { "Disease_name", "MIM:Number" },
118      false ));
119
120
121    return new ParserListener(PARSER, tagDelegator);
122  }
123
124  public AnnotationType getType() {
125    return ANNO_TYPE;
126  }
127
128  public LifeScienceIdentifier getLSID() {
129    return LSID;
130  }
131}