001package org.biojava.bio.program.formats; 002 003import java.util.regex.Pattern; 004 005import org.biojava.bio.AnnotationType; 006import org.biojava.bio.CardinalityConstraint; 007import org.biojava.bio.EcNumber; 008import org.biojava.bio.PropertyConstraint; 009import org.biojava.bio.program.tagvalue.BoundaryFinder; 010import org.biojava.bio.program.tagvalue.ChangeTable; 011import org.biojava.bio.program.tagvalue.LineSplitParser; 012import org.biojava.bio.program.tagvalue.MultiTagger; 013import org.biojava.bio.program.tagvalue.ParserListener; 014import org.biojava.bio.program.tagvalue.RegexFieldFinder; 015import org.biojava.bio.program.tagvalue.RegexSplitter; 016import org.biojava.bio.program.tagvalue.TagDelegator; 017import org.biojava.bio.program.tagvalue.TagValueListener; 018import org.biojava.bio.program.tagvalue.ValueChanger; 019import org.biojava.bio.symbol.Location; 020import org.biojava.utils.lsid.LifeScienceIdentifier; 021 022public class Enzyme 023implements Format { 024 private static final AnnotationType ANNO_TYPE; 025 private static final LineSplitParser PARSER; 026 private static final LifeScienceIdentifier LSID; 027 028 static { 029 LSID = LifeScienceIdentifier.valueOf("open-bio.org", "format", "enzyme"); 030 031 Location NONE = CardinalityConstraint.NONE; 032 Location ANY = CardinalityConstraint.ANY; 033 Location ONE = CardinalityConstraint.ONE; 034 035 PARSER = new LineSplitParser(LineSplitParser.EMBL); 036 037 PropertyConstraint c_string = new PropertyConstraint.ByClass(String.class); 038 PropertyConstraint c_ecNumber = new PropertyConstraint.ByClass(EcNumber.class); 039 040 AnnotationType DI = new AnnotationType.Impl(); 041 DI.setDefaultConstraints(PropertyConstraint.NONE, NONE); 042 DI.setConstraints("Disease_name", c_string, ONE); 043 DI.setConstraints("MIM:Number", c_string, ONE); 044 PropertyConstraint c_diType = new PropertyConstraint.ByAnnotationType(DI); 045 046 ANNO_TYPE = new AnnotationType.Impl(); 047 ANNO_TYPE.setDefaultConstraints(PropertyConstraint.NONE, NONE); 048 ANNO_TYPE.setConstraints("ID", c_ecNumber, ONE); 049 ANNO_TYPE.setConstraints("DE", c_string, ONE); 050 ANNO_TYPE.setConstraints("AN", c_string, ANY); 051 ANNO_TYPE.setConstraints("CA", c_string, ANY); 052 ANNO_TYPE.setConstraints("CF", c_string, ANY); 053 ANNO_TYPE.setConstraints("CC", c_string, ANY); 054 ANNO_TYPE.setConstraints("DI", c_diType, ANY); 055 ANNO_TYPE.setConstraints("PR", c_string, ANY); 056 ANNO_TYPE.setConstraints("DR", c_string, ANY); 057 } 058 059 public ParserListener getParserListener(TagValueListener listener) { 060 ChangeTable.Changer trailingDotStripper = new ChangeTable.Changer() { 061 public Object change(Object value) { 062 String val = (String) value; 063 if(val.endsWith(".")) { 064 return val.substring(0, val.length() - 1); 065 } else { 066 return val; 067 } 068 } 069 }; 070 071 ChangeTable changeTable = new ChangeTable(); 072 073 changeTable.setChanger("ID", new ChangeTable.Changer() { 074 public Object change(Object value) { 075 return EcNumber.Impl.valueOf((String) value); 076 } 077 }); 078 changeTable.setChanger("AN", trailingDotStripper); 079 changeTable.setChanger("DE", trailingDotStripper); 080 changeTable.setChanger("CA", trailingDotStripper); 081 changeTable.setChanger("CF", trailingDotStripper); 082 changeTable.setSplitter("DR", new RegexSplitter( 083 Pattern.compile("\\S+,\\s*\\S+;"), 084 0 )); 085 086 ValueChanger valueChanger = new ValueChanger(listener, changeTable); 087 088 MultiTagger dotMultiTag = new MultiTagger( 089 valueChanger, 090 new BoundaryFinder() { 091 public boolean dropBoundaryValues() { return false; } 092 public boolean isBoundaryStart(Object value) { return false; } 093 public boolean isBoundaryEnd(Object value) { 094 return ((String) value).endsWith("."); 095 } 096 } 097 ); 098 099 MultiTagger commentMultiTag = new MultiTagger( 100 valueChanger, 101 new BoundaryFinder() { 102 public boolean dropBoundaryValues() { return false; } 103 public boolean isBoundaryStart(Object value) { 104 return ((String) value).startsWith("-!-"); 105 } 106 public boolean isBoundaryEnd(Object value) { return false; } 107 } 108 ); 109 110 TagDelegator tagDelegator = new TagDelegator(valueChanger); 111 tagDelegator.setListener("AN", dotMultiTag); 112 tagDelegator.setListener("CA", dotMultiTag); 113 tagDelegator.setListener("CC", commentMultiTag); 114 tagDelegator.setListener("DI", new RegexFieldFinder( 115 valueChanger, 116 Pattern.compile("([^;]+);\\s*MIM:\\s*(\\S+)\\."), 117 new String[] { "Disease_name", "MIM:Number" }, 118 false )); 119 120 121 return new ParserListener(PARSER, tagDelegator); 122 } 123 124 public AnnotationType getType() { 125 return ANNO_TYPE; 126 } 127 128 public LifeScienceIdentifier getLSID() { 129 return LSID; 130 } 131}