package org.biojava.bio.program.formats;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.biojava.bio.AnnotationType;
import org.biojava.bio.CardinalityConstraint;
import org.biojava.bio.PropertyConstraint;
import org.biojava.bio.program.tagvalue.LineSplitParser;
import org.biojava.bio.program.tagvalue.ParserListener;
import org.biojava.bio.program.tagvalue.RegexFieldFinder;
import org.biojava.bio.program.tagvalue.RegexSplitter;
import org.biojava.bio.program.tagvalue.SimpleTagValueWrapper;
import org.biojava.bio.program.tagvalue.TagDelegator;
import org.biojava.bio.program.tagvalue.TagValueContext;
import org.biojava.bio.program.tagvalue.TagValueListener;
import org.biojava.bio.program.tagvalue.ValueChanger;
import org.biojava.bio.symbol.Location;
import org.biojava.utils.ParserException;
import org.biojava.utils.lsid.LifeScienceIdentifier;

/**
 * Format description for Swissprot entries: exposes the annotation schema
 * (allowed tags, value types and cardinalities), the life-science identifier
 * of the format, and a factory for a tag-value parser/listener pair that
 * post-processes selected tags before handing events to the caller's listener.
 */
public class Swissprot
implements Format {
  /** Splits a delimited value list into its word tokens (group 1). */
  private static final Pattern LIST_ITEM = Pattern.compile("(\\w+)[;.]");

  /** Breaks the ID line into the ID, TYPE, MOLECULE and LENGTH fields. */
  private static final Pattern ID_LINE =
    Pattern.compile("(\\w+)\\s+(\\w+);\\s+(\\w+);\\s+(\\d+)");

  private static final AnnotationType ANNO_TYPE;
  private static final LifeScienceIdentifier LSID;

  static {
    LSID = LifeScienceIdentifier.valueOf("open-bio.org", "format", "swissprot");

    Location NONE = CardinalityConstraint.NONE;
    Location ANY = CardinalityConstraint.ANY;
    Location ONE = CardinalityConstraint.ONE;
    Location ONE_OR_MORE = CardinalityConstraint.ONE_OR_MORE;

    PropertyConstraint c_string = new PropertyConstraint.ByClass(String.class);

    // Feature-table entries are annotations in their own right; their schema
    // is currently left completely open.
    AnnotationType FT = new AnnotationType.Impl();
    FT.setDefaultConstraints(PropertyConstraint.ANY, ANY); // fix this
    PropertyConstraint c_ft = new PropertyConstraint.ByAnnotationType(FT);

    ANNO_TYPE = new AnnotationType.Impl();
    // Anything not listed below is rejected.
    ANNO_TYPE.setDefaultConstraints(PropertyConstraint.NONE, NONE);

    // Fields extracted from the ID line.
    ANNO_TYPE.setConstraints("ID", c_string, ONE);
    ANNO_TYPE.setConstraints("TYPE", c_string, ONE);
    ANNO_TYPE.setConstraints("MOLECULE", c_string, ONE);
    ANNO_TYPE.setConstraints("LENGTH", c_string, ONE);

    ANNO_TYPE.setConstraints("AC", c_string, ONE_OR_MORE);
    ANNO_TYPE.setConstraints("DT", c_string, ANY);
    ANNO_TYPE.setConstraints("KW", c_string, ANY);
    ANNO_TYPE.setConstraints("OC", c_string, ANY);
    ANNO_TYPE.setConstraints("DE", c_string, ANY);
    ANNO_TYPE.setConstraints("GN", c_string, ANY);
    // NOTE(review): "OS" used to be registered twice (ONE, then ANY), and
    // "OC"/"KW" were each registered twice as ANY. Only the last registration
    // of each key is kept here, on the assumption that re-registering a key
    // replaces the earlier constraint - confirm against AnnotationType.Impl.
    ANNO_TYPE.setConstraints("OS", c_string, ANY);
    ANNO_TYPE.setConstraints("OG", c_string, ANY);
    ANNO_TYPE.setConstraints("OX", c_string, ANY);
    ANNO_TYPE.setConstraints("RN", c_string, ANY);
    ANNO_TYPE.setConstraints("RP", c_string, ANY);
    ANNO_TYPE.setConstraints("RC", c_string, ANY);
    ANNO_TYPE.setConstraints("RX", c_string, ANY);
    ANNO_TYPE.setConstraints("RA", c_string, ANY);
    ANNO_TYPE.setConstraints("RT", c_string, ANY);
    ANNO_TYPE.setConstraints("RL", c_string, ANY);
    ANNO_TYPE.setConstraints("CC", c_string, ANY);
    ANNO_TYPE.setConstraints("DR", c_string, ANY);
    ANNO_TYPE.setConstraints("FT", c_ft, ANY);
    ANNO_TYPE.setConstraints("SQ", c_string, ANY);
    ANNO_TYPE.setConstraints("", c_string, ANY);
  }

  /**
   * Builds the parser/listener pair for this format.
   *
   * <p>Events are parsed with {@link LineSplitParser#EMBL} and routed through
   * a {@link TagDelegator}: the ID line is broken into ID/TYPE/MOLECULE/LENGTH,
   * the AC, KW, OC, RC and RX values are split into individual items, and FT
   * values are re-parsed as a nested feature table. All other tags are passed
   * to <code>listener</code> by the delegator's default route.</p>
   *
   * @param listener the listener that receives the processed events
   * @return a ParserListener combining the EMBL line parser with the
   *         delegating listener
   */
  public ParserListener getParserListener(TagValueListener listener) {
    // Shared changer that splits list-valued tags into their items.
    ValueChanger listChanger = new ValueChanger(listener);
    listChanger.setDefaultSplitter(new RegexSplitter(LIST_ITEM, 1));

    // Nested parser for the feature table: everything before offset 29 of an
    // FT value is treated as the tag (feature key plus coordinates), the rest
    // as the value; lines with an empty tag continue the previous feature.
    LineSplitParser ftParser = new LineSplitParser();
    ftParser.setSplitOffset(29);
    ftParser.setTrimTag(true);
    ftParser.setTrimValue(true);
    ftParser.setContinueOnEmptyTag(true);
    ftParser.setMergeSameTag(false);

    TagValueListener ftListener = new SPFeatureTableListener(listener);

    TagDelegator td = new TagDelegator(listener);
    td.setListener("ID", new RegexFieldFinder(
      listener,
      ID_LINE,
      new String[] { "ID", "TYPE", "MOLECULE", "LENGTH" },
      true
    ));
    td.setListener("AC", listChanger);
    td.setListener("KW", listChanger);
    td.setListener("OC", listChanger);
    td.setListener("RC", listChanger);
    td.setListener("RX", listChanger);
    td.setParserListener("FT", ftParser, ftListener);

    return new ParserListener(LineSplitParser.EMBL, td);
  }

  /**
   * @return the annotation schema built in the static initializer
   */
  public AnnotationType getType() {
    return ANNO_TYPE;
  }

  /**
   * @return the life-science identifier naming this format
   */
  public LifeScienceIdentifier getLSID() {
    return LSID;
  }

  /**
   * Rewrites feature-table events. At record depth 1 the incoming tag is the
   * raw "KEY START END" text; instead of forwarding it, this listener emits
   * TYPE, START and END tag/value pairs followed by a DESCRIPTION tag that
   * carries the feature's value. Events at any other depth pass through
   * unchanged.
   */
  private static class SPFeatureTableListener
  extends SimpleTagValueWrapper {
    /**
     * Feature key followed by start and end coordinates. Each coordinate is
     * either a number (optionally prefixed by '&lt;' for the start or '&gt;'
     * for the end) or a literal '?'. Groups: 1 = key, 2 = start, 3 = end.
     */
    private static final Pattern FEATURE_KEY =
      Pattern.compile("(\\w+)\\s+(<?\\d+|\\?)\\s+(>?\\d+|\\?)");

    private int depth = 0;
    // Tag held back at depth 1 until its first value arrives; null once used.
    private Object tag;

    public SPFeatureTableListener(TagValueListener delegate) {
      super(delegate);
    }

    public void startRecord()
    throws ParserException {
      depth++;
      super.startRecord();
    }

    public void endRecord()
    throws ParserException {
      super.endRecord();
      depth--;
    }

    /**
     * Holds the tag back at depth 1 (it is expanded in {@link #value});
     * forwards it at any other depth.
     */
    public void startTag(Object tag)
    throws ParserException {
      if(depth == 1) {
        this.tag = tag;
      } else {
        super.startTag(tag);
      }
    }

    /**
     * Forwards the end-of-tag event. At depth 1 this closes the DESCRIPTION
     * tag opened by {@link #value}.
     */
    public void endTag()
    throws ParserException {
      super.endTag();
    }

    /**
     * Expands the held-back feature tag on the first value of a depth-1 tag;
     * otherwise forwards the value untouched.
     *
     * @throws ParserException if the held-back tag does not look like
     *         "KEY START END"
     */
    public void value(TagValueContext ctxt, Object val)
    throws ParserException {
      if(depth != 1 || tag == null) {
        // Either not in the feature table proper, or a continuation value
        // for a feature whose header has already been emitted.
        super.value(ctxt, val);
        return;
      }

      Matcher m = FEATURE_KEY.matcher(tag.toString());
      if(!m.find()) {
        throw new ParserException(
          "Couldn't match: " + FEATURE_KEY.pattern() + " " + tag
        );
      }

      emit(ctxt, "TYPE", m.group(1));
      emit(ctxt, "START", m.group(2));
      emit(ctxt, "END", m.group(3));

      // DESCRIPTION is left open so continuation values land in it; the
      // following endTag() event closes it.
      super.startTag("DESCRIPTION");
      super.value(ctxt, val);

      tag = null;
    }

    /** Sends one complete tag/value/end-tag triple to the delegate. */
    private void emit(TagValueContext ctxt, Object name, Object val)
    throws ParserException {
      super.startTag(name);
      super.value(ctxt, val);
      super.endTag();
    }
  }
}