001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojavax.bio.seq.io; 023 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.biojava.bio.seq.io.ParseException; 028import org.biojavax.RichObjectFactory; 029import org.biojavax.bio.seq.Position; 030import org.biojavax.bio.seq.RichLocation; 031import org.biojavax.bio.seq.SimplePosition; 032import org.biojavax.bio.seq.SimpleRichLocation; 033import org.biojavax.bio.seq.RichLocation.Strand; 034import org.biojavax.utils.StringTools; 035 036 037/** 038 * Parses UniProt location strings into RichLocation objects. 039 * @author Richard Holland 040 * @since 1.5 041 */ 042public class UniProtLocationParser { 043 044 // No instances please 045 private UniProtLocationParser() {} 046 047 /** 048 * Parses a location. 049 * @param loc the UniProt location string. 050 * @return RichLocation the equivalent RichLocation object. 051 * @throws ParseException if the parsing failed. 052 */ 053 public static RichLocation parseLocation(String loc) throws ParseException { 054 try{ 055 String parts[] = loc.trim().split("\\s+"); 056 Position startPos = null; 057 Position endPos = null; 058 059 try { 060 startPos = parsePosition(parts[0].trim()); 061 } catch (Exception e){ 062 System.err.println(e.getMessage()); 063 } 064 try { 065 endPos = parsePosition(parts[1].trim()); 066 } catch (Exception e){ 067 System.err.println(e.getMessage()); 068 } 069 070 if (( startPos == null) && ( endPos == null)){ 071 return new SimpleRichLocation(new SimplePosition(0),new SimplePosition(0),1,Strand.POSITIVE_STRAND,null); 072 } else if ( endPos == null){ 073 return new SimpleRichLocation(startPos,new SimplePosition(0),1,Strand.POSITIVE_STRAND,null); 074 } else if ( startPos == null){ 075 return new SimpleRichLocation(new SimplePosition(0),endPos,1,Strand.POSITIVE_STRAND,null); 076 } 077 return new SimpleRichLocation(startPos,endPos,1,Strand.POSITIVE_STRAND,null); 078 }catch (RuntimeException ex){ 079 throw new ParseException(ex, "Cannot parse location: "+loc); 080 } 081 } 082 083 // O beautiful regex, we worship you. 084 // this matches both the point and end locations 085 private static Pattern sp = Pattern.compile("^(<|>)?(\\d+)(<|>)?$"); 086 087 // this function parses a single position - usually just half of one location 088 private static Position parsePosition(String position) throws ParseException { 089 // First attempt to find the group enclosing everything we've been passed 090 Matcher sm = sp.matcher(position); 091 if (!sm.matches()) throw new ParseException("Could not understand position: "+position); 092 String startfuzz = sm.group(1); 093 String point = sm.group(2); 094 String endfuzz = sm.group(3); 095 096 boolean startsFuzzy = ((startfuzz!=null && startfuzz.equals("<")) || (endfuzz!=null && endfuzz.equals("<"))); 097 boolean endsFuzzy = ((endfuzz!=null && endfuzz.equals(">")) || (startfuzz!=null && startfuzz.equals(">"))); 098 099 return new SimplePosition(startsFuzzy,endsFuzzy,Integer.parseInt(point)); 100 } 101 102 /** 103 * Writes a location in UniProt format. 104 * @param l the location to write 105 * @return the formatted string representing the location. 106 */ 107 public static String writeLocation(RichLocation l) { 108 //write out location text 109 return _writeSingleLocation(l); 110 } 111 112 // writes out a single position 113 private static String _writePosition(Position p, boolean useMax) { 114 StringBuffer sb = new StringBuffer(); 115 int s = p.getStart(); 116 int e = p.getEnd(); 117 boolean fs = p.getFuzzyStart(); 118 boolean fe = p.getFuzzyEnd(); 119 int a; 120 if (s!=e) { 121 // we have to average it out 122 if (useMax) a = RichObjectFactory.getDefaultPositionResolver().getMax(p); 123 else a = RichObjectFactory.getDefaultPositionResolver().getMin(p); 124 } else { 125 a = s; 126 } 127 if (fs) sb.append("<"); 128 sb.append(a); 129 if (fe) sb.append(">"); 130 return sb.toString(); 131 } 132 133 // write out a single location 134 private static String _writeSingleLocation(RichLocation l) { 135 StringBuffer loc = new StringBuffer(); 136 loc.append(StringTools.leftPad(_writePosition(l.getMinPosition(),false),6)); 137 loc.append(" "); 138 loc.append(StringTools.leftPad(_writePosition(l.getMaxPosition(),true),6)); 139 return loc.toString(); 140 } 141}