001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojavax.bio.seq.io;
023
024import java.util.regex.Matcher;
025import java.util.regex.Pattern;
026
027import org.biojava.bio.seq.io.ParseException;
028import org.biojavax.RichObjectFactory;
029import org.biojavax.bio.seq.Position;
030import org.biojavax.bio.seq.RichLocation;
031import org.biojavax.bio.seq.SimplePosition;
032import org.biojavax.bio.seq.SimpleRichLocation;
033import org.biojavax.bio.seq.RichLocation.Strand;
034import org.biojavax.utils.StringTools;
035
036
037/**
038 * Parses UniProt location strings into RichLocation objects.
039 * @author Richard Holland
040 * @since 1.5
041 */
042public class UniProtLocationParser {
043
044        // No instances please
045        private UniProtLocationParser() {}
046
047        /**
048         * Parses a location.
049         * @param loc the UniProt location string.
050         * @return RichLocation the equivalent RichLocation object.
051         * @throws ParseException if the parsing failed.
052         */
053        public static RichLocation parseLocation(String loc) throws ParseException {
054                try{
055                        String parts[] = loc.trim().split("\\s+");
056                        Position startPos = null;
057                        Position endPos   = null; 
058
059                        try {
060                                startPos = parsePosition(parts[0].trim());
061                        } catch (Exception e){
062                                System.err.println(e.getMessage());
063                        }
064                        try {
065                                endPos   = parsePosition(parts[1].trim());
066                        } catch (Exception e){
067                                System.err.println(e.getMessage());
068                        }
069
070                        if (( startPos == null) && ( endPos == null)){
071                                return new SimpleRichLocation(new SimplePosition(0),new SimplePosition(0),1,Strand.POSITIVE_STRAND,null);
072                        } else if ( endPos == null){
073                                return new SimpleRichLocation(startPos,new SimplePosition(0),1,Strand.POSITIVE_STRAND,null);
074                        } else if ( startPos == null){
075                                return new SimpleRichLocation(new SimplePosition(0),endPos,1,Strand.POSITIVE_STRAND,null);
076                        }
077                        return new SimpleRichLocation(startPos,endPos,1,Strand.POSITIVE_STRAND,null);
078                }catch (RuntimeException ex){
079                        throw new ParseException(ex, "Cannot parse location: "+loc);
080                }
081        }
082
083        // O beautiful regex, we worship you.
084        // this matches both the point and end locations
085        private static Pattern sp = Pattern.compile("^(<|>)?(\\d+)(<|>)?$");
086
087        // this function parses a single position - usually just half of one location
088        private static Position parsePosition(String position) throws ParseException {
089                // First attempt to find the group enclosing everything we've been passed
090                Matcher sm = sp.matcher(position);
091                if (!sm.matches()) throw new ParseException("Could not understand position: "+position);
092                String startfuzz = sm.group(1);
093                String point = sm.group(2);
094                String endfuzz = sm.group(3);
095
096                boolean startsFuzzy = ((startfuzz!=null && startfuzz.equals("<")) || (endfuzz!=null && endfuzz.equals("<")));
097                boolean endsFuzzy = ((endfuzz!=null && endfuzz.equals(">")) || (startfuzz!=null && startfuzz.equals(">")));
098
099                return new SimplePosition(startsFuzzy,endsFuzzy,Integer.parseInt(point));
100        }
101
102        /**
103         * Writes a location in UniProt format.
104         * @param l the location to write
105         * @return the formatted string representing the location.
106         */
107        public static String writeLocation(RichLocation l) {
108                //write out location text
109                return _writeSingleLocation(l);
110        }
111
112        // writes out a single position
113        private static String _writePosition(Position p, boolean useMax) {
114                StringBuffer sb = new StringBuffer();
115                int s = p.getStart();
116                int e = p.getEnd();
117                boolean fs = p.getFuzzyStart();
118                boolean fe = p.getFuzzyEnd();
119                int a;
120                if (s!=e) {
121                        // we have to average it out
122                        if (useMax) a = RichObjectFactory.getDefaultPositionResolver().getMax(p);
123                        else a = RichObjectFactory.getDefaultPositionResolver().getMin(p);
124                } else {
125                        a = s;
126                }
127                if (fs) sb.append("<");
128                sb.append(a);
129                if (fe) sb.append(">");
130                return sb.toString();
131        }
132
133        // write out a single location
134        private static String _writeSingleLocation(RichLocation l) {
135                StringBuffer loc = new StringBuffer();
136                loc.append(StringTools.leftPad(_writePosition(l.getMinPosition(),false),6));
137                loc.append(" ");
138                loc.append(StringTools.leftPad(_writePosition(l.getMaxPosition(),true),6));
139                return loc.toString();
140        }
141}