001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojavax.utils;
023
024import java.io.PrintStream;
025import java.util.ArrayList;
026import java.util.List;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029
/**
 * Utility class for formatting strings into regular-sized blocks:
 * indenting, padding to a fixed width, word-wrapping, and writing
 * wrapped key/value lines to a stream.
 * @author Richard Holland
 * @since 1.5
 */
public class StringTools {
    
    // Static methods so should never be instantiated.
    private StringTools() {}
    
    /**
     * Takes an input string and prepends the given number of spaces to it.
     * Any leading whitespace already present in the input is left in place
     * and is not counted towards the indent.
     * @param input the input string
     * @param leftIndent the number of spaces to indent it by. Values of
     * zero or less add no spaces.
     * @return the indented string.
     */
    public static String leftIndent(String input, int leftIndent) {
        StringBuffer b = new StringBuffer();
        for (int i = 0; i < leftIndent; i++) {
            b.append(' ');
        }
        b.append(input);
        return b.toString();
    }
    
    /**
     * Pads a string to be a certain width by prepending spaces.
     * @param input the string to pad.
     * @param totalWidth the final width required including padded space.
     * @return the padded string, or the input unchanged if it is already
     * at least totalWidth characters long.
     */
    public static String leftPad(String input, int totalWidth) {
        return leftPad(input, ' ', totalWidth);
    }
    
    /**
     * Pads a string to be a certain width by prepending given symbols.
     * @param input the string to pad.
     * @param padChar the symbol to pad with.
     * @param totalWidth the final width required including padded symbols.
     * @return the padded string, or the input unchanged if it is already
     * at least totalWidth characters long.
     */
    public static String leftPad(String input, char padChar, int totalWidth) {
        // String.valueOf renders null as "null", exactly as
        // StringBuffer.append(String) does, so null input keeps its old result.
        String text = String.valueOf(input);
        StringBuffer b = new StringBuffer(Math.max(totalWidth, text.length()));
        // Write the padding first instead of repeatedly inserting at index 0,
        // which shifted the whole buffer on every iteration.
        for (int i = text.length(); i < totalWidth; i++) {
            b.append(padChar);
        }
        b.append(text);
        return b.toString();
    }
    
    /**
     * Pads a string to be a certain width by appending spaces.
     * @param input the string to pad.
     * @param totalWidth the final width required including padded space.
     * @return the padded string, or the input unchanged if it is already
     * at least totalWidth characters long.
     */
    public static String rightPad(String input, int totalWidth) {
        return rightPad(input, ' ', totalWidth);
    }
    
    /**
     * Pads a string to be a certain width by appending given symbols.
     * @param input the string to pad.
     * @param padChar the symbol to pad with.
     * @param totalWidth the final width required including padded symbols.
     * @return the padded string, or the input unchanged if it is already
     * at least totalWidth characters long.
     */
    public static String rightPad(String input, char padChar, int totalWidth) {
        StringBuffer b = new StringBuffer();
        b.append(input);
        while (b.length() < totalWidth) {
            b.append(padChar);
        }
        return b.toString();
    }
    
    /**
     * Word-wraps a string into an array of lines.
     * <p>
     * Each line is built as follows:
     * <ul>
     * <li>If a newline character occurs within the next width+1 characters,
     * the line ends just before it and the newline itself is dropped. This
     * applies whatever the separator is.</li>
     * <li>Otherwise, if the remaining text fits in the width, it becomes
     * the last line.</li>
     * <li>Otherwise the line is broken after the last separator found at or
     * before offset width from the start of the line; the separator stays
     * on the line it terminates. If no separator is found the line is
     * broken mid-word after exactly width characters.</li>
     * </ul>
     * Trailing blanks are then removed from each line. Because the
     * separator is kept on the line, a line ending in a non-blank separator
     * (for example a comma) can be width+1 characters long.
     * <p>
     * The regex is tested against one character at a time, so it only ever
     * recognises single-character separators.
     * @param input the string to format
     * @param sepRegex the regex identifying the break points in the string, to be
     * compiled using Pattern.
     * @param width the width of the lines required
     * @return an array of strings, one per line, containing the wrapped output.
     * An empty input produces an empty array.
     * @throws IllegalArgumentException if width is less than 1 and the input
     * is not empty, since no progress could be made through the text.
     * @see Pattern
     */
    public static String[] wordWrap(String input, String sepRegex, int width) {
        List lines = new ArrayList();
        Pattern separator = Pattern.compile(sepRegex);
        final int length = input.length();
        if (length > 0 && width < 1) {
            throw new IllegalArgumentException("width must be at least 1 but was " + width);
        }
        int start = 0;
        while (start < length) {
            // Furthest index this line may reach, clamped to the end of the
            // input. Comparing against the remaining length (rather than
            // computing start+width first) avoids int overflow for huge widths.
            int limit = (width >= length - start) ? length : start + width;
            int newline = input.indexOf('\n', start);
            int lineEnd; // exclusive end of the text emitted for this line
            int next;    // index at which the following line starts
            if (newline >= 0 && newline <= limit) {
                // Hard break: emit up to the newline and skip over it. The
                // newline is not part of the line; the caller adds its own.
                lineEnd = newline;
                next = newline + 1;
            } else if (limit >= length) {
                // Easy case: everything that is left fits on one line.
                lineEnd = length;
                next = length;
            } else {
                // Hard case: break after a separator, or mid-word if none.
                lineEnd = findSplitPoint(input, separator, start, limit);
                next = lineEnd;
            }
            // Trailing blanks, which may include the separator, are not
            // wanted in GenBank lines, so they are removed.
            lines.add(trimTrailingBlanks(input.substring(start, lineEnd)));
            start = next;
        }
        return (String[]) lines.toArray(new String[lines.size()]);
    }
    
    /**
     * Finds where to break a line that is too long to fit. Scans backwards
     * from limit to start for a character matching the separator.
     * @param input the text being wrapped
     * @param separator the compiled separator pattern, tested against single
     * characters
     * @param start the index of the first character of the line
     * @param limit the highest index to examine; must be a valid index
     * into input
     * @return the index just past the separator found, or limit if the
     * range contains no separator (a mid-word break).
     */
    private static int findSplitPoint(String input, Pattern separator, int start, int limit) {
        // One matcher is reused for every character examined.
        Matcher m = separator.matcher("");
        for (int i = limit; i >= start; i--) {
            if (m.reset(String.valueOf(input.charAt(i))).matches()) {
                return i + 1; // keep the separator on the line it terminates
            }
        }
        return limit;
    }
    
    /**
     * Removes trailing blanks from a string that ends in a space. Once the
     * final character is known to be a space, every trailing character with
     * a code no greater than a space is removed. Strings ending in any
     * other character, including a tab, are returned unchanged.
     * @param theString the string to trim
     * @return the trimmed string.
     */
    private final static String trimTrailingBlanks(final String theString) {
        int len = theString.length();
        if (len == 0 || theString.charAt(len - 1) != ' ') {
            return theString;
        }
        while (len > 0 && theString.charAt(len - 1) <= ' ') {
            len--;
        }
        return theString.substring(0, len);
    }
    
    /**
     * Writes some text to the output stream in the following format:
     *    key         text
     *                continuation of text
     * where the key column is keyWidth wide, and the total line width is lineWidth,
     * and the text is split over multiple lines at the nearest occurrence of whitespace.
     * Nothing is written if key or text is null.
     * @param key the key to write on the first line only
     * @param text the text to write out
     * @param keyWidth the width to indent the text by (in which the key will be printed)
     * @param lineWidth the total width of each line, including the key column
     * @param os the stream to write the formatted output to
     * @throws IllegalArgumentException if text is not empty and
     * lineWidth - keyWidth is less than 1
     */
    public static void writeKeyValueLine(String key, String text, int keyWidth, int lineWidth, PrintStream os) {
        writeKeyValueLine(key,text,keyWidth,lineWidth,null,null,os);
    }
    
    /**
     * Writes some text to the output stream in the following format:
     *    key         text
     *                continuation of text
     * where the key column is keyWidth wide, and the total line width is lineWidth,
     * and the text is split over multiple lines at the nearest occurrence of separator sep.
     * Nothing is written if key or text is null.
     * @param key the key to write on the first line only
     * @param text the text to write out
     * @param keyWidth the width to indent the text by (in which the key will be printed)
     * @param lineWidth the total width of each line, including the key column
     * @param sep the regex for the separator to split the text on if it exceeds
     * the line width; null means whitespace
     * @param os the stream to write the formatted output to
     * @throws IllegalArgumentException if text is not empty and
     * lineWidth - keyWidth is less than 1
     */
    public static void writeKeyValueLine(String key, String text, int keyWidth, int lineWidth, String sep, PrintStream os) {
        writeKeyValueLine(key,text,keyWidth,lineWidth,sep,null,os);
    }
    
    /**
     * Writes some text to the output stream in the following format:
     *    key         text
     *    wrappedKey  continuation of text
     * where the key/wrappedKey column is keyWidth wide, and the total line width is lineWidth,
     * and the text is split over multiple lines at the nearest occurrence of separator sep.
     * Nothing is written if key or text is null. If the text is empty, a single
     * line holding only the padded key is written. The text is not trimmed
     * first, so leading blank lines in it are preserved.
     * @param key the key to write on the first line only
     * @param text the text to write out
     * @param keyWidth the width to indent the text by (in which the key will be printed)
     * @param lineWidth the total width of each line, including the key column
     * @param sep the regex for the separator to split the text on if it exceeds
     * the line width; null means whitespace
     * @param wrappedKey the key to print on second and subsequent lines; null
     * means an empty key
     * @param os the stream to write the formatted output to
     * @throws IllegalArgumentException if text is not empty and
     * lineWidth - keyWidth is less than 1
     */
    public static void writeKeyValueLine(String key, String text, int keyWidth, int lineWidth, String sep, String wrappedKey, PrintStream os) {
        if (key == null || text == null) {
            return; // skip blank lines
        }
        if (wrappedKey == null) {
            wrappedKey = ""; // stop null pointer exceptions on wrapped keys
        }
        if (sep == null) {
            sep = "\\s+"; // stop null pointer exceptions on the separator
        }
        String[] lines = StringTools.wordWrap(text, sep, lineWidth - keyWidth);
        String firstPrefix = StringTools.rightPad(key, keyWidth);
        if (lines.length == 0) {
            os.println(firstPrefix);
            return;
        }
        os.println(firstPrefix + lines[0]);
        String wrappedPrefix = StringTools.rightPad(wrappedKey, keyWidth);
        for (int i = 1; i < lines.length; i++) {
            os.println(wrappedPrefix + lines[i]);
        }
    }
}