001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.io.util;
023
024import org.biojava.nbio.core.exceptions.ParserException;
025import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
026import org.biojava.nbio.core.sequence.compound.AmbiguityRNACompoundSet;
027import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
028import org.biojava.nbio.core.sequence.compound.RNACompoundSet;
029import org.biojava.nbio.core.sequence.template.Compound;
030import org.biojava.nbio.core.sequence.template.CompoundSet;
031import org.biojava.nbio.core.sequence.template.Sequence;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import java.io.*;
036import java.util.ArrayList;
037import java.util.List;
038import java.util.zip.GZIPInputStream;
039
040public class IOUtils {
041
042        private static final Logger logger = LoggerFactory.getLogger(IOUtils.class);
043
044        private static final int BUFFER = 4096;
045
046        /**
047         * Closes any Object which implements the interface {@link Closeable} and
048         * sending any error to the logger but not forcing any explicit catching of
049         * stream errors.
050         *
051         * @param c The stream to close
052         */
053        public static void close(Closeable c) {
054                try {
055                        if (c != null) {
056                                c.close();
057                        }
058                } catch (IOException e) {
059                        logger.warn("Cannot close down the given Closeable object", e);
060                }
061        }
062
063        /**
064         * Moves the bytes from input to output using a 4KB byte array.
065         *
066         * @param input Input stream of bytes
067         * @param output Output stream of bytes
068         * @throws IOException If anything occurs in the case of the reads and writes
069         */
070        public static void copy(InputStream input, OutputStream output)
071                        throws IOException {
072                byte[] buffer = new byte[BUFFER];
073                int n = 0;
074                while (-1 != (n = input.read(buffer))) {
075                        output.write(buffer, 0, n);
076                }
077        }
078
079        /**
080         * Takes in a reader and a processor, reads every line from the given
081         * file and then invokes the processor. What you do with the lines is
082         * dependent on your processor.
083         *
084         * The code will automatically close the given BufferedReader.
085         *
086         * @param br The reader to process
087         * @param processor The processor to invoke on all lines
088         * @throws ParserException Can throw this if we cannot parse the given reader
089         */
090        public static void processReader(BufferedReader br, ReaderProcessor processor) throws ParserException {
091                String line;
092                try {
093                        while( (line = br.readLine()) != null ) {
094                                processor.process(line);
095                        }
096                }
097                catch(IOException e) {
098                        throw new ParserException("Could not read from the given BufferedReader");
099                }
100                finally {
101                        close(br);
102                }
103        }
104
105        /**
106         * Returns the contents of a buffered reader as a list of strings
107         *
108         * @param br BufferedReader to read from; <strong>will be closed</strong>
109         * @return List of Strings
110         * @throws ParserException Can throw this if we cannot parse the given reader
111         */
112        public static List<String> getList(BufferedReader br) throws ParserException {
113                final List<String> list = new ArrayList<String>();
114                processReader(br, new ReaderProcessor() {
115                        @Override
116                        public void process(String line) {
117                                list.add(line);
118                        }
119                });
120                return list;
121        }
122
123        /**
124         * Delegates to {@link #getList(BufferedReader)} by wrapping the InputStream
125         * in a valid reader. No encoding is mentioned so if you need anything
126         * more advanced then use the other version of this method.
127         *
128         * @param is InputStream which is a text file
129         * @return List of Strings representing the lines of the files
130         * @throws ParserException Can throw this if the file is not a file or we
131         * cannot parse it
132         */
133        public static List<String> getList(InputStream is) throws ParserException {
134                return getList(new BufferedReader(new InputStreamReader(is)));
135        }
136
137        /**
138         * Delegates to {@link #getList(InputStream)} by wrapping the File
139         * in a valid stream. No encoding is mentioned so if you need anything
140         * more advanced then use the other version of this method. Since this
141         * uses {@link #openFile(File)} this code can support GZipped and plain
142         * files.
143         *
144         * @param file File which is a text file
145         * @return List of Strings representing the lines of the files
146         * @throws IOException
147         */
148        public static List<String> getList(File file) throws IOException {
149                return getList(openFile(file));
150        }
151
152        /**
153         * For a filename this code will check the extension of the file for a
154         * .gz extension. If it finds one then the InputStream given back
155         * is a {@link GZIPInputStream}. Otherwise we return a normal
156         * {@link FileInputStream}.
157         *
158         * @param file File which may or may not be GZipped
159         * @return The final stream
160         * @throws IOExceptio n
161         */
162        public static InputStream openFile(File file) throws IOException {
163                final InputStream is;
164                if(!file.isFile()) {
165                        throw new ParserException("The file "+file+" is not a file.");
166                }
167                String name = file.getName();
168
169                if(name.endsWith(".gz")) {
170                        is = new GZIPInputStream(new FileInputStream(file));
171                }
172                else {
173                        is = new FileInputStream(file);
174                }
175
176                return is;
177        }
178
179        /**
180         * Closure interface used when working with
181         * {@link IOUtils#processReader(String)}. Each time a line is encountered
182         * the object that implements this interface will be invoked.
183         *
184         * @author ayates
185         */
186        public static interface ReaderProcessor {
187                void process(String line) throws IOException;
188        }
189
190        /**
191         * Calculates GCG checksum for entire list of sequences
192         *
193         * @param sequences list of sequences
194         * @return GCG checksum
195         */
196        public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(List<S> sequences) {
197                int check = 0;
198                for (S as : sequences) {
199                        check += getGCGChecksum(as);
200                }
201                return check % 10000;
202        }
203
204        /**
205         * Calculates GCG checksum for a given sequence
206         *
207         * @param sequence given sequence
208         * @return GCG checksum
209         */
210        public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(S sequence) {
211                String s = sequence.toString().toUpperCase();
212                int count = 0, check = 0;
213                for (int i = 0; i < s.length(); i++) {
214                        count++;
215                        check += count * s.charAt(i);
216                        if (count == 57) {
217                                count = 0;
218                        }
219                }
220                return check % 10000;
221        }
222
223        /**
224         * Assembles a GCG file header
225         *
226         * @param sequences list of sequences
227         * @return GCG header
228         */
229        public static <S extends Sequence<C>, C extends Compound> String getGCGHeader(List<S> sequences) {
230                StringBuilder header = new StringBuilder();
231                S s1 = sequences.get(0);
232                header.append(String.format("MSA from BioJava%n%n MSF: %d  Type: %s  Check: %d ..%n%n",
233                                s1.getLength(), getGCGType(s1.getCompoundSet()), getGCGChecksum(sequences)));
234                String format = " Name: " + getIDFormat(sequences) + " Len: " + s1.getLength() + "  Check: %4d  Weight: 1.0%n";
235                for (S as : sequences) {
236                        header.append(String.format(format, as.getAccession(), getGCGChecksum(as)));
237                        // TODO show weights in MSF header
238                }
239                header.append(String.format("%n//%n%n"));
240                // TODO? convert gap characters to '.'
241                return header.toString();
242        }
243
244        /**
245         * Determines GCG type
246         * @param cs compound set of sequences
247         * @return GCG type
248         */
249        public static <C extends Compound> String getGCGType(CompoundSet<C> cs) {
250                return (cs == DNACompoundSet.getDNACompoundSet() || cs == AmbiguityDNACompoundSet.getDNACompoundSet()) ? "D" :
251                        (cs == RNACompoundSet.getRNACompoundSet() || cs == AmbiguityRNACompoundSet.getRNACompoundSet()) ? "R" : "P";
252        }
253
254        /**
255         * Creates format String for accession IDs
256         *
257         * @param sequences list of sequences
258         * @return format String for accession IDs
259         */
260        public static <S extends Sequence<C>, C extends Compound> String getIDFormat(List<S> sequences) {
261                int length = 0;
262                for (S as : sequences) {
263                        length = Math.max(length, (as.getAccession() == null) ? 0 : as.getAccession().toString().length());
264                }
265                return (length == 0) ? null : "%-" + (length + 1) + "s";
266        }
267
268        /**
269         * Creates formatted String for a single character of PDB output
270         *
271         * @param web true for HTML display
272         * @param c1 character in first sequence
273         * @param c2 character in second sequence
274         * @param similar true if c1 and c2 are considered similar compounds
275         * @param c character to display
276         * @return formatted String
277         */
278        public static String getPDBCharacter(boolean web, char c1, char c2, boolean similar, char c) {
279                String s = c + "";
280                return getPDBString(web, c1, c2, similar, s, s, s, s);
281        }
282
283        /**
284         * Creates formatted String for displaying conservation in PDB output
285         *
286         * @param web true for HTML display
287         * @param c1 character in first sequence
288         * @param c2 character in second sequence
289         * @param similar true if c1 and c2 are considered similar compounds
290         * @return formatted String
291         */
292        public static String getPDBConservation(boolean web, char c1, char c2, boolean similar) {
293                return getPDBString(web, c1, c2, similar, "|", ".", " ", web ? "&nbsp;" : " ");
294        }
295
296        // helper method for getPDBCharacter and getPDBConservation
297        private static String getPDBString(boolean web, char c1, char c2, boolean similar, String m, String sm, String dm,
298                        String qg) {
299                if (c1 == c2)
300                        return web ? "<span class=\"m\">" + m + "</span>" : m;
301                else if (similar)
302                        return web ? "<span class=\"sm\">" + sm + "</span>" : sm;
303                else if (c1 == '-' || c2 == '-')
304                        return web ? "<span class=\"dm\">" + dm + "</span>" : dm;
305                else
306                        return web ? "<span class=\"qg\">" + qg + "</span>" : qg;
307        }
308
309        /**
310         * Creates formatted String for displaying conservation legend in PDB output
311         *
312         * @return legend String
313         */
314        public static String getPDBLegend() {
315                StringBuilder s = new StringBuilder();
316                s.append("</pre></div>");
317                s.append("          <div class=\"subText\">");
318                s.append("          <b>Legend:</b>");
319                s.append("          <span class=\"m\">Green</span> - identical residues |");
320                s.append("          <span class=\"sm\">Pink</span> - similar residues | ");
321                s.append("          <span class=\"qg\">Blue</span> - sequence mismatch |");
322                s.append("          <span class=\"dm\">Brown</span> - insertion/deletion |");
323                s.append("      </div>");
324                s.append(String.format("%n"));
325                return s.toString();
326        }
327
328        /**
329         * Prints {@code string} to {@code file}.
330         * @throws IOException If any I/O exception occurs while printing; this method does not catch any exceptions
331         */
332        public static void print(String string, File file) throws IOException {
333                PrintWriter out = null;
334                try {
335                        out = new PrintWriter(new BufferedWriter(new FileWriter(file)));
336                        out.print(string);
337                        out.flush();
338                        out.close();
339                } finally {
340                        if (out != null) out.close();
341                }
342        }
343
344}