001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.aaproperties;
022
023import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable;
024import org.biojava.nbio.aaproperties.xml.CaseFreeAminoAcidCompoundSet;
025import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
026import org.biojava.nbio.core.sequence.ProteinSequence;
027import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
029import org.biojava.nbio.core.sequence.io.*;
030import org.biojava.nbio.core.sequence.template.CompoundSet;
031
032import java.io.File;
033import java.io.FileInputStream;
034import java.io.IOException;
035import java.io.PrintStream;
036import java.util.ArrayList;
037import java.util.LinkedHashMap;
038import java.util.List;
039import java.util.Map;
040import java.util.Map.Entry;
041
042
043public class CommandPrompt {
044
045        /**
046         * The main method
047         * @param args
048         *      See showHelp for a list of available arguments
049         * @throws Exception
050         *  To handle exception thrown by reading of XML files
051         */
052        public static void main(String[] args) throws Exception{
053                run(args);
054        }
055
056        private static AminoAcidCompositionTable checkForValidityAndObtainAATable(String inputLocation, int propertyListSize, String aminoAcidCompositionLocation,
057                        String elementMassLocation) throws Exception{
058                if(inputLocation == null) {
059                        showHelp();
060                        throw new Error("Please do provide location of input file.");
061                }
062                if(propertyListSize == 0){
063                        showHelp();
064                        throw new Error("Please at least specify a property to compute.");
065                }
066                AminoAcidCompositionTable aaTable = null;
067                if(aminoAcidCompositionLocation != null && elementMassLocation == null){
068                        aaTable = PeptideProperties.obtainAminoAcidCompositionTable(new File(aminoAcidCompositionLocation));
069                }else if(aminoAcidCompositionLocation != null && elementMassLocation != null){
070                        aaTable = PeptideProperties.obtainAminoAcidCompositionTable(new File(aminoAcidCompositionLocation, elementMassLocation));
071                }else if(aminoAcidCompositionLocation == null && elementMassLocation != null){
072                        throw new Error("You have define the location of Element Mass XML file. Please also define the location of Amino Acid Composition XML file");
073                }
074                return aaTable;
075        }
076
077        private static void readInputAndGenerateOutput(String outputLocation, List<Character> propertyList, List<Character> specificList,
078                        String delimiter, String inputLocation, AminoAcidCompositionTable aaTable, int decimalPlace) throws Exception{
079                PrintStream output;
080                if(outputLocation != null)
081                        output = new PrintStream(new File(outputLocation));
082                else
083                        output = System.out;
084                printHeader(output, propertyList, specificList, delimiter);
085                Map<String, ProteinSequence> a = readInputFile(inputLocation, aaTable);
086                //Need for the last sequence
087                for(Entry<String, ProteinSequence> entry:a.entrySet()){
088                        compute(output, entry.getValue().getOriginalHeader(), entry.getValue().getSequenceAsString().trim(), delimiter, aaTable, propertyList, specificList,
089                                        decimalPlace);
090                }
091                output.close();
092        }
093
094        public static void run(String[] args) throws Exception{
095                /*
096                 * Parse input arguments
097                 */
098                List<Character> propertyList = new ArrayList<>();
099                List<Character> specificList = new ArrayList<>();
100                String inputLocation = null;
101                String outputLocation = null;
102                String aminoAcidCompositionLocation = null;
103                String elementMassLocation = null;
104                String delimiter = ",";
105                int decimalPlace = 4;
106
107                for(int i = 0; i < args.length; i++){
108                        if(args[i].charAt(0) != '-' || args[i].length() != 2){
109                                showHelp();
110                                throw new Error("Unknown option: " + args[i]);
111                        }else{
112                                switch(args[i].charAt(1)){
113                                //Required
114                                case 'i': inputLocation = args[++i]; break;
115                                //Optional
116                                case 'o': outputLocation = args[++i]; break;
117                                case 'f':
118                                        i++;
119                                        if("csv".equalsIgnoreCase(args[i])) delimiter = ",";
120                                        else if("tsv".equalsIgnoreCase(args[i])) delimiter = "\t";
121                                        else throw new Error("Invalid value for -f: " + args[i] + ". Please choose either csv or tsv only.");
122                                        break;
123                                case 'x': aminoAcidCompositionLocation = args[++i]; break;
124                                case 'y': elementMassLocation = args[++i]; break;
125                                case 'd': decimalPlace = Integer.parseInt(args[++i]); break;
126                                //Properties
127                                case 'a':
128                                        propertyList.add('1');
129                                        propertyList.add('2');
130                                        propertyList.add('3');
131                                        propertyList.add('4');
132                                        propertyList.add('5');
133                                        propertyList.add('6');
134                                        propertyList.add('7');
135                                        propertyList.add('8');
136                                        propertyList.add('9');
137                                        break;
138                                case '1': propertyList.add('1'); break;
139                                case '2': propertyList.add('2'); break;
140                                case '3': propertyList.add('3'); break;
141                                case '4': propertyList.add('4'); break;
142                                case '5': propertyList.add('5'); break;
143                                case '6': propertyList.add('6'); break;
144                                case '7': propertyList.add('7'); break;
145                                case '8': propertyList.add('8'); break;
146                                case '9': propertyList.add('9'); break;
147                                case '0':
148                                        propertyList.add('0');
149                                        i++;
150                                        if(args[i].length() != 1) throw new Error("Invalid value: " + args[i] + ". Amino Acid Symbol should be of single character");
151                                        specificList.add(args[i].toUpperCase().charAt(0));
152                                        break;
153                                default:
154                                        showHelp();
155                                        throw new Error("Unknown option: " + args[i]);
156                                }
157                        }
158                }
159
160                /*
161                 * Check for validity of input arguments
162                 */
163                AminoAcidCompositionTable aaTable = checkForValidityAndObtainAATable(inputLocation, propertyList.size(), aminoAcidCompositionLocation,
164                                elementMassLocation);
165
166                /*
167                 * Read input file and generate output
168                 */
169                readInputAndGenerateOutput(outputLocation, propertyList, specificList, delimiter, inputLocation, aaTable, decimalPlace);
170        }
171
172        private static Map<String, ProteinSequence> readInputFile(String inputLocation, AminoAcidCompositionTable aaTable) throws Exception{
173                FileInputStream inStream = new FileInputStream(inputLocation);
174                CompoundSet<AminoAcidCompound>  set;
175                if(aaTable == null){
176                        set = CaseFreeAminoAcidCompoundSet.getAminoAcidCompoundSet();
177                }else{
178                        set = aaTable.getAminoAcidCompoundSet();
179                }
180                Map<String, ProteinSequence> ret;
181                if ( inputLocation.toLowerCase().contains(".gb")) {
182                        GenbankReader<ProteinSequence, AminoAcidCompound> genbankReader = new GenbankReader<>(
183                                        inStream, new GenericGenbankHeaderParser<ProteinSequence, AminoAcidCompound>(),
184                                        new ProteinSequenceCreator(set));
185                        ret = genbankReader.process();
186
187
188                } else {
189                        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<>(
190                                        inStream, new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
191                                        new ProteinSequenceCreator(set));
192                        ret = fastaReader.process();
193
194                }
195                return ret;
196        }
197
198        public enum PropertyName{MolecularWeight, Absorbance_True, Absorbance_False, ExtinctionCoefficient_True, ExtinctionCoefficient_False,
199                InstabilityIndex, ApliphaticIndex, AverageHydropathyValue, IsoelectricPoint, NetCharge_pH_7, A, R,
200                N, D, C, E, Q, G, H, I, L,
201                K, M, F, P, S, T, W, Y, V};
202
203        private static void printHeader(PrintStream output, List<Character> propertyList, List<Character> specificList, String delimiter) throws IOException{
204                int specificCount = 0;
205                /*
206                 * 1 Molecular weight
207                 * 2 Absorbance (assumed Cys reduced and assume Cys to form cystines)
208                 * 3 Extinction coefficient (assumed Cys reduced and assume Cys to form cystines)
209                 * 4 Instability index
210                 * 5 Apliphatic index
211                 * 6 Average hydropathy value
212                 * 7 Isoelectric point
213                 * 8 Net charge at pH 7
214                 * 9 Composition of the 20 standard amino acid
215                 * 0 Composition of the specific amino acid
216                 */
217                List<String> sList = new ArrayList<>();
218                sList.add("SequenceName");
219                for(Character c:propertyList){
220                        switch(c){
221                        case '1': sList.add(PropertyName.MolecularWeight.toString()); break;
222                        case '2': sList.add(PropertyName.Absorbance_True.toString()); sList.add(PropertyName.Absorbance_False.toString()); break;
223                        case '3': sList.add(PropertyName.ExtinctionCoefficient_True.toString()); sList.add(PropertyName.ExtinctionCoefficient_False.toString()); break;
224                        case '4': sList.add(PropertyName.InstabilityIndex.toString()); break;
225                        case '5': sList.add(PropertyName.ApliphaticIndex.toString()); break;
226                        case '6': sList.add(PropertyName.AverageHydropathyValue.toString()); break;
227                        case '7': sList.add(PropertyName.IsoelectricPoint.toString()); break;
228                        case '8': sList.add(PropertyName.NetCharge_pH_7.toString()); break;
229                        case '9':
230                                sList.add(PropertyName.A.toString()); sList.add(PropertyName.R.toString());
231                                sList.add(PropertyName.N.toString()); sList.add(PropertyName.D.toString());
232                                sList.add(PropertyName.C.toString()); sList.add(PropertyName.E.toString());
233                                sList.add(PropertyName.Q.toString()); sList.add(PropertyName.G.toString());
234                                sList.add(PropertyName.H.toString()); sList.add(PropertyName.I.toString());
235                                sList.add(PropertyName.L.toString()); sList.add(PropertyName.K.toString());
236                                sList.add(PropertyName.M.toString()); sList.add(PropertyName.F.toString());
237                                sList.add(PropertyName.P.toString()); sList.add(PropertyName.S.toString());
238                                sList.add(PropertyName.T.toString()); sList.add(PropertyName.W.toString());
239                                sList.add(PropertyName.Y.toString()); sList.add(PropertyName.V.toString());
240                                break;
241                        case '0': sList.add("" + specificList.get(specificCount++)); break;
242                        }
243                }
244                for(int i = 0; i < sList.size(); i++){
245                        if(i != 0) output.print(delimiter);
246                        output.print(sList.get(i));
247                }
248                output.println();
249                output.flush();
250        }
251
252        private static void compute(PrintStream output, String header, String sequence, String delimiter,
253                        AminoAcidCompositionTable aaTable, List<Character> propertyList, List<Character> specificList, int decimalPlace) throws CompoundNotFoundException{
254                /*
255                 * 1 Molecular weight
256                 * 2 Absorbance (assumed Cys reduced and assume Cys to form cystines)
257                 * 3 Extinction coefficient
258                 * 4 Instability index
259                 * 5 Apliphatic index
260                 * 6 Average hydropathy value
261                 * 7 Isoelectric point
262                 * 8 Net charge at pH 7
263                 * 9 Composition of the 20 standard amino acid
264                 * 0 Composition of the specific amino acid
265                 */
266                ProteinSequence pSequence;
267                CompoundSet<AminoAcidCompound> aaSet;
268                if(aaTable != null){
269                        sequence = Utils.checkSequence(sequence, aaTable.getSymbolSet());
270                        pSequence = new ProteinSequence(sequence, aaTable.getAminoAcidCompoundSet());
271                        aaSet = aaTable.getAminoAcidCompoundSet();
272                }else{
273                        sequence = Utils.checkSequence(sequence);
274                        pSequence = new ProteinSequence(sequence);
275                        aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
276                }
277                IPeptideProperties pp = new PeptidePropertiesImpl();
278
279                int specificCount = 0;
280                List<Double> dList = new ArrayList<>();
281                for(Character c:propertyList){
282                        switch(c){
283                        case '1':
284                                if(aaTable == null)
285                                        dList.add(pp.getMolecularWeight(pSequence));
286                                else
287                                        dList.add(pp.getMolecularWeight(pSequence));
288                                break;
289                        case '2':
290                                dList.add(pp.getAbsorbance(pSequence, true));
291                                dList.add(pp.getAbsorbance(pSequence, false));
292                                break;
293                        case '3':
294                                dList.add(pp.getExtinctionCoefficient(pSequence, true));
295                                dList.add(pp.getExtinctionCoefficient(pSequence, false));
296                                break;
297                        case '4': dList.add(pp.getInstabilityIndex(pSequence)); break;
298                        case '5': dList.add(pp.getApliphaticIndex(pSequence)); break;
299                        case '6': dList.add(pp.getAvgHydropathy(pSequence)); break;
300                        case '7': dList.add(pp.getIsoelectricPoint(pSequence)); break;
301                        case '8': dList.add(pp.getNetCharge(pSequence)); break;
302                        case '9':
303                                Map<AminoAcidCompound, Double> aaCompound2Double = pp.getAAComposition(pSequence);
304                                //(A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V)
305                                dList.add(aaCompound2Double.get(Constraints.A));
306                                dList.add(aaCompound2Double.get(Constraints.R));
307                                dList.add(aaCompound2Double.get(Constraints.N));
308                                dList.add(aaCompound2Double.get(Constraints.D));
309                                dList.add(aaCompound2Double.get(Constraints.C));
310                                dList.add(aaCompound2Double.get(Constraints.E));
311                                dList.add(aaCompound2Double.get(Constraints.Q));
312                                dList.add(aaCompound2Double.get(Constraints.G));
313                                dList.add(aaCompound2Double.get(Constraints.H));
314                                dList.add(aaCompound2Double.get(Constraints.I));
315                                dList.add(aaCompound2Double.get(Constraints.L));
316                                dList.add(aaCompound2Double.get(Constraints.K));
317                                dList.add(aaCompound2Double.get(Constraints.M));
318                                dList.add(aaCompound2Double.get(Constraints.F));
319                                dList.add(aaCompound2Double.get(Constraints.P));
320                                dList.add(aaCompound2Double.get(Constraints.S));
321                                dList.add(aaCompound2Double.get(Constraints.T));
322                                dList.add(aaCompound2Double.get(Constraints.W));
323                                dList.add(aaCompound2Double.get(Constraints.Y));
324                                dList.add(aaCompound2Double.get(Constraints.V));
325                                break;
326                        case '0': dList.add(pp.getEnrichment(pSequence, aaSet.getCompoundForString("" + specificList.get(specificCount++)))); break;
327                        }
328                }
329                output.print(header.replace(delimiter, "_"));
330                dList.stream().forEach(item -> output.print(delimiter + Utils.roundToDecimals(item, decimalPlace)));
331                output.println();
332                output.flush();
333        }
334
335        private static void showHelp(){
336                System.err.println("NAME");
337                System.err.println("\tAn executable to generate physico-chemical properties of protein sequences.");
338                System.err.println();
339
340                System.err.println("EXAMPLES");
341                System.err.println("\tjava -jar AAProperties.jar -i test.fasta -a");
342                System.err.println("\t\tGenerates all possible properties.");
343                System.err.println();
344                System.err.println("\tjava -jar AAProperties.jar -i test.fasta -1 -3 -7");
345                System.err.println("\t\tGenerates only molecular weight, extinction coefficient and isoelectric point.");
346                System.err.println();
347                System.err.println("\tjava -jar AAProperties.jar -i test.fasta -0 A -0 N -1");
348                System.err.println("\t\tGenerates composition of two specific amino acid symbol and molecular weight.");
349                System.err.println();
350
351                System.err.println("OPTIONS");
352                System.err.println("\tRequired");
353                System.err.println("\t\t-i location of input FASTA file");
354                System.err.println();
355
356                System.err.println("\tOptional");
357                System.err.println("\t\t-o location of output file [standard output (default)]");
358                System.err.println("\t\t-f output format [csv (default) or tsv]");
359                System.err.println("\t\t-x location of Amino Acid Composition XML file for defining amino acid composition");
360                System.err.println("\t\t-y location of Element Mass XML file for defining mass of elements");
361                System.err.println("\t\t-d number of decimals (int) [4 (default)]");
362                System.err.println();
363
364                System.err.println("\tProvide at least one of them");
365                System.err.println("\t\t-a compute properties of option 1-9");
366                System.err.println("\t\t-1 compute molecular weight");
367                System.err.println("\t\t-2 compute absorbance");
368                System.err.println("\t\t-3 compute extinction coefficient");
369                System.err.println("\t\t-4 compute instability index");
370                System.err.println("\t\t-5 compute apliphatic index");
371                System.err.println("\t\t-6 compute average hydropathy value");
372                System.err.println("\t\t-7 compute isoelectric point");
373                System.err.println("\t\t-8 compute net charge at pH 7");
374                System.err.println("\t\t-9 compute composition of 20 standard amino acid (A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V)");
375                System.err.println("\t\t-0 compute composition of specific amino acid symbol");
376                System.err.println();
377        }
378}