001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.aaproperties; 022 023import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable; 024import org.biojava.nbio.aaproperties.xml.CaseFreeAminoAcidCompoundSet; 025import org.biojava.nbio.core.exceptions.CompoundNotFoundException; 026import org.biojava.nbio.core.sequence.ProteinSequence; 027import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 029import org.biojava.nbio.core.sequence.io.*; 030import org.biojava.nbio.core.sequence.template.CompoundSet; 031 032import java.io.File; 033import java.io.FileInputStream; 034import java.io.IOException; 035import java.io.PrintStream; 036import java.util.ArrayList; 037import java.util.LinkedHashMap; 038import java.util.List; 039import java.util.Map; 040import java.util.Map.Entry; 041 042 043public class CommandPrompt { 044 045 /** 046 * The main method 047 * @param args 048 * See showHelp for a list of available arguments 049 * @throws Exception 050 * To handle exception thrown by reading of XML files 051 */ 052 public static void main(String[] args) throws Exception{ 053 run(args); 054 } 055 056 private static AminoAcidCompositionTable checkForValidityAndObtainAATable(String inputLocation, int propertyListSize, String aminoAcidCompositionLocation, 057 String elementMassLocation) throws Exception{ 058 if(inputLocation == null) { 059 showHelp(); 060 throw new Error("Please do provide location of input file."); 061 } 062 if(propertyListSize == 0){ 063 showHelp(); 064 throw new Error("Please at least specify a property to compute."); 065 } 066 AminoAcidCompositionTable aaTable = null; 067 if(aminoAcidCompositionLocation != null && elementMassLocation == null){ 068 aaTable = PeptideProperties.obtainAminoAcidCompositionTable(new File(aminoAcidCompositionLocation)); 069 }else if(aminoAcidCompositionLocation != null && elementMassLocation != null){ 070 aaTable = PeptideProperties.obtainAminoAcidCompositionTable(new File(aminoAcidCompositionLocation, elementMassLocation)); 071 }else if(aminoAcidCompositionLocation == null && elementMassLocation != null){ 072 throw new Error("You have define the location of Element Mass XML file. Please also define the location of Amino Acid Composition XML file"); 073 } 074 return aaTable; 075 } 076 077 private static void readInputAndGenerateOutput(String outputLocation, List<Character> propertyList, List<Character> specificList, 078 String delimiter, String inputLocation, AminoAcidCompositionTable aaTable, int decimalPlace) throws Exception{ 079 PrintStream output; 080 if(outputLocation != null) 081 output = new PrintStream(new File(outputLocation)); 082 else 083 output = System.out; 084 printHeader(output, propertyList, specificList, delimiter); 085 LinkedHashMap<String, ProteinSequence> a = readInputFile(inputLocation, aaTable); 086 //Need for the last sequence 087 for(Entry<String, ProteinSequence> entry:a.entrySet()){ 088 compute(output, entry.getValue().getOriginalHeader(), entry.getValue().getSequenceAsString().trim(), delimiter, aaTable, propertyList, specificList, 089 decimalPlace); 090 } 091 output.close(); 092 } 093 094 public static void run(String[] args) throws Exception{ 095 /* 096 * Parse input arguments 097 */ 098 List<Character> propertyList = new ArrayList<Character>(); 099 List<Character> specificList = new ArrayList<Character>(); 100 String inputLocation = null; 101 String outputLocation = null; 102 String aminoAcidCompositionLocation = null; 103 String elementMassLocation = null; 104 String delimiter = ","; 105 int decimalPlace = 4; 106 107 for(int i = 0; i < args.length; i++){ 108 if(args[i].charAt(0) != '-' || args[i].length() != 2){ 109 showHelp(); 110 throw new Error("Unknown option: " + args[i]); 111 }else{ 112 switch(args[i].charAt(1)){ 113 //Required 114 case 'i': inputLocation = args[++i]; break; 115 //Optional 116 case 'o': outputLocation = args[++i]; break; 117 case 'f': 118 i++; 119 if(args[i].equalsIgnoreCase("csv")) delimiter = ","; 120 else if(args[i].equalsIgnoreCase("tsv")) delimiter = "\t"; 121 else throw new Error("Invalid value for -f: " + args[i] + ". Please choose either csv or tsv only."); 122 break; 123 case 'x': aminoAcidCompositionLocation = args[++i]; break; 124 case 'y': elementMassLocation = args[++i]; break; 125 case 'd': decimalPlace = Integer.parseInt(args[++i]); break; 126 //Properties 127 case 'a': 128 propertyList.add('1'); 129 propertyList.add('2'); 130 propertyList.add('3'); 131 propertyList.add('4'); 132 propertyList.add('5'); 133 propertyList.add('6'); 134 propertyList.add('7'); 135 propertyList.add('8'); 136 propertyList.add('9'); 137 break; 138 case '1': propertyList.add('1'); break; 139 case '2': propertyList.add('2'); break; 140 case '3': propertyList.add('3'); break; 141 case '4': propertyList.add('4'); break; 142 case '5': propertyList.add('5'); break; 143 case '6': propertyList.add('6'); break; 144 case '7': propertyList.add('7'); break; 145 case '8': propertyList.add('8'); break; 146 case '9': propertyList.add('9'); break; 147 case '0': 148 propertyList.add('0'); 149 i++; 150 if(args[i].length() != 1) throw new Error("Invalid value: " + args[i] + ". Amino Acid Symbol should be of single character"); 151 specificList.add(args[i].toUpperCase().charAt(0)); 152 break; 153 default: 154 showHelp(); 155 throw new Error("Unknown option: " + args[i]); 156 } 157 } 158 } 159 160 /* 161 * Check for validity of input arguments 162 */ 163 AminoAcidCompositionTable aaTable = checkForValidityAndObtainAATable(inputLocation, propertyList.size(), aminoAcidCompositionLocation, 164 elementMassLocation); 165 166 /* 167 * Read input file and generate output 168 */ 169 readInputAndGenerateOutput(outputLocation, propertyList, specificList, delimiter, inputLocation, aaTable, decimalPlace); 170 } 171 172 private static LinkedHashMap<String, ProteinSequence> readInputFile(String inputLocation, AminoAcidCompositionTable aaTable) throws Exception{ 173 FileInputStream inStream = new FileInputStream(inputLocation); 174 CompoundSet<AminoAcidCompound> set; 175 if(aaTable == null){ 176 set = CaseFreeAminoAcidCompoundSet.getAminoAcidCompoundSet(); 177 }else{ 178 set = aaTable.getAminoAcidCompoundSet(); 179 } 180 LinkedHashMap<String, ProteinSequence> ret; 181 if ( inputLocation.toLowerCase().contains(".gb")) { 182 GenbankReader<ProteinSequence, AminoAcidCompound> genbankReader = new GenbankReader<ProteinSequence, AminoAcidCompound>( 183 inStream, new GenericGenbankHeaderParser<ProteinSequence, AminoAcidCompound>(), 184 new ProteinSequenceCreator(set)); 185 ret = genbankReader.process(); 186 187 188 } else { 189 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>( 190 inStream, new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), 191 new ProteinSequenceCreator(set)); 192 ret = fastaReader.process(); 193 194 } 195 return ret; 196 } 197 198 public enum PropertyName{MolecularWeight, Absorbance_True, Absorbance_False, ExtinctionCoefficient_True, ExtinctionCoefficient_False, 199 InstabilityIndex, ApliphaticIndex, AverageHydropathyValue, IsoelectricPoint, NetCharge_pH_7, A, R, 200 N, D, C, E, Q, G, H, I, L, 201 K, M, F, P, S, T, W, Y, V}; 202 203 private static void printHeader(PrintStream output, List<Character> propertyList, List<Character> specificList, String delimiter) throws IOException{ 204 int specificCount = 0; 205 /* 206 * 1 Molecular weight 207 * 2 Absorbance (assumed Cys reduced and assume Cys to form cystines) 208 * 3 Extinction coefficient (assumed Cys reduced and assume Cys to form cystines) 209 * 4 Instability index 210 * 5 Apliphatic index 211 * 6 Average hydropathy value 212 * 7 Isoelectric point 213 * 8 Net charge at pH 7 214 * 9 Composition of the 20 standard amino acid 215 * 0 Composition of the specific amino acid 216 */ 217 List<String> sList = new ArrayList<String>(); 218 sList.add("SequenceName"); 219 for(Character c:propertyList){ 220 switch(c){ 221 case '1': sList.add(PropertyName.MolecularWeight.toString()); break; 222 case '2': sList.add(PropertyName.Absorbance_True.toString()); sList.add(PropertyName.Absorbance_False.toString()); break; 223 case '3': sList.add(PropertyName.ExtinctionCoefficient_True.toString()); sList.add(PropertyName.ExtinctionCoefficient_False.toString()); break; 224 case '4': sList.add(PropertyName.InstabilityIndex.toString()); break; 225 case '5': sList.add(PropertyName.ApliphaticIndex.toString()); break; 226 case '6': sList.add(PropertyName.AverageHydropathyValue.toString()); break; 227 case '7': sList.add(PropertyName.IsoelectricPoint.toString()); break; 228 case '8': sList.add(PropertyName.NetCharge_pH_7.toString()); break; 229 case '9': 230 sList.add(PropertyName.A.toString()); sList.add(PropertyName.R.toString()); 231 sList.add(PropertyName.N.toString()); sList.add(PropertyName.D.toString()); 232 sList.add(PropertyName.C.toString()); sList.add(PropertyName.E.toString()); 233 sList.add(PropertyName.Q.toString()); sList.add(PropertyName.G.toString()); 234 sList.add(PropertyName.H.toString()); sList.add(PropertyName.I.toString()); 235 sList.add(PropertyName.L.toString()); sList.add(PropertyName.K.toString()); 236 sList.add(PropertyName.M.toString()); sList.add(PropertyName.F.toString()); 237 sList.add(PropertyName.P.toString()); sList.add(PropertyName.S.toString()); 238 sList.add(PropertyName.T.toString()); sList.add(PropertyName.W.toString()); 239 sList.add(PropertyName.Y.toString()); sList.add(PropertyName.V.toString()); 240 break; 241 case '0': sList.add("" + specificList.get(specificCount++)); break; 242 } 243 } 244 for(int i = 0; i < sList.size(); i++){ 245 if(i != 0) output.print(delimiter); 246 output.print(sList.get(i)); 247 } 248 output.println(); 249 output.flush(); 250 } 251 252 private static void compute(PrintStream output, String header, String sequence, String delimiter, 253 AminoAcidCompositionTable aaTable, List<Character> propertyList, List<Character> specificList, int decimalPlace) throws CompoundNotFoundException{ 254 /* 255 * 1 Molecular weight 256 * 2 Absorbance (assumed Cys reduced and assume Cys to form cystines) 257 * 3 Extinction coefficient 258 * 4 Instability index 259 * 5 Apliphatic index 260 * 6 Average hydropathy value 261 * 7 Isoelectric point 262 * 8 Net charge at pH 7 263 * 9 Composition of the 20 standard amino acid 264 * 0 Composition of the specific amino acid 265 */ 266 ProteinSequence pSequence; 267 CompoundSet<AminoAcidCompound> aaSet; 268 if(aaTable != null){ 269 sequence = Utils.checkSequence(sequence, aaTable.getSymbolSet()); 270 pSequence = new ProteinSequence(sequence, aaTable.getAminoAcidCompoundSet()); 271 aaSet = aaTable.getAminoAcidCompoundSet(); 272 }else{ 273 sequence = Utils.checkSequence(sequence); 274 pSequence = new ProteinSequence(sequence); 275 aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 276 } 277 IPeptideProperties pp = new PeptidePropertiesImpl(); 278 279 int specificCount = 0; 280 List<Double> dList = new ArrayList<Double>(); 281 for(Character c:propertyList){ 282 switch(c){ 283 case '1': 284 if(aaTable == null) 285 dList.add(pp.getMolecularWeight(pSequence)); 286 else 287 dList.add(pp.getMolecularWeight(pSequence)); 288 break; 289 case '2': 290 dList.add(pp.getAbsorbance(pSequence, true)); 291 dList.add(pp.getAbsorbance(pSequence, false)); 292 break; 293 case '3': 294 dList.add(pp.getExtinctionCoefficient(pSequence, true)); 295 dList.add(pp.getExtinctionCoefficient(pSequence, false)); 296 break; 297 case '4': dList.add(pp.getInstabilityIndex(pSequence)); break; 298 case '5': dList.add(pp.getApliphaticIndex(pSequence)); break; 299 case '6': dList.add(pp.getAvgHydropathy(pSequence)); break; 300 case '7': dList.add(pp.getIsoelectricPoint(pSequence)); break; 301 case '8': dList.add(pp.getNetCharge(pSequence)); break; 302 case '9': 303 Map<AminoAcidCompound, Double> aaCompound2Double = pp.getAAComposition(pSequence); 304 //(A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V) 305 dList.add(aaCompound2Double.get(Constraints.A)); 306 dList.add(aaCompound2Double.get(Constraints.R)); 307 dList.add(aaCompound2Double.get(Constraints.N)); 308 dList.add(aaCompound2Double.get(Constraints.D)); 309 dList.add(aaCompound2Double.get(Constraints.C)); 310 dList.add(aaCompound2Double.get(Constraints.E)); 311 dList.add(aaCompound2Double.get(Constraints.Q)); 312 dList.add(aaCompound2Double.get(Constraints.G)); 313 dList.add(aaCompound2Double.get(Constraints.H)); 314 dList.add(aaCompound2Double.get(Constraints.I)); 315 dList.add(aaCompound2Double.get(Constraints.L)); 316 dList.add(aaCompound2Double.get(Constraints.K)); 317 dList.add(aaCompound2Double.get(Constraints.M)); 318 dList.add(aaCompound2Double.get(Constraints.F)); 319 dList.add(aaCompound2Double.get(Constraints.P)); 320 dList.add(aaCompound2Double.get(Constraints.S)); 321 dList.add(aaCompound2Double.get(Constraints.T)); 322 dList.add(aaCompound2Double.get(Constraints.W)); 323 dList.add(aaCompound2Double.get(Constraints.Y)); 324 dList.add(aaCompound2Double.get(Constraints.V)); 325 break; 326 case '0': dList.add(pp.getEnrichment(pSequence, aaSet.getCompoundForString("" + specificList.get(specificCount++)))); break; 327 } 328 } 329 output.print(header.replace(delimiter, "_")); 330 for(int i = 0; i < dList.size(); i++){ 331 output.print(delimiter + Utils.roundToDecimals(dList.get(i), decimalPlace)); 332 } 333 output.println(); 334 output.flush(); 335 } 336 337 private static void showHelp(){ 338 System.err.println("NAME"); 339 System.err.println("\tAn executable to generate physico-chemical properties of protein sequences."); 340 System.err.println(); 341 342 System.err.println("EXAMPLES"); 343 System.err.println("\tjava -jar AAProperties.jar -i test.fasta -a"); 344 System.err.println("\t\tGenerates all possible properties."); 345 System.err.println(); 346 System.err.println("\tjava -jar AAProperties.jar -i test.fasta -1 -3 -7"); 347 System.err.println("\t\tGenerates only molecular weight, extinction coefficient and isoelectric point."); 348 System.err.println(); 349 System.err.println("\tjava -jar AAProperties.jar -i test.fasta -0 A -0 N -1"); 350 System.err.println("\t\tGenerates composition of two specific amino acid symbol and molecular weight."); 351 System.err.println(); 352 353 System.err.println("OPTIONS"); 354 System.err.println("\tRequired"); 355 System.err.println("\t\t-i location of input FASTA file"); 356 System.err.println(); 357 358 System.err.println("\tOptional"); 359 System.err.println("\t\t-o location of output file [standard output (default)]"); 360 System.err.println("\t\t-f output format [csv (default) or tsv]"); 361 System.err.println("\t\t-x location of Amino Acid Composition XML file for defining amino acid composition"); 362 System.err.println("\t\t-y location of Element Mass XML file for defining mass of elements"); 363 System.err.println("\t\t-d number of decimals (int) [4 (default)]"); 364 System.err.println(); 365 366 System.err.println("\tProvide at least one of them"); 367 System.err.println("\t\t-a compute properties of option 1-9"); 368 System.err.println("\t\t-1 compute molecular weight"); 369 System.err.println("\t\t-2 compute absorbance"); 370 System.err.println("\t\t-3 compute extinction coefficient"); 371 System.err.println("\t\t-4 compute instability index"); 372 System.err.println("\t\t-5 compute apliphatic index"); 373 System.err.println("\t\t-6 compute average hydropathy value"); 374 System.err.println("\t\t-7 compute isoelectric point"); 375 System.err.println("\t\t-8 compute net charge at pH 7"); 376 System.err.println("\t\t-9 compute composition of 20 standard amino acid (A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V)"); 377 System.err.println("\t\t-0 compute composition of specific amino acid symbol"); 378 System.err.println(); 379 } 380}