/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.ws.hmmer;

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.SortedSet;
import java.util.TreeSet;


/**
 * Makes remote calls to the HMMER web service at the EBI web site and returns Pfam domain annotations
 * for an input protein sequence.
 *
 * <p>The exchange is a two-step protocol: the sequence is POSTed to the service, which answers with a
 * {@code Location} header; the JSON results are then fetched from that location with a GET request.
 *
 * @author Andreas Prlic
 * @since 3.0.3
 */
public class RemoteHmmerScan implements HmmerScan {

    private static final Logger LOGGER = LoggerFactory.getLogger(RemoteHmmerScan.class);

    /** Service endpoint used by {@link #scan(ProteinSequence)}. */
    public static final String HMMER_SERVICE = "https://www.ebi.ac.uk/Tools/hmmer/search/hmmscan";

    public RemoteHmmerScan() {

    }


    /**
     * Scans a protein sequence for Pfam profile matches using the service at {@link #HMMER_SERVICE}.
     *
     * @param sequence the protein sequence to scan
     * @return the hits reported by the service, as a sorted set
     * @throws IOException if the communication with the web service fails
     */
    @Override
    public SortedSet<HmmerResult> scan(ProteinSequence sequence) throws IOException {

        URL url = new URL(HMMER_SERVICE);

        return scan(sequence, url);

    }

    /**
     * Scans a protein sequence for Pfam profile matches.
     *
     * @param sequence the protein sequence to scan
     * @param serviceLocation the URL of the hmmscan service the sequence is POSTed to
     * @return the hits reported by the service, as a sorted set. If a number in the response cannot
     *         be parsed, a warning is logged and the hits parsed up to that point are returned.
     * @throws IOException if the request cannot be sent, the service does not answer with a
     *         {@code Location} header, or the results cannot be read
     */
    public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation) throws IOException {

        URL resultLocation = submitSequence(sequence, serviceLocation);

        String json = fetchResponse(resultLocation);

        return parseResults(json);

    }

    /**
     * POSTs the sequence to the service and returns the URL from the {@code Location} header of the answer.
     */
    private URL submitSequence(ProteinSequence sequence, URL serviceLocation) throws IOException {

        StringBuilder postContent = new StringBuilder();

        postContent.append("hmmdb=pfam");

        // by default hmmscan runs with the HMMER3 cut_ga parameter enabled, the "gathering threshold", which depends on
        // the cutoffs defined in the underlying HMM files.
        // to request a different cutoff by e-value this could be enabled:
        //postContent.append("&E=1");

        postContent.append("&seq=");
        // the body is declared as application/x-www-form-urlencoded, so the value has to be encoded
        postContent.append(URLEncoder.encode(sequence.getSequenceAsString(), StandardCharsets.UTF_8.name()));

        // encode once, with an explicit charset, so the declared length and the sent bytes always agree
        byte[] payload = postContent.toString().getBytes(StandardCharsets.UTF_8);

        HttpURLConnection connection = (HttpURLConnection) serviceLocation.openConnection();
        connection.setDoOutput(true);
        connection.setDoInput(true);
        connection.setConnectTimeout(15000); // 15 sec
        // the redirect is followed manually below, so that the second request can be a GET asking for JSON
        connection.setInstanceFollowRedirects(false);
        connection.setRequestMethod("POST");
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");

        connection.setRequestProperty("Accept", "application/json");

        connection.setRequestProperty("Content-Length", Integer.toString(payload.length));

        //Send request
        try (OutputStream out = connection.getOutputStream()) {
            out.write(payload);
            out.flush();
        }

        // Ask for the response code first: unlike getHeaderField(), it propagates connection errors
        // instead of silently returning null.
        int responseCode = connection.getResponseCode();
        if (responseCode == 500) {
            LOGGER.warn("Got 500 response code for URL {}. Response message: {}.", serviceLocation, connection.getResponseMessage());
        }

        //Now get the redirect URL
        String location = connection.getHeaderField("Location");
        if (location == null) {
            throw new IOException("No Location header in response from " + serviceLocation
                    + " (response code " + responseCode + ")");
        }

        // resolving against the service URL also accepts a relative Location value
        return new URL(serviceLocation, location);
    }

    /**
     * Fetches the document at the given URL with a GET request and returns its lines concatenated
     * (line terminators are dropped).
     */
    private String fetchResponse(URL resultLocation) throws IOException {

        HttpURLConnection connection = (HttpURLConnection) resultLocation.openConnection();
        connection.setRequestMethod("GET");
        connection.setRequestProperty("Accept", "application/json");
        connection.setConnectTimeout(60000); // 1 minute

        StringBuilder response = new StringBuilder();

        //Get the response
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {

            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }

        return response.toString();
    }

    /**
     * Converts the JSON response of the service into a container of {@link HmmerResult} objects.
     */
    private SortedSet<HmmerResult> parseResults(String jsonText) {

        SortedSet<HmmerResult> results = new TreeSet<>();
        try {
            JSONObject json = JSONObject.fromObject(jsonText);

            JSONObject hmresults = json.getJSONObject("results");

            JSONArray hits = hmresults.getJSONArray("hits");

            for (int i = 0; i < hits.size(); i++) {
                JSONObject hit = hits.getJSONObject(i);

                HmmerResult hmmResult = new HmmerResult();

                // dcl stays at -1 when it is missing or neither a Long nor an Integer
                Object dclO = hit.get("dcl");
                Integer dcl = -1;
                if (dclO instanceof Long) {
                    Long dclL = (Long) dclO;
                    dcl = dclL.intValue();
                } else if (dclO instanceof Integer) {
                    dcl = (Integer) dclO;
                }

                hmmResult.setAcc((String) hit.get("acc"));
                hmmResult.setDcl(dcl);
                hmmResult.setDesc((String) hit.get("desc"));
                hmmResult.setEvalue(Float.parseFloat((String) hit.get("evalue")));
                hmmResult.setName((String) hit.get("name"));
                hmmResult.setNdom((Integer) hit.get("ndom"));
                hmmResult.setNreported((Integer) hit.get("nreported"));
                hmmResult.setPvalue((Double) hit.get("pvalue"));
                hmmResult.setScore(Float.parseFloat((String) hit.get("score")));

                hmmResult.setDomains(parseDomains(hit.getJSONArray("domains")));

                results.add(hmmResult);
            }
        } catch (NumberFormatException e) {
            // best effort: keep the hits parsed so far, but do not lose the cause
            LOGGER.warn("Could not parse number in Hmmer web service json response: {}", e.getMessage(), e);
        }

        return results;
    }

    /**
     * Converts the "domains" array of a hit, keeping only domains that are flagged as included and
     * significant and that are not flagged as outcompeted.
     */
    private SortedSet<HmmerDomain> parseDomains(JSONArray hmmdomains) {

        SortedSet<HmmerDomain> domains = new TreeSet<>();
        for (int j = 0; j < hmmdomains.size(); j++) {
            JSONObject d = hmmdomains.getJSONObject(j);

            // a missing or unreadable flag is treated like "not included" instead of failing on unboxing
            Integer isIncluded = getInteger(d.get("is_included"));
            if (isIncluded == null || isIncluded == 0) {
                continue;
            }

            // this filters out multiple hits to the same clan
            Integer outcompeted = getInteger(d.get("outcompeted"));
            if (outcompeted != null && outcompeted == 1) {
                continue;
            }

            // a missing or unreadable flag is treated like "not significant"
            Integer significant = getInteger(d.get("significant"));
            if (significant == null || significant != 1) {
                continue;
            }

            HmmerDomain dom = new HmmerDomain();
            dom.setAliLenth((Integer) d.get("aliL"));
            dom.setHmmAcc((String) d.get("alihmmacc"));
            dom.setHmmDesc((String) d.get("alihmmdesc"));

            dom.setHmmFrom(getInteger(d.get("alihmmfrom")));
            dom.setHmmTo(getInteger(d.get("alihmmto")));
            dom.setSimCount((Integer) d.get("aliSimCount"));
            dom.setSqFrom(getInteger(d.get("alisqfrom")));
            dom.setSqTo(getInteger(d.get("alisqto")));
            dom.setHmmName((String) d.get("alihmmname"));
            dom.setEvalue(Float.parseFloat((String) d.get("ievalue")));

            domains.add(dom);
        }

        return domains;
    }


    /**
     * Converts a JSON value into an Integer.
     *
     * @param object an Integer, a Long or a String holding a decimal integer
     * @return the integer value, or null if the object is of any other type (including null)
     * @throws NumberFormatException if a String value is not a parsable integer
     */
    private Integer getInteger(Object object) {
        if (object instanceof Integer) {
            return (Integer) object;
        } else if (object instanceof Long) {
            // same narrowing as applied to the "dcl" field of a hit
            return ((Long) object).intValue();
        } else if (object instanceof String) {
            return Integer.parseInt((String) object);
        }

        return null;
    }

}