HttpComponents組件探究 - HttpClient篇
在Java領域,談到網絡編程,可能大家腦海裏第一反應就是MINA,NETTY,GRIZZLY等優秀的開源框架。沒錯,不過在深入探究這些框架之前,我們需要先從最original的技術探究開始(當然,需要大家先熟悉java.net.*類庫)。這裏,我要和大家分享一下HttpComponents項目的部分組件特性。HttpClient,想必大家早都接觸過了吧。HttpComponents和HttpClient的“血緣”有點像guava和google-collections的關係。目前,HttpComponents已經是Apache的頂級項目了,它旨在為我們提供一個Http協議相關的Java平台工具集。它的代碼組織很精妙,主要分兩部分,一部分是核心工具集(包括HttpCore-bio,HttpCore-nio,HttpClient,HttpMime,HttpCookie等),一部分是擴展工具集(目前主要包括ssl)。
HttpClient主要包括Connection management,State management,Authentication management三部分。下面給出對它的二次封裝,經過了線上的接近半年的驗證(這裏指的是httpClient 3,httpClient 4還有待檢驗),可以看做是一個高性能的Client封裝吧。感興趣的朋友可以根據apache的MPM IO模型進行部分參數的調整。
先來段httpClient 4的封裝,代碼如下:
/** * @author von gosling 2012-3-2 */ public class HttpComponentsClientExecutor implements DisposableBean { private static final int DEFAULT_MAX_TOTAL_CONNECTIONS = 100; private static final int DEFAULT_MAX_CONNECTIONS_PER_ROUTE = 5; //notice IE 6,7,8 private static final int DEFAULT_CONN_TIMEOUT_MILLISECONDS = 5 * 1000; private static final int DEFAULT_READ_TIMEOUT_MILLISECONDS = 60 * 1000; private static final String HTTP_HEADER_CONTENT_ENCODING = "Content-Encoding"; private static final String ENCODING_GZIP = "gzip"; private HttpClient httpClient; /** * Create a new instance of the HttpComponentsClient with a default * {@link HttpClient} that uses a default * {@link org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager}. */ public HttpComponentsClientExecutor() { SchemeRegistry schemeRegistry = new SchemeRegistry(); schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory())); ThreadSafeClientConnManager connectionManager = new ThreadSafeClientConnManager( schemeRegistry); connectionManager.setMaxTotal(DEFAULT_MAX_TOTAL_CONNECTIONS); connectionManager.setDefaultMaxPerRoute(DEFAULT_MAX_CONNECTIONS_PER_ROUTE); this.httpClient = new DefaultHttpClient(connectionManager); setConnectTimeout(DEFAULT_CONN_TIMEOUT_MILLISECONDS); setReadTimeout(DEFAULT_READ_TIMEOUT_MILLISECONDS); } /** * Create a new instance of the HttpComponentsClient with the given * {@link HttpClient} instance. * * @param httpClient the HttpClient instance to use for this request */ public HttpComponentsClientExecutor(HttpClient httpClient) { Validate.notNull(httpClient, "HttpClient must not be null"); //notice: if you want to custom exception recovery mechanism //you should provide an implementation of the HttpRequestRetryHandler interface. this.httpClient = httpClient; } /** * Set the {@code HttpClient} used by this request. 
*/ public void setHttpClient(HttpClient httpClient) { this.httpClient = httpClient; } /** * Return the {@code HttpClient} used by this request. */ public HttpClient getHttpClient() { return this.httpClient; } /** * Set the connection timeout for the underlying HttpClient. A timeout value * of 0 specifies an infinite timeout. * * @param timeout the timeout value in milliseconds */ public void setConnectTimeout(int timeout) { Validate.isTrue(timeout >= 0, "Timeout must be a non-negative value"); getHttpClient().getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); } /** * Set the socket timeout (SO_TIMEOUT) in milliseconds, which is the timeout * for waiting for data or, put differently, a maximum period inactivity * between two consecutive data packets.A timeout value of 0 specifies an * infinite timeout. * * @param timeout the timeout value in milliseconds */ public void setReadTimeout(int timeout) { Validate.isTrue(timeout >= 0, "Timeout must be a non-negative value"); getHttpClient().getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT, timeout); } /** * Create a Commons HttpMethodBase object for the given HTTP method and URI * specification. * * @param httpMethod the HTTP method * @param uri the URI * @return the Commons HttpMethodBase object */ protected HttpUriRequest createHttpUriRequest(HttpMethod httpMethod, URI uri) { switch (httpMethod) { case GET: return new HttpGet(uri); case DELETE: return new HttpDelete(uri); case HEAD: return new HttpHead(uri); case OPTIONS: return new HttpOptions(uri); case POST: return new HttpPost(uri); case PUT: return new HttpPut(uri); case TRACE: return new HttpTrace(uri); default: throw new IllegalArgumentException("Invalid HTTP method: " + httpMethod); } } /** * Execute the given method on the provided URI. * * @param method the HTTP method to execute (GET, POST, etc.) 
* @param url the fully-expanded URL to connect to * @param responseHandler httpClient will automatically take care of * ensuring release of the connection back to the connection * manager regardless whether the request execution succeeds or * causes an exception,if using this response handler * @return an response object's string representation * @throws IOException * @throws ClientProtocolException */ public String doExecuteRequest(HttpMethod httpMethod, URI uri, ResponseHandler<String> responseHandler) throws ClientProtocolException, IOException { return httpClient.execute(createHttpUriRequest(httpMethod, uri), responseHandler); } public InputStream doExecuteRequest(HttpMethod httpMethod, URI uri) throws ClientProtocolException, IOException { //1. HttpUriRequest httpUriRequest = createHttpUriRequest(httpMethod, uri); //2. HttpResponse response = httpClient.execute(httpUriRequest); //3. validateResponse(response); //4. return getResponseBody(response); } /** * Validate the given response, throwing an exception if it does not * correspond to a successful HTTP response. * <p> * Default implementation rejects any HTTP status code beyond 2xx, to avoid * parsing the response body and trying to deserialize from a corrupted * stream. * * @param config the HTTP invoker configuration that specifies the target * service * @param response the resulting HttpResponse to validate * @throws NoHttpResponseException * @throws java.io.IOException if validation failed */ protected void validateResponse(HttpResponse response) throws IOException { StatusLine status = response.getStatusLine(); if (status.getStatusCode() >= 300) { throw new NoHttpResponseException( "Did not receive successful HTTP response: status code = " + status.getStatusCode() + ", status message = [" + status.getReasonPhrase() + "]"); } } /** * Extract the response body * <p> * The default implementation simply fetches the response body stream. 
If * the response is recognized as GZIP response, the InputStream will get * wrapped in a GZIPInputStream. * * @param httpResponse the resulting HttpResponse to read the response body * from * @return an InputStream for the response body * @throws java.io.IOException if thrown by I/O methods * @see #isGzipResponse * @see java.util.zip.GZIPInputStream */ protected InputStream getResponseBody(HttpResponse httpResponse) throws IOException { if (isGzipResponse(httpResponse)) { return new GZIPInputStream(httpResponse.getEntity().getContent()); } else { return httpResponse.getEntity().getContent(); } } /** * Determine whether the given response indicates a GZIP response. * <p> * The default implementation checks whether the HTTP "Content-Encoding" * header contains "gzip" (in any casing). * * @param httpResponse the resulting HttpResponse to check * @return whether the given response indicates a GZIP response */ protected boolean isGzipResponse(HttpResponse httpResponse) { Header encodingHeader = httpResponse.getFirstHeader(HTTP_HEADER_CONTENT_ENCODING); return (encodingHeader != null && encodingHeader.getValue() != null && encodingHeader .getValue().toLowerCase().contains(ENCODING_GZIP)); } /** * Shutdown hook that closes the underlying * {@link org.apache.http.conn.ClientConnectionManager * ClientConnectionManager}'s connection pool, if any. */ public void destroy() { getHttpClient().getConnectionManager().shutdown(); } enum HttpMethod { GET, POST, HEAD, OPTIONS, PUT, DELETE, TRACE } }
下麵是久經考驗的httpClient 3的二次封裝,如下:
/** * @author von gosling 2011-12-12 */ public class HttpClientUtils { private static final Logger log = LoggerFactory .getLogger(HttpClientUtils.class); private static int timeOut = 100; private static int retryCount = 1; private static int connectionTimeout = 100; private static int maxHostConnections = 32; //根據apache work MPM設置此值 private static int maxTotalConnections = 512; //同上 private static String charsetName = "UTF-8"; public static JSONObject executeMethod(HttpClient httpClient, HttpMethod method) { JSONObject result = new JSONObject(); StopWatch watch = new StopWatch(); int status = -1; try { log.info("Execute method({}) begin...", method.getURI()); watch.start(); status = httpClient.executeMethod(method); watch.stop(); if (status == HttpStatus.SC_OK) { InputStream inputStream = method.getResponseBodyAsStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); IOUtils.copy(inputStream, baos); String response = new String(baos.toByteArray(), charsetName); log.info("Response is:{}", response); result = JSONObject.parseObject(response); } else { log.error("Http request failure! 
status is {}", status); } } catch (SocketTimeoutException e) { log.error("Request time out!");//隻關注請求超時,對於其它兩類超時,使用通用異常捕獲 } catch (Exception e) { log.error("Error occur!", e); } finally { method.releaseConnection(); log.info("Method {},statusCode {},consuming {} ms", new Object[] { method.getName(), status, watch.getTime() }); } return result; } /** * @param uri * @param nameValuePairs * @return */ public static PostMethod createPostMethod(String uri, NameValuePair[] nameValuePairs) { PostMethod method = new PostMethod(uri); method.addParameters(nameValuePairs); method.getParams().setContentCharset(charsetName); return method; } /** * @param uri * @param nameValuePairs * @return */ public static GetMethod createGetMethod(String uri, NameValuePair[] nameValuePairs) { GetMethod method = new GetMethod(uri); List<NameValuePair> list = Lists.newArrayList(); if (nameValuePairs != null) { Collections.addAll(list, nameValuePairs); method.setQueryString(list.toArray(new NameValuePair[nameValuePairs.length])); } method.getParams().setContentCharset(charsetName); return method; } public static HttpClient createHttpClient() { //1. HttpClient httpClient = new HttpClient(new MultiThreadedHttpConnectionManager()); //2. HttpConnectionManagerParams httpConnectionManagerParams = httpClient .getHttpConnectionManager().getParams(); httpConnectionManagerParams.setConnectionTimeout(connectionTimeout); httpConnectionManagerParams.setTcpNoDelay(true);//Nagle's algorithm httpConnectionManagerParams.setSoTimeout(timeOut); httpConnectionManagerParams.setDefaultMaxConnectionsPerHost(maxHostConnections); httpConnectionManagerParams.setMaxTotalConnections(maxTotalConnections); //3. 
HttpClientParams httpClientParam = httpClient.getParams(); //httpClientParam.setConnectionManagerTimeout(connectionTimeout);//暫且不關注這個超時設置,後麵根據性能酌情考慮 httpClientParam.setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(retryCount, false)); httpClientParam.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); return httpClient; } public static JSONObject doGet(String url, NameValuePair[] params) { return executeMethod(createHttpClient(), createGetMethod(url, params)); } public static JSONObject doPost(String url, NameValuePair[] params) { return executeMethod(createHttpClient(), createPostMethod(url, params)); } protected HttpClientUtils() { } public void setTimeOut(int timeOut) { HttpClientUtils.timeOut = timeOut; } public static int getTimeOut() { return timeOut; } public static int getRetryCount() { return retryCount; } public void setRetryCount(int retryCount) { HttpClientUtils.retryCount = retryCount; } public static int getConnectionTimeout() { return connectionTimeout; } public void setConnectionTimeout(int connectionTimeout) { HttpClientUtils.connectionTimeout = connectionTimeout; } public static int getMaxHostConnections() { return maxHostConnections; } public void setMaxHostConnections(int maxHostConnections) { HttpClientUtils.maxHostConnections = maxHostConnections; } public static int getMaxTotalConnections() { return maxTotalConnections; } public void setMaxTotalConnections(int maxTotalConnections) { HttpClientUtils.maxTotalConnections = maxTotalConnections; } public static String getCharsetName() { return charsetName; } public void setCharsetName(String charsetName) { HttpClientUtils.charsetName = charsetName; } }
好了,有了活生生的代碼,我們來總結一下httpClient封裝過程中需要注意的一些事項吧。嗯,其實更多的是體現在安全、性能上面:
(1)多線程模型,尤其注意finally中connection的釋放問題。除此之外,需要考慮池化連接的異常處理,這是我文中提到特別注意的三大異常之一;
(2)Retry機制中對冪等性的處理。尤其是在httpClient4中,put和post操作,未按照http規範行事,需要我們額外注意;
(3)SSL、TLS的定製化處理;
(4)並發標記的處理,這裏使用了Concurrency in practice中的並發annotation,有什麼用?感興趣的朋友可以了解下SureLogic(https://www.surelogic.com/concurrency-tools.html),別問我要license,因為俺也不是apache開源社區的developer呀;
(5)攔截器對header的處理;
(6)connection stale check機制;
(7)Cookie specification的選擇,或者是自定義實現;
恩,今天就寫到這裏吧。感謝大家的閱讀,如果哪裏有疑問,歡迎留言~
參考文獻:
1.https://www.w3.org/Protocols/rfc2616/rfc2616-sec7.html
2.https://hc.apache.org/httpcomponents-client-ga/tutorial/pdf/httpclient-tutorial.pdf
最後更新:2017-04-02 22:16:33