891
技術社區[雲棲]
使用Gzip加速網頁的傳輸
博學,切問,近思--詹子知 (https://jameszhan.github.io)
日前筆者使用HttpClient處理大數據請求時,在連續發送請求的時候經常會出現異常 java.io.IOException: chunked stream ended unexpectedly。使用HttpMethod的abort方法也不能完全避免這種異常的出現,但是對於小數據的請求,這種異常就基本上難得一見了。那麼對於同樣的頁面請求,如何減少網絡的數據傳輸量呢?眾所周知,現在大部分的Web Server都支持數據的壓縮傳輸。要知道,一般的網頁內容經過壓縮,大小可以減少到原來的20%以下,而對於純英文的網站,網頁內容更是可以減少到原來的5%以下。而要使Web Server對數據進行壓縮傳輸,只需要在請求頭上加入Accept-Encoding: gzip, deflate。
public HttpMethod createHttpMethod(String url, String type, NameValuePair[] params, String contentType) { HttpMethod method = null; if (type.equalsIgnoreCase("POST")) { method = new PostMethod(url); method.setRequestHeader("Content-Type", contentType); if(params != null){ ((PostMethod) method).setRequestBody(params); } } else { method = new GetMethod(url); if(params != null){ method.setQueryString(params); } } method.setRequestHeader("Accept-Encoding", "gzip, deflate"); return method; }
這個時候,如果你請求的Web Server支持Gzip,返回來的響應便是被壓縮後的數據,那麼把壓縮後的數據解析成原來的網頁內容便是客戶端要做的事情了。對於當前的主流瀏覽器,都是支持對壓縮數據自動解壓的,而在我們的應用程序中,我們只要對網頁流稍作處理,便可以得到原來的網頁內容。
/**
 * Decodes the body of a successful response into text.
 *
 * <p>The request advertises {@code Accept-Encoding: gzip, deflate}, so the
 * body is unwrapped according to the {@code Content-Encoding} response header
 * before it is handed to the decoder. Both encodings are handled; an
 * unrecognised or absent encoding leaves the stream untouched.
 *
 * @param method the executed method whose response body is read
 * @return the decoded text, or {@code null} when the response has no body
 * @throws IOException if reading or decompressing the body fails
 */
protected String doSuccess(HttpMethod method) throws IOException {
    InputStream in = method.getResponseBodyAsStream();
    if (in == null) {
        // HttpClient reports a missing body as a null stream; wrapping it
        // in a decompressor would fail with a NullPointerException.
        return null;
    }
    Header contentEncodingHeader = method.getResponseHeader("Content-Encoding");
    String contentEncoding = contentEncodingHeader == null ? null : contentEncodingHeader.getValue();
    if (contentEncoding != null) {
        String normalized = contentEncoding.toLowerCase(Locale.US);
        if (normalized.indexOf("gzip") != -1) {
            // Also matches the legacy "x-gzip" token.
            in = new GZIPInputStream(in);
        } else if (normalized.indexOf("deflate") != -1) {
            // HTTP "deflate" is zlib-wrapped data, which is the default
            // format InflaterInputStream reads.
            in = new java.util.zip.InflaterInputStream(in);
        }
    }
    return decoder.decode(in);
}
上一篇文章,我們介紹了如何檢查文檔輸入流的編碼,本節我們就可以利用上文的HtmlInputStreamDecoder類把文檔流解析成文檔內容。完整的代碼如下:
import java.io.IOException; import java.io.InputStream; import java.util.Locale; import java.util.zip.GZIPInputStream; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.URI; import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.log4j.Logger; public class HttpRequest { private static final Logger LOGGER = Logger.getLogger(HttpRequest.class); private HttpClient client; private HtmlInputStreamDecoder decoder; public HttpRequest() { this(new HttpClient(new MultiThreadedHttpConnectionManager())); } public HttpRequest(HttpClient client) { this.client = client; this.decoder = new HtmlInputStreamDecoder(); } public String doRequest(HttpMethod method) { String html = null; try { int statusCode = client.executeMethod(method); switch (statusCode) { case HttpStatus.SC_OK: html = doSuccess(method); break; case HttpStatus.SC_MOVED_PERMANENTLY: case HttpStatus.SC_MOVED_TEMPORARILY: case HttpStatus.SC_SEE_OTHER: case HttpStatus.SC_TEMPORARY_REDIRECT: doRedirect(method); break; default: html = doError(method); } } catch (HttpException e) { LOGGER.error("Http error occur while visit the url.", e); } catch (IOException e) { LOGGER.error("IO error occur while visit the url.", e); } finally { method.abort(); method.releaseConnection(); } return html; } protected String doSuccess(HttpMethod method) throws IOException { InputStream in = method.getResponseBodyAsStream(); Header contentEncodingHeader = method.getResponseHeader("Content-Encoding"); if (contentEncodingHeader != null) { String contentEncoding = 
contentEncodingHeader.getValue(); if (contentEncoding.toLowerCase(Locale.US).indexOf("gzip") != -1) { in = new GZIPInputStream(in); } } return decoder.decode(in); } protected String doError(HttpMethod method) { LOGGER.error("Error Response: " + method.getStatusLine()); return method.getStatusText(); } protected void doRedirect(HttpMethod method) throws URIException { Header locationHeader = method.getResponseHeader("location"); if (locationHeader != null) { String location = locationHeader.getValue(); if (location == null) { location = "/"; } doRequest(new GetMethod(getRedirectUrl(method.getURI(), location))); } } public HttpMethod createHttpMethod(String url, String type, NameValuePair[] params, String contentType) { HttpMethod method = null; if (type.equalsIgnoreCase("POST")) { method = new PostMethod(url); method.setRequestHeader("Content-Type", contentType); if(params != null){ ((PostMethod) method).setRequestBody(params); } } else { method = new GetMethod(url); if(params != null){ method.setQueryString(params); } } method.setRequestHeader("Accept-Encoding", "gzip, deflate"); return method; } protected static String getRedirectUrl(URI origin, String location) throws URIException { String redirect = null; if (location.startsWith("http:")) { redirect = location; } else if (location.startsWith("/")) { origin.setPath(location); redirect = origin.getURI(); } else { redirect = origin.getURI().replaceAll("(?<=/)[^/]+$", location); } return redirect; } }
代碼示例:
/**
 * Demo entry point: fetches one page with compression enabled and prints
 * the decoded text to standard output.
 */
public static void main(String[] args) {
    HttpRequest http = new HttpRequest();
    String page = http.doRequest(
            http.createHttpMethod("https://www.csdn.com", "GET", null, "text/html"));
    System.out.println(page);
}
最後更新:2017-04-02 04:00:25