用HttpComponents下载网页

周末无聊,简单了解了一下Apache基金会的HttpComponents。它的前身是Apache Jakarta Commons下的子项目HttpClient,现在已经独立出来了。虽然从标题看,用它来下载网页很自然,但HttpCore的Tutorial里有一句话:它不是HttpClient的替代者——确实,只用它来下载网页有些浪费。简单看了一下HttpClient的代码,发现它也是基于HttpCore实现的,所以这次只使用HttpCore写一个小型的网页下载工具。不废话了,直接上代码。(@鱼尾Swing 有木有代码高亮工具啊)

package net.swingworks.www;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.URL;
import org.apache.http.*;
import org.apache.http.impl.*;
import org.apache.http.message.BasicHttpRequest;
import org.apache.http.params.*;
import org.apache.http.protocol.*;

/**
 * Minimal web page downloader built directly on Apache HttpCore.
 *
 * <p>Only plain HTTP is spoken: the connection is an unencrypted socket and
 * the port defaults to 80 when the URL does not name one.
 *
 * @author fuzzytalker
 */
public class DownloadFile {

    /**
     * Downloads {@code address} and stores the response body in the file
     * {@code local}, creating missing parent directories first.
     *
     * <p>Failures are not thrown; they are reported on standard output
     * together with a stack trace.
     *
     * @param address the URL to fetch
     * @param local   path of the file to write the body to
     */
    public static void down(String address, String local) {
        URL url;
        try {
            url = new URL(address);
        } catch (MalformedURLException ex) {
            ex.printStackTrace();
            System.out.println("不合法的URL:" + address);
            return;
        }
        InputStream in;
        try {
            in = down(url);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.out.println("无法下载URL:" + address);
            return;
        }
        try {
            File file = new File(local);
            // A bare file name such as "page.html" has no parent directory.
            File parent = file.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }

            OutputStream out = new FileOutputStream(file);
            write(in, out);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.out.println("写入文件失败:" + local);
            return;
        } finally {
            // Covers the case where the output file could not be opened and
            // write() never ran; closing an already closed stream is harmless.
            closeQuietly(in);
        }
        System.out.println(url + " 已下载到 " + local);
    }

    /**
     * Sends a GET request for {@code url} and returns the response body.
     *
     * <p>The caller must close the returned stream; closing it also closes
     * the underlying connection. If the request fails, the connection is
     * closed before the exception propagates.
     *
     * @param url the resource to fetch
     * @return a stream over the response body
     * @throws Exception if connecting, sending the request or reading the
     *                   response head fails, or if the response has no body
     */
    public static InputStream down(URL url) throws Exception {
        int port = url.getPort() < 0 ? 80 : url.getPort();

        // The request target must start with '/': URL.getFile() is empty for
        // "http://host" and starts with '?' for "http://host?q=1".
        String target = url.getFile();
        if (!target.startsWith("/")) {
            target = "/" + target;
        }

        HttpParams params = new BasicHttpParams();
        HttpProcessor httpproc = new ImmutableHttpProcessor(new HttpRequestInterceptor[]{
            new RequestContent(),
            new RequestTargetHost(),
            new RequestConnControl(),
            new RequestUserAgent(),
            new RequestExpectContinue()});
        HttpRequestExecutor httpexecutor = new HttpRequestExecutor();
        HttpContext context = new BasicHttpContext();
        DefaultHttpClientConnection conn = new DefaultHttpClientConnection();
        context.setAttribute(ExecutionContext.HTTP_CONNECTION, conn);
        // Name the target explicitly so the Host header comes from the URL
        // rather than from whatever the socket address resolves to.
        context.setAttribute(ExecutionContext.HTTP_TARGET_HOST, new HttpHost(url.getHost(), port));

        Socket socket = new Socket(url.getHost(), port);
        boolean handedOff = false;
        try {
            conn.bind(socket, params);

            BasicHttpRequest request = new BasicHttpRequest("GET", target);
            request.setParams(params);
            httpexecutor.preProcess(request, httpproc, context);
            HttpResponse response = httpexecutor.execute(request, conn, context);
            response.setParams(params);
            httpexecutor.postProcess(response, httpproc, context);

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Response has no body: " + response.getStatusLine());
            }
            InputStream body = new ConnectionBoundStream(entity.getContent(), conn);
            handedOff = true;
            return body;
        } finally {
            // Until the stream is handed to the caller nobody else can
            // release the connection, so do it here on every failure path.
            if (!handedOff) {
                try {
                    conn.close();
                } catch (IOException ignored) {
                    // Best effort: the original failure is the one to report.
                }
                closeQuietly(socket);
            }
        }
    }

    /**
     * Copies everything from {@code in} to {@code out} and closes both
     * streams, whether or not the copy succeeds.
     *
     * @param in  source stream
     * @param out destination stream
     * @throws IOException if reading, writing or closing fails
     */
    public static void write(InputStream in, OutputStream out) throws IOException {
        boolean copied = false;
        try {
            byte[] b = new byte[2048];
            int read;
            while ((read = in.read(b)) != -1) {
                out.write(b, 0, read);
            }
            copied = true;
        } finally {
            if (!copied) {
                // Close quietly so a secondary close failure cannot mask the
                // exception that aborted the copy.
                closeQuietly(out);
                closeQuietly(in);
            }
        }
        try {
            out.close();
        } finally {
            in.close();
        }
    }

    /** Closes {@code resource}, ignoring {@code null} and any I/O failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close here.
        }
    }

    /** Closes {@code socket}, ignoring {@code null} and any I/O failure. */
    private static void closeQuietly(Socket socket) {
        if (socket == null) {
            return;
        }
        try {
            socket.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close here.
        }
    }

    /** Response body stream that also closes its connection when closed. */
    private static final class ConnectionBoundStream extends FilterInputStream {

        private final DefaultHttpClientConnection conn;
        private boolean closed;

        ConnectionBoundStream(InputStream content, DefaultHttpClientConnection conn) {
            super(content);
            this.conn = conn;
        }

        @Override
        public void close() throws IOException {
            if (closed) {
                return;
            }
            closed = true;
            try {
                super.close();
            } finally {
                conn.close();
            }
        }
    }
}

本代码基于HttpComponents 4.2 alpha1,可以从 http://hc.apache.org/downloads.cgi 下载

fuzzytalker

国内某综合类学校85后。常年戴啤酒瓶底。除写代码、听音乐和睡觉外无不良嗜好。积极、友情地投身于工坊的工作。

More Posts

3 Responses

Leave a Reply