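// (Stray web-page advertisement banner, not part of the program: "Hundreds of GB of resources painstakingly compiled over three months by a Tsinghua expert, shared for free!")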
package com.jsoup;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches the pages of a Douban photo album over HTTP, extracts the elements
 * matching {@code .photo_wrap .photolst_photo} from each page, and concatenates
 * their HTML into a single local HTML file.
 */
public class Douban {

    /**
     * Destination file for the generated HTML.
     * NOTE(review): "E:photo.html" is a drive-relative Windows path (resolved
     * against the current directory of drive E:); "E:\\photo.html" may have
     * been intended - confirm before changing.
     */
    private static final String OUTPUT_PATH = "E:photo.html";

    /** Album URL; the paging query string is appended by {@link #select}. */
    private static final String ALBUM_URL = "http://www.douban.com/photos/album/64180843/";

    /** Request header used to present a browser-like User-Agent. */
    private static final String USER_AGENT_HEADER = "User-Agent";
    private static final String USER_AGENT_VALUE = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3";

    /** CSS selectors applied one after the other to locate the photo elements. */
    private static final String PHOTO_WRAP_SELECTOR = ".photo_wrap";
    private static final String PHOTO_LINK_SELECTOR = ".photolst_photo";

    /** Number of album pages that {@link #main} requests. */
    private static final int TOTAL_PAGE_SIZE = 29;

    /** Step between the {@code start} offsets of consecutive pages. */
    private static final int PAGE_SIZE = 18;

    /** Charset of the generated file; also declared in the emitted HTML head. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /**
     * Downloads one album page and appends the HTML of every matching photo
     * element to {@code writer}. In each element's HTML, every occurrence of
     * its {@code href} attribute value is replaced with its {@code title}
     * attribute value.
     *
     * @param url    album URL without a query string; {@code ?start=<start>} is appended
     * @param start  value of the {@code start} query parameter
     * @param writer destination for the extracted HTML; flushed before returning, never closed
     * @throws IOException if the request, the parsing, or the write fails
     */
    public static void select(String url, int start, Writer writer)
            throws IOException {
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            String pageUrl = url + "?start=" + start;
            HttpGet request = new HttpGet(pageUrl);
            request.setHeader(USER_AGENT_HEADER, USER_AGENT_VALUE);
            HttpResponse response = client.execute(request);
            if (response.getEntity() == null) {
                // No body to parse, hence nothing to extract from this page.
                return;
            }

            Document doc;
            InputStream stream = response.getEntity().getContent();
            try {
                // A null charset lets jsoup detect the encoding from the document
                // itself (falling back to UTF-8) instead of decoding the bytes
                // with the platform default charset.
                doc = Jsoup.parse(stream, null, pageUrl);
            } finally {
                stream.close();
            }

            Elements photoLinks = doc.body().select(PHOTO_WRAP_SELECTOR)
                    .select(PHOTO_LINK_SELECTOR);
            for (Element photoLink : photoLinks) {
                String href = photoLink.attr("href");
                String title = photoLink.attr("title");
                String html = photoLink.outerHtml();
                // String.replace with an empty target matches at every position and
                // would splice the title between all characters, so only substitute
                // when the element actually has an href value.
                if (href.length() > 0) {
                    html = html.replace(href, title);
                }
                writer.append(html);
            }
            writer.flush();
        } finally {
            // Each call creates its own client; release its pooled connections.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Requests {@link #TOTAL_PAGE_SIZE} album pages at offsets that are
     * multiples of {@link #PAGE_SIZE} and writes the collected photo elements,
     * wrapped in a minimal HTML document, to {@link #OUTPUT_PATH}.
     *
     * @param args unused
     * @throws IOException if a download or the file output fails
     */
    public static void main(String[] args) throws IOException {
        // Explicit UTF-8 so non-ASCII titles survive regardless of the platform
        // default charset; the meta tag tells the browser how to decode the file.
        BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(OUTPUT_PATH), OUTPUT_CHARSET));
        try {
            writer.append("<html><head><meta charset=\"" + OUTPUT_CHARSET + "\"></head><body>");
            for (int page = 0; page < TOTAL_PAGE_SIZE; page++) {
                Douban.select(ALBUM_URL, PAGE_SIZE * page, writer);
            }
            writer.append("</body></html>");
        } finally {
            // Close even when a page download fails so the file handle is not leaked.
            writer.close();
        }
        System.out.println("*****************Finish*****************");
    }
}