今天偶然看到一篇关于 Java 爬虫入门的博客,想起以前学过一点爬虫,于是在该博客的基础上写了一个 demo,用来爬取慕课网的实战课程。
首先需要向目标网页发送 HTTP 请求,这里用到了 HttpURLConnection 类,具体代码如下:
package util;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
* @author yingming006
* Date: 2019/6/22
*/
public class ConnectionUtil {
public static String Connect(String address) {
URL url = null;
HttpURLConnection conn = null;
InputStream in = null;
BufferedReader reader = null;
StringBuffer stringBuffer = null;
try {
url = new URL(address);
// 得到 connection 对象
conn = (HttpURLConnection) url.openConnection();
// 建立连接
conn.connect();
// 获取输入流
in = conn.getInputStream();
reader = new BufferedReader(new InputStreamReader(in));
stringBuffer = new StringBuffer();
String line = null;
while ((line = reader.readLine()) != null) {
stringBuffer.append(line);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
conn.disconnect();