取网页源码

// Download the page at `url` and collect its text into `source`.
// Line terminators are not kept: readLine() strips them and the lines are joined directly.
String line = "", source = "", url = "https://www.qidian.com/all";
HttpURLConnection l_connection = (HttpURLConnection) (new URL(url)).openConnection();
try {
    l_connection.connect();
    // StringBuilder avoids re-copying the whole page each time a line is appended.
    StringBuilder l_buffer = new StringBuilder();
    // try-with-resources closes the reader (and the wrapped stream) even if reading fails.
    try (BufferedReader l_reader = new BufferedReader(
            new InputStreamReader(l_connection.getInputStream(), "UTF-8"))) {
        while ((line = l_reader.readLine()) != null) {
            l_buffer.append(line);
        }
    }
    source = l_buffer.toString();
} finally {
    // Always release the connection, whether or not the download succeeded.
    l_connection.disconnect();
}

取源码后截取中间文本

// Cut out the region of `source` that starts at the left marker and ends just before
// the right marker, storing it in `part`.
String leftMark = "左边文本";
String rightMark = "右边文本";
// Declared before use: this is the offset the left-marker search starts from (0 = start of text).
int endPos = 0;
int startPos = source.indexOf(leftMark, endPos);
int searchFrom;
if (startPos == -1) {
    // Left marker missing: start from the search offset so substring() receives a valid index.
    startPos = endPos;
    searchFrom = endPos;
} else {
    // Search for the right marker only after the end of the left marker.
    searchFrom = startPos + leftMark.length();
}
endPos = source.indexOf(rightMark, searchFrom);
if (endPos == -1) {
    // Right marker missing: take everything up to the end of the text.
    endPos = source.length();
}
String part = source.substring(startPos, endPos);

// Variables for the fields to be extracted; they are only declared here, not assigned.
String bookID;
String bookUrl;
String bookName;
// Scan `part` with a case-insensitive pattern and read capture groups 1 and 2 of every match.
String regEx = "要匹配的正则表达式";
Pattern p = Pattern.compile(regEx, Pattern.CASE_INSENSITIVE);
Matcher m = p.matcher(part);
while (m.find()) {
    String firstGroup = m.group(1);
    String secondGroup = m.group(2);
}