java抓取图片_java 抓取网页的图片
//只能抓取一部分圖片,像折800有些子路徑的一行圖片代碼有好多個img,而且排列不規律,我的能力根本就沒法截取下來
package test;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal image crawler: reads one web page line by line, collects the image
 * URLs referenced by {@code src} / {@code background} / {@code background-image}
 * attributes, and downloads each of them into {@link #OUTPUT_DIR} as
 * sequentially numbered files.
 *
 * <p>Only references ending in {@code .jpg}, {@code .png} or {@code .gif} are
 * picked up. Every download is stored with a {@code .jpg} extension regardless
 * of the real image type (unchanged historical behaviour).
 *
 * <p>Instances are not designed for use from multiple threads.
 */
public class getImageByUrl4 {

    /** Directory (with trailing separator) that downloaded images are written to. */
    private static final String OUTPUT_DIR = "H:\\pic\\";

    /**
     * Matches one quoted image attribute. Group 1 is the quote character and
     * group 2 the image reference. The value class excludes quotes, whitespace
     * and angle brackets so a match can never run across several tags on the
     * same line.
     */
    private static final Pattern IMAGE_ATTRIBUTE = Pattern.compile(
            "(?:src|background(?:-image)?)\\s*=\\s*(['\"])([^'\"\\s<>]+\\.(?:jpg|png|gif))\\1",
            Pattern.CASE_INSENSITIVE);

    /** Image URLs discovered on the page that still have to be downloaded. */
    private final List<String> imageUrl = new ArrayList<String>();

    /** Highest file number used so far in {@link #OUTPUT_DIR}. */
    private int count = 0;

    /**
     * Crawls the hard-coded start page and downloads its images.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String netUrl = "http://www.zhe800.com"; // page to crawl
        new getImageByUrl4().init(netUrl);
    }

    /**
     * Fetches {@code netUrl}, then downloads every image URL found on it.
     *
     * @param netUrl address of the page to crawl
     */
    public void init(String netUrl) {
        getPage(netUrl);
        // Drain the queue in discovery order without repeated remove(0) shifts.
        List<String> pending = new ArrayList<String>(imageUrl);
        imageUrl.clear();
        for (String url : pending) {
            getImage(url);
        }
    }

    /**
     * Extracts the image URLs referenced in one line of markup and queues the
     * ones that are not queued yet.
     *
     * @param line   one line of the page source
     * @param netUrl address of the page the line came from; used to resolve
     *               relative references
     */
    public void getImageUrl(String line, String netUrl) {
        for (String url : extractImageUrls(line, netUrl)) {
            if (!imageUrl.contains(url)) {
                imageUrl.add(url);
            }
        }
    }

    /**
     * Finds every image reference in {@code line} and resolves it to an
     * absolute URL.
     *
     * <p>Absolute {@code http(s)} references are returned as they are;
     * root-relative and page-relative references are resolved against
     * {@code netUrl}. References that cannot be resolved, or that do not end up
     * as {@code http}/{@code https} URLs, are skipped.
     *
     * @param line   one line of markup; {@code null} or empty yields an empty list
     * @param netUrl address of the page; when it is not a valid URL only
     *               absolute references can be resolved
     * @return the resolved URLs in the order they appear in {@code line}
     */
    public static List<String> extractImageUrls(String line, String netUrl) {
        List<String> found = new ArrayList<String>();
        if (line == null || line.isEmpty()) {
            return found;
        }

        URL pageUrl = null;
        try {
            pageUrl = new URL(netUrl);
        } catch (MalformedURLException ignored) {
            // Without a usable base only absolute references resolve below.
        }

        Matcher matcher = IMAGE_ATTRIBUTE.matcher(line);
        while (matcher.find()) {
            String reference = matcher.group(2);
            try {
                URL resolved = new URL(pageUrl, reference);
                String protocol = resolved.getProtocol();
                if ("http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol)) {
                    found.add(resolved.toString());
                }
            } catch (MalformedURLException ignored) {
                // Not resolvable to a URL (e.g. unknown scheme); skip this reference.
            }
        }
        return found;
    }

    /**
     * Downloads the page at {@code netUrl} and feeds each line to
     * {@link #getImageUrl(String, String)}. The page is decoded as UTF-8.
     * Failures are reported on standard output and end the scan.
     *
     * @param netUrl address of the page to read
     */
    public void getPage(String netUrl) {
        BufferedReader mybr = null;
        try {
            URL myurl = new URL(netUrl);
            URLConnection myconn = myurl.openConnection();
            InputStream myin = myconn.getInputStream();
            mybr = new BufferedReader(new InputStreamReader(myin, "UTF-8"));
            String line;
            while ((line = mybr.readLine()) != null) {
                getImageUrl(line, netUrl);
            }
        } catch (MalformedURLException e) {
            System.out.println("getPage url異常");
        } catch (IOException e) {
            System.out.println("url連接異常");
            e.printStackTrace();
        } finally {
            closeQuietly(mybr, "讀入流關(guān)閉異常");
        }
    }

    /**
     * Downloads one image into {@link #OUTPUT_DIR}. The file is named with the
     * next free number after the highest numbered file already present.
     * Failures are reported on standard output; nothing is thrown.
     *
     * @param imageUrl absolute URL of the image to download
     */
    public void getImage(String imageUrl) {
        File directory = new File(OUTPUT_DIR);
        // listFiles() returns null for a missing directory, so make sure it exists.
        if (!directory.isDirectory() && !directory.mkdirs()) {
            System.out.println("cannot create output directory " + OUTPUT_DIR);
            return;
        }
        count = Math.max(count, highestNumberedFile(directory));

        InputStream myin = null;
        BufferedOutputStream myos = null;
        try {
            URL myurl = new URL(imageUrl);
            URLConnection myconn = myurl.openConnection();
            myin = myconn.getInputStream();
            // Open the target only after the connection succeeded so a failed
            // request does not leave an empty file behind.
            myos = new BufferedOutputStream(new FileOutputStream(OUTPUT_DIR + (++count) + ".jpg"));
            byte[] buff = new byte[1024];
            int num;
            while ((num = myin.read(buff)) != -1) {
                myos.write(buff, 0, num);
            }
        } catch (MalformedURLException e) {
            System.out.println("getImage url異常");
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("下載圖片url連接異常");
            e.printStackTrace();
        } finally {
            closeQuietly(myin, "讀入流關(guān)閉異常");
            closeQuietly(myos, "輸出流關(guān)閉異常"); // close() also flushes the buffer
        }
    }

    /** Returns the largest number used as a file-name stem in {@code directory}, or 0. */
    private static int highestNumberedFile(File directory) {
        int highest = 0;
        File[] files = directory.listFiles();
        if (files == null) {
            return highest;
        }
        for (File file : files) {
            String name = file.getName();
            int dot = name.indexOf('.');
            String stem = dot < 0 ? name : name.substring(0, dot);
            try {
                highest = Math.max(highest, Integer.parseInt(stem));
            } catch (NumberFormatException ignored) {
                // Not one of the numbered downloads; it does not affect numbering.
            }
        }
        return highest;
    }

    /** Closes {@code resource} if it is non-null, printing {@code failureMessage} when closing fails. */
    private static void closeQuietly(java.io.Closeable resource, String failureMessage) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println(failureMessage);
        }
    }
}
總結
以上是生活随笔為你收集整理的java抓取图片_java 抓取网页的图片的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 禅道启动mysql报错_测试工具之在Li
- 下一篇: java 用于xcopy复制_java调