當前位置：首頁 > 编程语言 > python >内容正文

python

python多线程下载编程_python多线程下载图片

發布時間：2024/9/3 python 87 豆豆

生活随笔收集整理的這篇文章主要介紹了 python多线程下载编程_python多线程下载图片，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

功能：從 image.baidu.com 自動翻頁下載圖片的 python 程序

用法：運行程序後，輸入關鍵字即可

#!/usr/bin/python

# filename: getbaidupic.py

# description: get images from image.baidu.com

# author: cjcse

# version: v 0.21

import formatter
import htmllib
import os
import string
import sys
import thread
import threading
import time
import urllib

class Parser(htmllib.HTMLParser):
    """HTML parser that collects hyperlinks keyed by their anchor text.

    After a document has been fed, ``anchors`` maps each non-empty anchor
    text to the list of ``href`` values that appeared with that text, in the
    order they were encountered.
    """

    def __init__(self, verbose=0):
        """Create a parser whose formatted output is discarded.

        verbose -- passed through unchanged to ``htmllib.HTMLParser``.
        """
        self.anchors = {}
        # Defined up front so anchor_end() can always read it.
        self.anchor = None
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter(), verbose)

    def anchor_bgn(self, href, name, type):
        """Start capturing the text of an ``<a>`` element and remember its href.

        The signature (including the ``type`` name) is dictated by the
        ``htmllib.HTMLParser`` hook being overridden.
        """
        self.save_bgn()
        self.anchor = href

    def anchor_end(self):
        """Finish an ``<a>`` element and file its href under its anchor text.

        Links with an empty href or empty text are ignored.
        """
        if self.anchor is None:
            # Closing tag without an open anchor: no text capture was started,
            # so there is nothing to collect.
            return
        # Clear the pending href so it cannot leak into a later, unmatched </a>.
        href, self.anchor = self.anchor, None
        text = self.save_end().strip()
        if href and text:
            self.anchors.setdefault(text, []).append(href)

def _unique_path(path):
    """Return ``path`` or, if it exists, ``path`` with ``_1``, ``_2``, ... before the extension."""
    stem, ext = os.path.splitext(path)
    candidate = path
    counter = 1
    while os.path.exists(candidate):
        candidate = "%s_%d%s" % (stem, counter, ext)
        counter += 1
    return candidate


def GetJpg(url, min_size=11024):
    """Download one image URL into the directory named by the global ``save``.

    The file is named after the last ``/``-separated component of ``url``;
    if that name is already taken, ``_1``, ``_2``, ... is inserted before the
    extension. Prints ``<url>\\t[OK]`` or ``<url>\\t[Failed]``.

    url      -- address of the image to fetch.
    min_size -- payloads shorter than this many bytes are reported as failed
                and not written (default matches the original threshold).

    Returns None. Never raises for download or file errors: this is a
    best-effort worker, failures are only reported on stdout.
    """
    global total
    # ``total`` is not initialised anywhere visible in this file. Start from 0
    # rather than raising NameError, which previously made every call fail.
    # NOTE(review): the increment is not synchronised between worker threads.
    total = globals().get("total", 0) + 1

    try:
        response = urllib.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        if len(data) < min_size:
            print(url + "\t[Failed]")
            return

        # Pick the file name only once the data is in hand, right before
        # writing, to keep the exists-check and the write close together.
        target = _unique_path(os.path.join(save, url.split("/")[-1]))
        with open(target, "wb") as out:
            out.write(data)
    except Exception:
        # Thread boundary: report and carry on with the next image.
        print(url + "\t[Failed]")
        return

    print(url + "\t[OK]")

def GetBaiduNextPage(url, step=18):
    """Return ``url`` with paging parameters for the current page, then advance.

    Appends ``&rn=<rn>&pn=<pn>&ln=<ln>`` built from the module globals and
    afterwards increases the global ``pn`` by ``step``.

    Any ``rn``/``pn``/``ln`` parameters already present after the first ``&``
    are dropped first, so feeding the returned URL back in (as the main loop
    does) replaces the paging values instead of piling up duplicates.

    url  -- search URL, with or without earlier paging parameters.
    step -- amount added to ``pn`` per call (default 18, the original value).
    """
    global pn
    paging_keys = ("rn", "pn", "ln")
    kept = [part for part in url.split("&")
            if part.split("=", 1)[0] not in paging_keys]
    next_url = "%s&rn=%s&pn=%s&ln=%s" % ("&".join(kept), rn, pn, ln)
    pn += step
    return next_url

def _iter_jpg_urls(hrefs):
    """Yield each ``http://...jpg`` value found among the ``&``/``=`` pieces of hrefs."""
    for href in hrefs:
        if ".jpg" not in href.lower():
            continue
        for param in href.split("&"):
            for piece in param.split("="):
                # Compare case-insensitively but yield the text unchanged:
                # lower-casing the URL itself can break case-sensitive paths.
                lowered = piece.lower()
                if "http://" in lowered and ".jpg" in lowered:
                    yield piece


def GetAllJpg(url):
    """Fetch one result page and download every JPEG URL embedded in its links.

    The page is parsed with ``Parser``; each collected href is split on
    ``&`` and ``=`` and every piece containing both ``http://`` and ``.jpg``
    is handed to ``GetJpg``.

    url -- address of the result page.

    Returns None. If the page itself cannot be fetched, prints
    ``<url>\\t[Failed]`` and returns instead of raising.
    """
    try:
        response = urllib.urlopen(url)
        try:
            html = response.read()
        finally:
            response.close()
    except Exception:
        # Thread boundary: report the failed page rather than dying with a
        # traceback in the worker thread.
        print(url + "\t[Failed]")
        return

    parser = Parser()
    parser.feed(html)
    parser.close()

    for hrefs in parser.anchors.values():
        for jpg_url in _iter_jpg_urls(hrefs):
            # GetJpg reports its own failures and does not raise for them.
            GetJpg(jpg_url)

# ---------------------------------------------------------------------------
# Script body: ask for a keyword, then fetch result pages in parallel.
# ---------------------------------------------------------------------------
print("---------------------------------------------------------------------")
print("Description: Get images from image.baidu.com. ")
print("Author: cjcse from CU.")
print("version: v 0.2.")
print("---------------------------------------------------------------------")

keyword = raw_input("Input your keywords: ")
while not keyword:
    keyword = raw_input("Keyword: ")

# Quote the keyword so characters such as "&", "=" or "#" cannot corrupt the
# query string.
url = ("http://image.baidu.com/i?ct=201326592&cl=2&lm=-1&tn=baiduimage&pv=&word="
       + urllib.quote(keyword) + "&z=5")

# Module-level settings read by GetJpg (save) and GetBaiduNextPage (rn, pn, ln).
pages = 50
save = "c:\\p_w_picpath_baidu"
rn = 21
pn = 18
ln = 2000

try:
    if not os.path.exists(save):
        os.mkdir(save)
except OSError:
    print("Failed to create directory in disk c:")
    sys.exit(1)

print('The images will be stored in folder "%s".' % save)

# One worker per result page. The first worker gets the bare search URL, the
# following ones get the URL with paging parameters applied.
workers = []
for _ in range(pages):
    worker = threading.Thread(target=GetAllJpg, args=(url,))
    # Daemon threads so Ctrl-C in the main thread still ends the program.
    worker.daemon = True
    worker.start()
    workers.append(worker)
    url = GetBaiduNextPage(url)

# Wait for the downloads instead of spinning in a busy loop; the short join
# timeout keeps the main thread responsive to KeyboardInterrupt.
for worker in workers:
    while worker.is_alive():
        worker.join(0.5)

本文參與騰訊云自媒體分享計劃，歡迎正在閱讀的你也加入，一起分享。

與50位技術專家面對面20年技術見證，附贈技術全景圖

總結

以上是生活随笔為你收集整理的python多线程下载编程_python多线程下载图片的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：四步相移法怎么获得相位信息_如何拥有超强
下一篇： pythonrandom模块_pytho