當前位置：首頁 > 编程语言 > python >内容正文

python

python能爬视频吗_Python爬取视频(其实是一篇福利)

發布時間：2025/3/19 python 25 豆豆

生活随笔收集整理的這篇文章主要介紹了《python能爬视频吗_Python爬取视频(其实是一篇福利)》，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

先找一個視頻地址試驗一下：

# -*- coding: utf-8 -*-

import requests

def download_file(url, path):
    """Save the body of ``url`` to the file ``path``.

    The response is requested with ``stream=True`` and consumed through
    ``iter_content`` in pieces of 1024 bytes, each written to ``path`` as
    it arrives. The object returned by ``requests.get`` is used directly as
    the ``with`` target.
    """
    with requests.get(url, stream=True) as r:
        # Parsed from the response headers; this version does not use the
        # value any further.
        total_bytes = int(r.headers['content-length'])
        block_bytes = 1024
        print('下載開始')
        with open(path, "wb") as out:
            for piece in r.iter_content(chunk_size=block_bytes):
                out.write(piece)

if __name__ == '__main__':
    # Both values are placeholders: substitute a real video address and a
    # destination file path before running.
    video_url = '就在原帖...'
    save_path = '想存哪都行'
    download_file(video_url, save_path)

遭遇當頭一棒：

AttributeError: __exit__

這文檔也會騙人的么！

看樣子是沒有實現上下文需要的__exit__方法（requests 2.18.0 之前的版本中，Response 還不是上下文管理器；較新的版本可以直接用 with）。既然只是為了保證要讓r最后close以釋放連接池，那就使用contextlib的closing特性好了：

# -*- coding: utf-8 -*-

import requests

from contextlib import closing

def download_file(url, path):
    """Save the body of ``url`` to the file ``path``.

    The response is wrapped in ``contextlib.closing`` so that its
    ``close()`` method is called when the ``with`` block ends. The body is
    read through ``iter_content`` in pieces of 1024 bytes.
    """
    with closing(requests.get(url, stream=True)) as r:
        # Parsed from the response headers; this version does not use the
        # value any further.
        total_bytes = int(r.headers['content-length'])
        block_bytes = 1024
        print('下載開始')
        with open(path, "wb") as out:
            for piece in r.iter_content(chunk_size=block_bytes):
                out.write(piece)

程序正常運行了，不過我盯著這文件，怎么大小不見變啊，到底是完成了多少了呢？還是要讓下好的內容及時存進硬盤，還能省點內存是不是：

# -*- coding: utf-8 -*-

import requests

from contextlib import closing

import os

def download_file(url, path):
    """Save the body of ``url`` to ``path``, syncing to disk after every piece.

    The response is wrapped in ``contextlib.closing`` and read through
    ``iter_content`` in pieces of 1024 bytes.
    """
    with closing(requests.get(url, stream=True)) as r:
        # Parsed from the response headers; this version does not use the
        # value any further.
        total_bytes = int(r.headers['content-length'])
        block_bytes = 1024
        print('下載開始')
        with open(path, "wb") as out:
            descriptor_of = out.fileno
            for piece in r.iter_content(chunk_size=block_bytes):
                out.write(piece)
                # Push Python's buffer to the OS, then ask the OS to commit
                # it, so the file on disk grows while the download runs.
                out.flush()
                os.fsync(descriptor_of())

文件以肉眼可見的速度在增大，真心疼我的硬盤，還是最后一次寫入硬盤吧，程序中記個數就好了：

def download_file(url, path):
    """Save the body of ``url`` to ``path`` and print the fraction downloaded.

    After each chunk is written, one line of the form ``已下載<percent>`` is
    printed. The percentage is derived from the number of bytes actually
    written rather than from ``chunk count * 1024``, so it stays correct if
    ``chunk_size`` is changed and does not exceed 100% when the last chunk
    is shorter than ``chunk_size``.

    Raises:
        KeyError: if the response carries no ``content-length`` header.
    """
    with closing(requests.get(url, stream=True)) as r:
        chunk_size = 1024
        content_size = int(r.headers['content-length'])
        print('下載開始')
        with open(path, "wb") as f:
            written = 0
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
                written += len(chunk)
                # float() keeps this a true division on Python 2; min()
                # caps the figure in case more bytes arrive than the header
                # announced.
                loaded = min(1.0, float(written) / content_size)
                print('已下載{0:%}'.format(loaded))

結果就很直觀了：

已下載2.579129%

已下載2.581255%

已下載2.583382%

已下載2.585508%

心懷遠大理想的我怎么會只滿足于這一個呢，寫個類一起使用吧：

# -*- coding: utf-8 -*-

import requests

from contextlib import closing

import time

def download_file(url, path):
    """Save the body of ``url`` to ``path``, reporting through ``ProgressData``.

    The body is read in pieces of ``1024*10`` bytes; after each piece is
    written, ``ProgressData.output()`` is called once.
    """
    with closing(requests.get(url, stream=True)) as r:
        block_bytes = 1024 * 10
        total_bytes = int(r.headers['content-length'])
        print('下載開始')
        with open(path, "wb") as out:
            reporter = ProgressData(size=total_bytes, unit='Kb', block=block_bytes)
            for piece in r.iter_content(chunk_size=block_bytes):
                out.write(piece)
                reporter.output()

class ProgressData(object):
    """Print download progress and speed, one report per received block.

    Args:
        block: size in bytes of one block (one ``output()`` call per block).
        size: total size in bytes of the download.
        unit: label printed after every quantity, e.g. ``'Kb'``.
        file_name: optional prefix printed in front of each report.

    ``block`` and ``size`` are stored divided by 1000.0, so the printed
    quantities are in thousands of bytes.
    """

    def __init__(self, block, size, unit, file_name='', ):
        self.file_name = file_name
        self.block = block / 1000.0
        self.size = size / 1000.0
        self.unit = unit
        self.count = 0
        self.start = time.time()

    def output(self):
        """Record one more block and print the current state.

        Prints a completion message once ``count * block`` reaches ``size``.
        Otherwise prints a progress line followed by a 50-column bar of
        ``/`` characters that shrinks as the download advances.
        """
        self.end = time.time()
        self.count += 1
        elapsed = self.end - self.start
        # Speed is measured over the interval since the previous report.
        speed = self.block / elapsed if elapsed > 0 else 0
        self.start = time.time()
        loaded = self.count * self.block
        if loaded >= self.size:
            # Checked before dividing so that size == 0 reports completion
            # instead of raising ZeroDivisionError.
            print(u'%s下載完成\r\n' % self.file_name)
            return
        progress = round(loaded / self.size, 4)
        # The speed label sits directly in front of the speed value; the
        # percentage comes before it.
        print(u'{0}下載進度{1:.2f}{2}/{3:.2f}{4} {5:.2%} 下載速度{6:.2f}{7}/s'.format(
            self.file_name, loaded, self.unit,
            self.size, self.unit, progress, speed, self.unit))
        print('%50s' % ('/' * int((1 - progress) * 50)))

運行：

下載開始

下載進度10.24Kb/120174.05Kb 0.01% 下載速度4.75Kb/s

下載進度20.48Kb/120174.05Kb 0.02% 下載速度32.93Kb/s

看上去舒服多了。

下面要做的就是多線程同時下載了，主線程生產url放入隊列，下載線程獲取url：

import requests

from contextlib import closing

import time

import Queue

import hashlib

import threading

import os

def download_file(url, path):
    """Download ``url`` to ``path`` unless a complete copy is already there.

    Behaviour:
      * An HTTP error response (status 400 or above) is reported and
        nothing is written, so an error page is never saved as the video.
      * If ``path`` exists and is at least as large as the announced
        ``content-length``, the download is skipped.
      * Otherwise the body is written in pieces of ``1024*10`` bytes and
        ``ProgressData.output()`` is called after each piece.
      * When the server sends no ``content-length`` header (or sends 0),
        the file is still downloaded, but without progress output, because
        ``ProgressData`` needs the total size.
    """
    with closing(requests.get(url, stream=True)) as r:
        if not r.ok:
            print('下載失敗: HTTP %d %s' % (r.status_code, url))
            return
        chunk_size = 1024 * 10
        # The header may be absent, so look it up without raising KeyError.
        # None means the size is unknown.
        announced = r.headers.get('content-length')
        content_size = int(announced) if announced is not None else None
        if (content_size is not None and os.path.exists(path)
                and os.path.getsize(path) >= content_size):
            print('已下載')
            return
        print('下載開始')
        with open(path, "wb") as f:
            p = None
            if content_size:
                p = ProgressData(size=content_size, unit='Kb',
                                 block=chunk_size, file_name=path)
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
                if p is not None:
                    p.output()

class ProgressData(object):
    """Report download progress and speed, one report per received block.

    ``block`` and ``size`` are given in bytes and stored divided by 1000.0;
    ``unit`` is the label printed after every quantity and ``file_name`` is
    an optional prefix for each report.
    """

    def __init__(self, block, size, unit, file_name='', ):
        self.start = time.time()
        self.count = 0
        self.unit = unit
        self.size = size / 1000.0
        self.block = block / 1000.0
        self.file_name = file_name

    def output(self):
        """Count one more block, then print completion or a progress report."""
        self.end = time.time()
        self.count += 1

        # Speed covers only the interval since the previous report.
        elapsed = self.end - self.start
        if elapsed > 0:
            speed = self.block / elapsed
        else:
            speed = 0
        self.start = time.time()

        loaded = self.count * self.block
        progress = round(loaded / self.size, 4)

        if loaded >= self.size:
            print(u'%s下載完成\r\n' % self.file_name)
            return

        template = u'{0}下載進度{1:.2f}{2}/{3:.2f}{4} {5:.2%} 下載速度{6:.2f}{7}/s'
        print(template.format(self.file_name, loaded, self.unit,
                              self.size, self.unit, progress, speed, self.unit))
        # Right-aligned bar of '/' whose length tracks the remaining share.
        remaining_bar = '/' * int((1 - progress) * 50)
        print(remaining_bar.rjust(50))

# Shared work queue: get_url() puts items on it and each run() worker
# thread takes them off.
queue = Queue.Queue()

def run():
    """Worker loop: take URLs from ``queue`` and download each one.

    Each URL is saved as ``e:/download/<md5 of the url>.mp4``.

    The loop ends when it receives ``None`` or when no item arrives within
    100 seconds. On receiving ``None`` the worker puts it back on the queue
    before stopping, so a single ``None`` shuts down every worker, not just
    the first one to see it. As a result each worker prints the final
    message once.
    """
    while True:
        try:
            url = queue.get(timeout=100)
        except Queue.Empty:
            # Nothing to do for 100 seconds: stop quietly rather than let
            # the thread die with a traceback.
            break
        if url is None:
            # Hand the stop signal on to the sibling workers.
            queue.put(None)
            print(u'全下完啦')
            break
        h = hashlib.md5()
        h.update(url)
        name = h.hexdigest()
        path = 'e:/download/' + name + '.mp4'
        download_file(url, path)

def get_url():
    """Put work on the shared ``queue``.

    As written, the only item enqueued is ``None``, the value ``run``
    checks for to stop its loop; no real URLs are added here.
    """
    queue.put(None)

if __name__ == '__main__':
    get_url()
    workers = []
    for i in range(4):
        t = threading.Thread(target=run)
        t.daemon = True
        t.start()
        workers.append(t)
    # Daemon threads are abandoned as soon as the main thread returns, so
    # without this wait the program would exit before anything downloads.
    # Joining with a short timeout in a loop keeps the main thread
    # responsive to Ctrl-C.
    for t in workers:
        while t.is_alive():
            t.join(1.0)

與50位技術專家面對面20年技術見證，附贈技術全景圖

總結

以上是生活随笔為你收集整理的python能爬视频吗_Python爬取视频(其实是一篇福利)的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： redistemplate 设置永不过期
下一篇：同一个事务里面对同一条数据做2次修改_要