當前位置：首頁 > 编程语言 > python >内容正文

python

python LAADS+Selenium下载MODIS数据

發布時間：2024/8/26 python 48 豆豆

生活随笔收集整理的這篇文章主要介紹了 python LAADS+Selenium下载MODIS数据，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

from selenium import webdriver

from time import sleep

import tempfile

import os,sys

import pandas as pd

import geopandas as gpd

import time

# Build the search URL

def GetURL(ProductID,StartTime,EndTime,search_file):
    """Build the URL of the LAADS search page for a product, period and area.

    Parameters:
        ProductID: product/collection identifier string, e.g. 'MOD021KM--61/'.
        StartTime: first day of the query as a 'YYYY-MM-DD' string.
        EndTime: last day of the query as a 'YYYY-MM-DD' string.
        search_file: path of a vector file readable by geopandas; only its
            first feature is used as the query boundary.

    Returns:
        The search-page URL as a string.
    """
    # Query boundary.
    data = gpd.GeoDataFrame.from_file(search_file)
    bbox = (data.bounds.values)[0].tolist()

    # Extent of the study area as "upper-left, lower-right" corners, rounded
    # to one decimal place.
    # NOTE(review): not used by the tile-based URL below; kept because the
    # truncated original URL expression may have referenced it.
    Area = str(round(bbox[0],1))+','+str(round(bbox[3],1))+','+str(round(bbox[2],1))+','+str(round(bbox[1],1))

    # Vector file of the MODIS tile grid. Raw string so the backslashes are
    # never interpreted as escape sequences.
    modis_grid_file = r'E:\***\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)

    # Grid cells that intersect the first feature of the query boundary.
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]

    # presumably columns 1 and 2 of the grid table hold the horizontal and
    # vertical tile indices - verify against the shapefile's attribute table.
    # Joining (instead of appending "," and chopping the last character)
    # keeps the "Tile:" prefix intact when nothing intersects.
    tiles = ["H" + str(mv[1]) + "V" + str(mv[2]) for mv in modis_intersection.values]
    path_row = 'Tile:' + ','.join(tiles)

    # NOTE(review): the URL literal was truncated in the copy this code was
    # recovered from. The expression below is a reconstruction of the LAADS
    # search-page scheme (<product>/<start>..<end>/DB/<tiles>) - confirm it
    # against the live site before relying on it.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/4/'
           + ProductID.strip('/') + '/' + StartTime + '..' + EndTime
           + '/DB/' + path_row)
    return url

# Query the imagery with Selenium

def SearchFileList(url):
    """Open the search page in Chrome and download the result list as CSV.

    A fresh folder named after the current timestamp is created and set as
    Chrome's download directory, so the only file that ends up in it is the
    CSV downloaded here.

    Parameters:
        url: address of the search page to open.

    Returns:
        Path of the folder that received the downloaded CSV file.
    """
    # Folder named after the moment the program runs; Selenium downloads
    # are stored there.
    csvdir = 'E:\\***\\' + str(time.time()).replace('.','')
    os.mkdir(csvdir)

    # Selenium / Chrome configuration.
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"  # local path of chromedriver.exe
    # Headless mode can be enabled when no browser window is wanted:
    #   options.add_argument('--headless')

    try:
        driver = webdriver.Chrome(executable_path=chromedriver,options=options)
    except TypeError:
        # Recent Selenium 4 releases no longer accept `executable_path`;
        # the driver location is passed through a Service object instead.
        from selenium.webdriver.chrome.service import Service
        driver = webdriver.Chrome(service=Service(executable_path=chromedriver), options=options)

    # Always shut the browser down, even when the page or the link fails.
    try:
        # Open the search page.
        driver.get(url)
        # The server needs time to run the query after the page opens; 50 s
        # is a fixed allowance that can be tuned to the connection speed.
        # (Waiting for the csv link to appear, e.g. with WebDriverWait,
        # would be an alternative to the fixed sleep.)
        sleep(50)
        # Locate the csv download link. The generic find_element(by, value)
        # form is used because the find_element_by_* helpers were removed in
        # Selenium 4; 'xpath' is the value of By.XPATH.
        csvElement = driver.find_element('xpath', '// *[ @ id = "tab4download"] / a[2]')
        # Click to start the download.
        csvElement.click()
        # Leave time for the csv file to finish downloading.
        sleep(20)
    finally:
        # Close the browser.
        driver.quit()
    return csvdir

# Download the imagery

def MODISDown(FileDir):
    """Download every file listed in the CSV found in *FileDir*.

    The first file in *FileDir* is read as the CSV result list, then the CSV
    and the folder are deleted. For each row, the second column is used as
    the archive path of one file; files are stored under
    'E:/***/MODIS/<6th path segment>/<8th path segment>' and files that
    already exist locally are skipped.

    Parameters:
        FileDir: folder that contains the downloaded CSV (and nothing else).

    Raises:
        FileNotFoundError: if *FileDir* is empty, i.e. no CSV was downloaded.
    """
    # Name of the downloaded csv file.
    listing = os.listdir(FileDir)
    if not listing:
        raise FileNotFoundError("No CSV file list found in " + FileDir)
    csvfilename = listing[0]
    # Full path of the csv file.
    csvfilepath = os.path.join(FileDir, csvfilename)
    csvvalues = pd.read_csv(csvfilepath).values
    # The list is in memory now; the temporary csv and folder can go.
    os.remove(csvfilepath)
    os.rmdir(FileDir)

    # Download the data.
    total = len(csvvalues)
    for file_count, cv in enumerate(csvvalues, start=1):
        parts = cv[1].split("/")
        # NOTE(review): the link literal was truncated in the copy this code
        # was recovered from. Prefixing the LAADS host to the archive path
        # from the csv is a reconstruction - confirm before relying on it.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]
        outdir = 'E:/***/MODIS/' + parts[5]
        # makedirs also creates missing parents and tolerates an existing dir.
        os.makedirs(outdir, exist_ok=True)
        path = outdir + '/' + parts[7]
        if os.path.exists(path):
            continue
        print("({0}/{1}) Downloading {2}".format(file_count, total, modislink.split("/")[-1]))
        with open(path, 'w+b') as out:
            geturl(modislink, out)
        # A failed request leaves an empty file behind; remove it so the
        # "already exists" check above does not skip it on the next run.
        if os.path.getsize(path) == 0:
            os.remove(path)

# Fetch a download link and download the image data

def _geturl_curl(url, headers, out):
    """Fetch *url* by running the external `curl` program (fallback path)."""
    import subprocess
    args = ['curl', '--fail', '-sS', '-L', '--get', url]
    for (k, v) in headers.items():
        args.extend(['-H', ': '.join([k, v])])
    try:
        if out is None:
            # subprocess.check_output returns stdout as a byte string
            result = subprocess.check_output(args)
            return result.decode('utf-8') if isinstance(result, bytes) else result
        subprocess.call(args, stdout=out)
    except subprocess.CalledProcessError as e:
        # e.output is bytes (or None); decode it before formatting.
        detail = e.output
        if isinstance(detail, bytes):
            detail = detail.decode('utf-8', 'replace')
        print('curl GET error message: %s' % detail, file=sys.stderr)
    return None


def geturl(url,out=None):
    """Request *url* with the bearer token and return or store the payload.

    Parameters:
        url: address to fetch.
        out: optional writable file object. When given, the payload is
            streamed into it with progress reporting; when omitted, the
            payload is returned as text.

    Returns:
        The response body decoded as UTF-8 when *out* is None and the request
        succeeds; None in every other case (including request errors, which
        are reported on stderr instead of being raised).
    """
    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')
    headers = { 'user-agent' : USERAGENT }
    token = '******' # your token; available in the profile after logging in to the Earthdata site
    headers['Authorization'] = 'Bearer ' + token

    try:
        import ssl
        # The default context verifies the server certificate and host name,
        # which matters because the bearer token is sent with the request.
        CTX = ssl.create_default_context()
        CTX.minimum_version = ssl.TLSVersion.TLSv1_2
    except (ImportError, AttributeError):
        # No usable TLS 1.2 support in this interpreter: fall back to curl.
        return _geturl_curl(url, headers, out)

    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen

    try:
        # The with-block closes the connection on every exit path.
        with urlopen(Request(url, headers=headers), context=CTX) as response:
            if out is None:
                return response.read().decode('utf-8')
            start = time.time()
            # Stream the payload into `out`, reporting progress as it goes.
            written = chunk_read(response, out, report_hook=chunk_report)
            elapsed = max(time.time() - start,1.0)
            # Average download speed, based on the bytes actually written so
            # a missing Content-Length header cannot break the arithmetic.
            rate = (written / 1024 ** 2) / elapsed
            print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(written, elapsed, rate))
    except HTTPError as e:
        # `code` and `reason` are attributes of HTTPError, not methods.
        print('HTTP GET error code: %d' % e.code, file=sys.stderr)
        print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
    except URLError as e:
        print('Failed to make request: %s' % e.reason, file=sys.stderr)
    return None

# chunk_read: adapted from an external source (the reference URL is missing from this copy)

def chunk_read(response, local_file, chunk_size=10240, report_hook=None):
    """Copy *response* into *local_file* chunk by chunk.

    Parameters:
        response: object with a ``read(size)`` method returning bytes.
        local_file: writable file object; if it rejects bytes with a
            TypeError, each chunk is decoded with ``local_file.encoding``.
        chunk_size: number of bytes requested per read.
        report_hook: optional callable invoked after every written chunk as
            ``report_hook(bytes_so_far, file_size)``.

    Returns:
        The number of bytes read from the response. A read error stops the
        copy early and is reported on stdout rather than raised.
    """
    # Full size of the file (None when it cannot be determined).
    file_size = get_total_size(response)
    # Bytes downloaded so far.
    bytes_so_far = 0

    while True:
        try:
            # Read a fixed-size piece from the response.
            chunk = response.read(chunk_size)
        except Exception:
            # Best effort: report and stop. `Exception` (not a bare except)
            # so Ctrl-C and interpreter exit still interrupt a download.
            sys.stdout.write("\n > There was an error reading data. \n")
            break

        # An empty chunk marks the end of the stream.
        if not chunk:
            break

        try:
            # Write the piece to the local file.
            local_file.write(chunk)
        except TypeError:
            local_file.write(chunk.decode(local_file.encoding))

        # Update the running total once the piece is written.
        bytes_so_far += len(chunk)

        if report_hook:
            # Report download progress.
            report_hook(bytes_so_far, file_size)

    return bytes_so_far

def chunk_report(bytes_so_far, file_size):
    """Write a one-line download progress message to stdout.

    Parameters:
        bytes_so_far: number of bytes downloaded so far.
        file_size: total size in bytes, or None/0 when it is not known.

    The line ends with a carriage return so successive reports overwrite
    each other on a terminal.
    """
    # A size of 0 is treated like an unknown size: it cannot be used as a
    # divisor for the percentage.
    if file_size:
        # Download progress as a percentage.
        percent = round(float(bytes_so_far) / file_size * 100, 2)
        sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                         (bytes_so_far, file_size, percent))
    else:
        # We couldn't figure out the size.
        sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far))

def get_total_size(response):
    """Return the Content-Length of *response* as an int, or None.

    The header is looked up through ``response.headers.get`` first and
    through ``response.getheader`` as a fallback. When the header is absent
    or is not a valid integer, a short notice is printed and None is
    returned.
    """
    raw = None

    headers = getattr(response, 'headers', None)
    if headers is not None:
        try:
            raw = headers.get('Content-Length')
        except AttributeError:
            raw = None

    if raw is None:
        getter = getattr(response, 'getheader', None)
        if callable(getter):
            raw = getter('Content-Length')

    try:
        return int(raw.strip())
    except (AttributeError, ValueError):
        # AttributeError: header missing (raw is None);
        # ValueError: header present but not a number.
        print ("> Problem getting size")
        return None

if __name__ == "__main__":
    # What to download: the product identifier and the first and last day
    # of the period (plain 'YYYY-MM-DD' strings).
    ProductID, StartTime, EndTime = 'MOD021KM--61/', '2020-06-01', '2020-06-03'

    # Vector file that delimits the query area.
    # NOTE(review): the '?' characters in the file name may be artifacts of
    # a lossy copy - confirm the real file name.
    search_file = r'E:\***\?北京市?.shp'

    # Step 1: build the search URL.
    url = GetURL(
        ProductID=ProductID,
        StartTime=StartTime,
        EndTime=EndTime,
        search_file=search_file,
    )

    # Step 2: fetch the list of matching files.
    csvdir = SearchFileList(url=url)

    # Step 3: download everything on that list.
    MODISDown(FileDir=csvdir)

總結

以上是生活随笔為你收集整理的python LAADS+Selenium下载MODIS数据的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： MySQL中的pid与socket是什么
下一篇： websocket python爬虫_p