python 解析excel模板_python 解析Excel
python 解析Excel
公司背景:好吧LZ太懶了.略...
原由起因:公司老板發話要導出公司數據庫中符合條件的數據,源數據有400萬,符合條件的大概有70萬左右吧.
最終目的:符合條件的數據并生成Excel
翠花,上代碼:
由于LZ python的底子并不是很好只會寫一些簡單的腳本,全當是記錄學習里程了。此次采用的是openpyxl,因為查到它支持Excel2010
# coding=utf-8
from openpyxl.workbook import Workbook
from openpyxl.writer.excel import ExcelWriter
from openpyxl.styles import Color, Fill
from openpyxl.cell import Cell
import datetime
from pymongo import MongoClient
import pymongo
import smtplib
from email.MIMEText import MIMEText
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email import Encoders
import time
# The company stores its data in MongoDB (the author notes not knowing why a
# relational database was not chosen).
mongoDB = MongoClient('beta-mongo01')
db_name = 'core'
db = mongoDB[db_name]
# Cursor over every document of the customerProfiles collection.
rows = db.customerProfiles.find()

# Keywords used as the selection criteria: a profile qualifies when any of
# them occurs in its work history or expected industries.
# NOTE(review): "財務" is listed twice and "證劵" may be a misspelling of
# "證券" -- both kept exactly as written; confirm before changing.
filters = [
    "理財",
    "金融",
    "證劵",
    "咨詢",
    "銀行",
    "財務",
    "信托",
    "基金",
    "期貨",
    "租賃",
    "投資",
    "保險",
    "會計",
    "審計",
    "投行",
    "券商",
    "股權",
    "風險",
    "財務",
    "財富",
    "資產",
]
def getfiltersByexperiences(experiences):
    """Tell whether any keyword of the module-level ``filters`` occurs in *experiences*.

    Args:
        experiences: Text assembled from a profile's work experiences.

    Returns:
        True when at least one keyword is a substring of *experiences*,
        otherwise False (previously the function fell off the end and
        returned None, which callers comparing with ``== True`` treat the
        same way).
    """
    for keyword in filters:
        # Byte-string keywords are decoded as UTF-8 before the substring
        # test; keywords that are already text are used unchanged, because
        # text objects have no usable ``decode``.
        if isinstance(keyword, bytes):
            keyword = keyword.decode('utf-8')
        if keyword in experiences:
            return True
    return False
def getExperieces(row):
    """Check a profile's work history against the keyword filters.

    The ``position`` and ``organization`` of every entry in
    ``row["workExperiences"]`` are concatenated and handed to
    ``getfiltersByexperiences``.

    Args:
        row: Profile document (mapping) read from the database.

    Returns:
        Whatever ``getfiltersByexperiences`` returns for the concatenated
        text. When the work history is missing or malformed the check is
        run against an empty string instead.
    """
    try:
        experiences = "".join(
            job["position"] + job["organization"]
            for job in row["workExperiences"]
        )
    except (KeyError, TypeError):
        # Missing key, a None value, or a non-text field: treat the profile
        # as having no work history rather than aborting the export.
        experiences = ""
    return getfiltersByexperiences(experiences)
def getfiltersByexpect(expect):
    """Tell whether any keyword of the module-level ``filters`` occurs in *expect*.

    Args:
        expect: Text assembled from a profile's expected industries.

    Returns:
        True when at least one keyword is a substring of *expect*,
        otherwise False (previously the function fell off the end and
        returned None, which callers comparing with ``== True`` treat the
        same way).
    """
    for keyword in filters:
        # Byte-string keywords are decoded as UTF-8 before the substring
        # test; keywords that are already text are used unchanged, because
        # text objects have no usable ``decode``.
        if isinstance(keyword, bytes):
            keyword = keyword.decode('utf-8')
        if keyword in expect:
            return True
    return False
def getExpect(row):
    """Check a profile's expected industries against the keyword filters.

    Every entry of ``row["expect"]["expectIndustry"]`` is concatenated and
    handed to ``getfiltersByexpect``.

    Args:
        row: Profile document (mapping) read from the database.

    Returns:
        Whatever ``getfiltersByexpect`` returns for the concatenated text.
        When the expected-industry data is missing or malformed the check
        is run against an empty string instead.
    """
    try:
        expect = "".join(row["expect"]["expectIndustry"])
    except (KeyError, TypeError):
        # Missing key, a None value, or a non-text entry: treat the profile
        # as having no expected industry rather than aborting the export.
        expect = ""
    return getfiltersByexpect(expect)
def getfullName(row):
    """Tell whether a profile carries a usable full name.

    Args:
        row: Profile document (mapping) read from the database.

    Returns:
        True when ``row["fullName"]`` exists and is neither ``""`` nor
        ``None``; False otherwise, including when *row* cannot be indexed.
    """
    try:
        name = row["fullName"]
    except (KeyError, TypeError):
        return False
    return name != "" and name is not None
def getEmail(row):
    """Tell whether a profile carries a usable contact e-mail address.

    Args:
        row: Profile document (mapping) read from the database.

    Returns:
        True when ``row["descriptions"]["contactEmail"]`` exists and is
        neither ``""`` nor ``None``; False otherwise, including when either
        level of the lookup is missing or not indexable.
    """
    try:
        email = row["descriptions"]["contactEmail"]
    except (KeyError, TypeError):
        return False
    return email != "" and email is not None
def getPhone(row):
    """Tell whether a profile carries a usable contact phone number.

    Args:
        row: Profile document (mapping) read from the database.

    Returns:
        True when ``row["descriptions"]["contactPhoneNumber"]`` exists and
        is neither ``""`` nor ``None``; False otherwise, including when
        either level of the lookup is missing or not indexable.
    """
    try:
        phone = row["descriptions"]["contactPhoneNumber"]
    except (KeyError, TypeError):
        return False
    return phone != "" and phone is not None
now = datetime.datetime.now()

# Matching profiles are numbered by a running counter. Profiles whose number
# is not above SKIP_THROUGH_COUNT are skipped, and the export stops once the
# counter reaches STOP_COUNT.
SKIP_THROUGH_COUNT = 200002
STOP_COUNT = 500002

# (column letter, header text, column width) for every exported column.
COLUMNS = [
    ("A", "編號", 10.0),
    ("B", "CustomerId", 25.0),
    ("C", "姓名", 10.0),
    ("D", "性別", 10.0),
    ("E", "所在地", 15.0),
    ("F", "郵箱", 20.0),
    ("G", "電話", 15.0),
    ("H", "曾經任職職位", 25.0),
    ("I", "曾經任職公司", 35.0),
    ("J", "期望行業", 35.0),
    ("K", "工作年份", 15.0),
    ("L", "簡歷更新時間", 15.0),
    ("M", "簡歷來源", 10.0),
]


def _joined_work_field(row, field):
    """Return *field* of every work experience, each followed by "/"; "" on bad data."""
    try:
        return "".join(job[field] + "/" for job in row["workExperiences"])
    except (KeyError, TypeError):
        return ""


def _joined_expect_industry(row):
    """Return the concatenated expected industries of *row*; "" on bad data."""
    try:
        # The length check used to read "expectedIndustry" while the loop
        # read "expectIndustry"; the key is now "expectIndustry" throughout,
        # matching getExpect.
        return "".join(row["expect"]["expectIndustry"])
    except (KeyError, TypeError):
        return ""


def _export_values(row, count):
    """Build the cell values for one profile, in COLUMNS order."""
    # Default to a dict so the chained .get() calls work even when the
    # profile has no "descriptions" entry.
    descriptions = row.get("descriptions", {})
    return [
        str(count - 1),
        str(row.get("_id", "")),
        '%s' % (row.get("fullName", ""),),
        '%s' % (row.get("gender", ""),),
        '%s' % (descriptions.get("city", ""),),
        '%s' % (descriptions.get("contactEmail", ""),),
        '%s' % (descriptions.get("contactPhoneNumber", ""),),
        _joined_work_field(row, "position"),
        _joined_work_field(row, "organization"),
        _joined_expect_industry(row),
        str(descriptions.get("workLife", "")) + "年",
        # NOTE(review): [0:11] keeps one character after a 10-character
        # date; kept as in the original -- confirm whether [0:10] was meant.
        str(row.get("updateTime", "2015-05-05 00:00:00"))[0:11],
        str(row.get("source", "")),
    ]


# Create a new workbook
wb = Workbook()
# The first sheet is ws
ws = wb.worksheets[0]
# Set the sheet name
ws.title = u"簡歷數據"
# Header row.
# NOTE(review): ws.cell(<coordinate string>) is the call form the original
# used; newer openpyxl releases expect row/column arguments -- confirm
# against the installed version.
for letter, header, _width in COLUMNS:
    ws.cell(letter + '1').value = header

count = 2
for row in rows:  # iterate over the fetched profiles
    if not getfullName(row):
        continue
    if not (getEmail(row) or getPhone(row)):
        continue
    if not (getExperieces(row) or getExpect(row)):
        continue
    count = count + 1
    if count <= SKIP_THROUGH_COUNT:
        continue
    try:  # write the Excel row
        values = _export_values(row, count)
        for (letter, _header, _width), value in zip(COLUMNS, values):
            ws.cell('%s%d' % (letter, count)).value = value
    except Exception as e:
        # Best effort: one bad profile must not abort the whole export.
        # The counter is advanced once more, as the original did, so the
        # next profile does not reuse this sheet row. %r keeps the message
        # printable whatever the exception text contains.
        print("skipped row %d: %r" % (count, e))
        count = count + 1
    else:
        print(count)
    # ">=" rather than "==": the extra increment above can step over the
    # exact limit, which previously meant the loop never stopped.
    if count >= STOP_COUNT:
        break

# Set the column widths
for letter, _header, width in COLUMNS:
    ws.column_dimensions[letter].width = width
# File name
file_name = str(now.strftime("%Y%m%d")) + "(2).xlsx"
# Directory intended for the file (not used by the save call below)
file_dir = '/usr/src/Python'
# Save the generated xlsx
wb.save(filename = str(file_name))
# Kept from the original; the writer object is not used afterwards.
ew = ExcelWriter(workbook = wb)
還有一些從網上找的記錄下來 各種方法...
python讀取excel文件代碼:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Read data from an Excel file.
# Requirement: take the rows below the first one, and the first 13 columns
# of each of those rows.
import xlrd

data = xlrd.open_workbook('test.xls')  # open the xls file
table = data.sheets()[0]  # first sheet
nrows = table.nrows  # number of rows in the sheet
for i in range(1, nrows):  # start at 1 to skip the first row
    print(table.row_values(i)[:13])  # first thirteen columns
使用xlrd讀取文件,使用xlwt生成Excel文件(可以控制Excel中單元格的格式)。但是用xlrd讀取excel是不能對其進行操作的;而xlwt生成excel文件是不能在已有的excel文件基礎上進行修改的,如需要修改文件就要使用xlutils模塊。pyExcelerator模塊與xlwt類似,也可以用來生成excel文件。
#coding=utf-8
#######################################################
#filename:test_xlrd.py
#author:defias
#date:xxxx-xx-xx
#function:讀excel文件中的數據
#######################################################
import xlrd

# Open a workbook
workbook = xlrd.open_workbook('E:\\Code\\Python\\testdata.xls')
# Collect the names of all sheets
worksheets = workbook.sheet_names()
print('worksheets is %s' %worksheets)
# Select Sheet1 by name
worksheet1 = workbook.sheet_by_name(u'Sheet1')
# Alternatives -- select by position instead:
#     worksheet1 = workbook.sheets()[0]
#     worksheet1 = workbook.sheet_by_index(0)
# Iterating over every sheet object:
#     for worksheet_name in worksheets:
#         worksheet = workbook.sheet_by_name(worksheet_name)

# Iterate over all rows of Sheet1
num_rows = worksheet1.nrows
for curr_row in range(num_rows):
    row = worksheet1.row_values(curr_row)
    print('row%s is %s' %(curr_row,row))

# Iterate over all columns of Sheet1
num_cols = worksheet1.ncols
for curr_col in range(num_cols):
    col = worksheet1.col_values(curr_col)
    print('col%s is %s' %(curr_col,col))

# Iterate over all cells of Sheet1
for rown in range(num_rows):
    for coln in range(num_cols):
        cell = worksheet1.cell_value(rown,coln)
        print(cell)
        # Other ways to read the same cell:
        #     cell = worksheet1.cell(rown,coln).value
        #     cell = worksheet1.row(rown)[coln].value
        #     cell = worksheet1.col(coln)[rown].value
        # Type of the value stored in a cell
        # (0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error):
        #     cell_type = worksheet1.cell_type(rown,coln)
        #     print(cell_type)
#coding=utf-8
#######################################################
#filename:test_xlwt.py
#author:defias
#date:xxxx-xx-xx
#function:新建excel文件并寫入數據
#######################################################
import xlwt

# Create the workbook and sheet objects
workbook = xlwt.Workbook()  # note: Workbook starts with a capital W
sheet1 = workbook.add_sheet('sheet1',cell_overwrite_ok=True)
sheet2 = workbook.add_sheet('sheet2',cell_overwrite_ok=True)
# Write data into the sheets
sheet1.write(0,0,'this should overwrite1')
sheet1.write(0,1,'aaaaaaaaaaaa')
sheet2.write(0,0,'this should overwrite2')
sheet2.write(1,2,'bbbbbbbbbbbbb')
# ----------- Using a style (example, not executed) -----------
# Initialise a style:
#     style = xlwt.XFStyle()
# Create a font for the style:
#     font = xlwt.Font()
#     font.name = 'Times New Roman'
#     font.bold = True
# Attach the font to the style:
#     style.font = font
# Write with the style (the original example called `sheet.write`, but no
# variable named `sheet` is defined in this script):
#     sheet1.write(0,1,'some bold Times text',style)

# Save the Excel file; an existing file of the same name is overwritten
workbook.save('E:\\Code\\Python\\test2.xls')
print('創建excel文件完成!')
#coding=utf-8
#######################################################
#filename:test_xlutils.py
#author:defias
#date:xxxx-xx-xx
#function:向excel文件中寫入數據
#######################################################
import xlrd
import xlutils.copy

# The workbook is read from and saved back to this same path; saving over
# the original file is how the modification takes effect.
XLS_PATH = 'E:\\Code\\Python\\test1.xls'

# Open the existing workbook and make a writable copy of it
rb = xlrd.open_workbook(XLS_PATH)
wb = xlutils.copy.copy(rb)

# Fetch the first sheet via get_sheet(); per the original note, a sheet
# obtained through sheet_by_index() has no write() method
ws = wb.get_sheet(0)
ws.write(1, 1, 'changed!')

# Add one more sheet
wb.add_sheet('sheetnnn2', cell_overwrite_ok=True)

# Overwrite the source file; per the original note, content that was not
# modified stays unchanged
wb.save(XLS_PATH)
#coding=utf-8
#######################################################
#filename:test_pyExcelerator_read.py
#author:defias
#date:xxxx-xx-xx
#function:讀excel文件中的數據
#######################################################
import pyExcelerator

# Per the original notes: parse_xls returns a list with one item per sheet.
# Each item is a 2-tuple (sheet name, cell data), where the cell data is a
# dict keyed by the cell index (i, j); a cell without data has no entry.
sheets = pyExcelerator.parse_xls('E:\\Code\\Python\\testdata.xls')
print(sheets)
#coding=utf-8
#######################################################
#filename:test_pyExcelerator.py
#author:defias
#date:xxxx-xx-xx
#function:新建excel文件并寫入數據
#######################################################
import pyExcelerator

# Create the workbook and sheet objects
wb = pyExcelerator.Workbook()
ws = wb.add_sheet(u'第一頁')
# Build the style: a bold Times New Roman font
myfont = pyExcelerator.Font()
myfont.name = u'Times New Roman'
myfont.bold = True
mystyle = pyExcelerator.XFStyle()
mystyle.font = myfont
# Write data using the style
ws.write(0,0,u'ni hao 帕索!',mystyle)
# Save the Excel file; an existing file of the same name is overwritten
wb.save('E:\\Code\\Python\\mini.xls')
print('創建excel文件完成!')
總結
以上是生活随笔為你收集整理的python 解析excel模板_python 解析Excel的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: @EnableConfiguration
- 下一篇: 一文总结买卖股票的最佳时机的所有情况(附