一、openpyxl介绍
目前有许多常见的Excel 操作的库,例如:xlrd,xlwt,甚至Pandas中也自带了相应的read_excel,to_excel方法。但各有优缺点。openpyxl能够对单元格进行操作,且不改变其他sheet和cell的值。
对于Excel的操作主要是读和写。中间对于数据的处理推荐使用Pandas。
主要应用的场景:从数据库或者其他Excel中取数,经过简单的处理后(Pandas、Numpy等),将其写入特定的Excel文件,该特定的Excel文件中事先已写好相应的公式、规则、图表等,能够快速生成可用的分析报表数据。
二、openpyxl读取数据
一个Excel文件打开后就是一个工作簿workbook;
一个workbook中有若干张表单worksheet;
一个worksheet有若干行row,若干列column,若干个单元格cell;
2.1.定位到workbook
2.1.1.打开
wb = openpyxl.load_workbook(‘excelName’)
2.1.2.获取表单
#返回str
wb.sheetnames
or
for sheet in wb:
sheet.title #返回str
2.1.3.创建表单
#在现有表单的最后插入一张新的表单
sheet = wb.create_sheet(‘sheetname’)
2.2.定位到worksheet
sheet = wb.get_sheet_by_name(‘sheetname’)
or
sheet = wb[‘sheetname’]
or
sheet = wb.active #当前活跃的表单
2.3.定位到cell
2.3.1.单个单元格
c = sheet[‘A1’] #定位到sheet表单的A1单元格
c = sheet.cell(row=1, column=3) #定位cell,行列都从1开始
c.value #获取单元格的值,根据值返回类型
c.row #获取行号,返回int
c.column #获取列号,返回int
c.coordinate #获取行列号,返回str,C1
2.3.2.整行/整列
#获取第C列,返回有序元组(cell, )
col = sheet[‘C’]
#获取第5行,返回同上
row = sheet[5]
#获取B列到C列的单元格,返回((B列有序cell),(C列有序cell),)
col_range = sheet[‘B:C’]
#获取2到6行(包括第6行)的单元格,返回类似上,空单元格返回None
row_range = sheet[2:6]
2.3.3.区域
cell_range = sheet[‘A1:B6’] #获取某一区域,返回先行后列的元组
or
#通过生成器的方式获取某一区域的cell
for row in sheet.iter_rows(min_row=1, max_row=6,
min_col=3, max_col=5): #先行
for cell in row: #后列
cell.value
2.3.4.行列
int = sheet.max_row #行数
int = sheet.max_column #列数
from openpyxl.utils import (
get_column_letter,
column_index_from_string
)
str = get_column_letter(45) #第45列对应的字母
int = column_index_from_string(‘AAS’) #第AAS列对应的数字
三、实例openpyxl转Pandas
import openpyxl
import pandas as pd
def openpyxl_to_pandas(data, sheet_index=None, header=None):
"""
convert data from openpyxl to DataFrame in Pandas
usage:
load_workbook
convert
pd.
pd.
...
:param data: data from openpyxl, including workbook, worksheet and cell
Workbook: return active sheet value
Worksheet: return sheet value
Cell: return cell value
:param sheet_index: int, None; which sheet in workbook
:param header: int, None, default None
Row (0-indexed) to use for the column labels of the parsed DataFrame.
If a list of integers is passed those row positions will be combined
into a MultiIndex. Use None if there is no header.
:return: DataFrame
"""
_df = None
title = None
# deal workbook
if isinstance(data, openpyxl.workbook.workbook.Workbook):
if sheet_index is None:
# active sheet not only 1st sheet
_df = pd.DataFrame(data.active.values)
title = [c.value for c in data.active[1]]
if isinstance(sheet_index, int):
name = data.get_sheet_names()[sheet_index]
sh = data.get_sheet_by_name(name)
_df = pd.DataFrame(sh.values)
title = [c.value for c in sh[1]]
# deal worksheet
if isinstance(data, openpyxl.workbook.workbook.Worksheet):
# 1.generate dataframe
tmp = data.values
_df = pd.DataFrame(tmp)
title = [c.value for c in data[1]]
# deal cell
if isinstance(data, tuple):
tmp = []
for row in data:
tmp.append([c.value for c in row])
_df = pd.DataFrame(tmp)
title = [c.value for c in data[0]]
# deal header
if header is None:
return df
if header == 0:
_df = _df.drop([0], axis=0)
_df.columns = title
_df = _df.reset_index(drop=True)
return df
if header > 0 and isinstance(header, int):
title = [c.value for c in data[header + 1]]
_df = _df.drop([i for i in range(0, header)], axis=0)
_df.columns = title
_df = _df.reset_index(drop=True)
return _df
四、openpyxl写入数据
4.1.创建工作薄
wb = openpyxl.Workbook() #新建Workbook
4.2.表单
#指定位置和表单名,若不指定index,则放在最后
wb.create_sheet(index=0, title=‘haha’)
#通过表单名来删除
wb.remove_sheet(wb.get_sheet_by_name(‘name’))
4.3.写数据
4.3.1.单个值
sheet[‘A1’] = 值 #可str,数值
or
sheet.cell(row=1, column=3, value=值) #写入row行column列中
or
sheet.cell(row=1, column=3).value = 值
4.3.2.整行
sheet.append(list) #整行,默认从A列,已有数据的下一行开始写入
4.3.3.列表
l = [[1,2],[3,4]]
for row in l:
sheet.append(row) #转换为按行写入
4.4.保存
wb.save(filename) #保存,必传参,不会覆盖未修改的内容
五、实例:Pandas转openpyxl
import openpyxl
import pandas as pd
import copy
def pandas_to_openpyxl(df, whichExcel=None, whichSheet=None,
startRow=1, startCol=1,
index=False, header=True):
"""
convert pandas data to openpyxl and write to excel
usage:
load_workbook
convert(df, wb, 'sheet')
convert(df, whichSheet=ws)
...
save
:param df: raw data
:param whichExcel:
openpyxl.workbook.workbook.Workbook or None
target excel
:param whichSheet:
openpyxl.workbook.workbook.Worksheet or str
target sheet
:param startRow: start with 1
:param startCol: start with 1 or A
:param index: true means save pandas' dataframe index
:param header: true means save pandas' dataframe header
:return:
"""
# 1.get target sheet
_sh = None
if whichExcel is None:
if isinstance(whichSheet, openpyxl.workbook.workbook.Worksheet):
_sh = whichSheet
else:
return 'ERROR: please input openpyxl.workbook.workbook.Worksheet'
elif isinstance(whichExcel, openpyxl.workbook.workbook.Workbook):
if isinstance(whichSheet, str):
_sh = whichExcel[whichSheet]
else:
return 'ERROR: please input str sheet name'
else:
return 'ERROR: please input openpyxl.workbook.workbook.Workbook'
# 2.reset row and col
srow = startRow
scol = None
if isinstance(startCol, int):
scol = startCol
elif isinstance(startCol, str):
from openpyxl.utils import column_index_from_string
scol = column_index_from_string(startCol)
# 3.backup df
data = df.copy()
# 4.write index and header
if index and header:
ind = data.index
hea = data.columns
row = srow + 1
for i in ind:
_sh.cell(row=row, column=scol, value=i)
row = row + 1
col = scol + 1
for h in hea:
_sh.cell(row=srow, column=col, value=h).style='Pandas'
col = col + 1
srow = srow + 1
scol = scol + 1
elif index:
ind = data.index
row = copy.copy(srow)
for i in ind:
_sh.cell(row=row, column=scol, value=i)
row = row + 1
scol = scol + 1
elif header:
hea = data.columns
col = copy.copy(scol)
for h in hea:
_sh.cell(row=srow, column=col, value=h).style='Pandas'
col = col + 1
srow = srow + 1
# 5.write data to excel
tmp = data.values.tolist()
for r in tmp:
# write data
col = copy.copy(scol)
for c in r:
_sh.cell(row=srow, column=col, value=c)
col = col + 1
srow = srow + 1
return 'success'
六、openpyxl格式
可参考官方文档
七、实例:清除sheet中的数据
import openpyxl
def clear_sheet(whichExcel=None, whichSheet=None, data_only=True):
"""
clear data from sheet
:param whichExcel:
openpyxl.workbook.workbook.Workbook or None
target excel
:param whichSheet:
openpyxl.workbook.workbook.Worksheet or str
target sheet
:param data_only: true means only clear data
:return: status
"""
_sh = None
if whichExcel is None:
if isinstance(whichSheet, openpyxl.workbook.workbook.Worksheet):
_sh = whichSheet
else:
return 'ERROR: please input openpyxl.workbook.workbook.Worksheet'
elif isinstance(whichExcel, openpyxl.workbook.workbook.Workbook):
if isinstance(whichSheet, str):
_sh = whichExcel[whichSheet]
else:
return 'ERROR: please input str sheet name'
else:
return 'ERROR: please input openpyxl.workbook.workbook.Workbook'
from openpyxl.styles import Border, Font
if data_only:
for r in _sh.iter_rows():
for c in r:
c.border = Border() # delete border
c.value = ''
else:
for r in _sh.iter_rows():
for c in r:
c.border = Border() # delete border
c.font = Font('宋体')
c.value = ''
python培训:http://www.baizhiedu.com/python2019