400-616-5551

您所在位置: 首页> 学习课程> python培训 | Python之Excel操作

python培训 | Python之Excel操作

发布百知教育 来源:学习课程 2019-09-27

一、openpyxl介绍


目前有许多常见的Excel 操作的库,例如:xlrd,xlwt,甚至Pandas中也自带了相应的read_excel,to_excel方法。但各有优缺点。openpyxl能够对单元格进行操作,且不改变其他sheet和cell的值。

 

对于Excel的操作主要是读和写。中间对于数据的处理推荐使用Pandas。


主要应用的场景:从数据库或者其他Excel中取数,经过简单的处理后(Pandas、Numpy等),将其写入特定的Excel文件,该特定的Excel文件中事先已写好相应的公式、规则、图表等,能够快速生成可用的分析报表数据。



二、openpyxl读取数据


一个Excel文件打开后就是一个工作簿workbook;

一个workbook中有若干张表单worksheet;

一个worksheet有若干行row,若干列column,若干个单元格cell;


2.1.定位到workbook

 

2.1.1.打开

wb = openpyxl.load_workbook(‘excelName’)


2.1.2.获取表单

#返回str

wb.sheetnames 

or

for sheet in wb:

    sheet.title   #返回str


2.1.3.创建表单

#在现有表单的最后插入一张新的表单

sheet = wb.create_sheet(‘sheetname’)


2.2.定位到worksheet

sheet = wb.get_sheet_by_name(‘sheetname’)

or

sheet = wb[‘sheetname’]

or

sheet = wb.active   #当前活跃的表单


2.3.定位到cell


2.3.1.单个单元格

c = sheet[‘A1’]   #定位到sheet表单的A1单元格

c = sheet.cell(row=1, column=3)   #定位cell,行列都从1开始

c.value   #获取单元格的值,根据值返回类型

c.row   #获取行号,返回int

c.column   #获取列号,返回int

c.coordinate   #获取行列号,返回str,C1


2.3.2.整行/整列

#获取第C列,返回有序元组(cell, )

col = sheet[‘C’]  


#获取第5行,返回同上 

row = sheet[5]  


#获取B列到C列的单元格,返回((B列有序cell),(C列有序cell),) 

col_range = sheet[‘B:C’]   


#获取2到6行(包括第6行)的单元格,返回类似上,空单元格返回None

row_range = sheet[2:6]   


2.3.3.区域

cell_range = sheet[‘A1:B6’]   #获取某一区域,返回先行后列的元组

or

#通过生成器的方式获取某一区域的cell

for row in sheet.iter_rows(min_row=1, max_row=6, 

                           min_col=3, max_col=5):  #先行

    for cell in row:   #后列

        cell.value


2.3.4.行列

int = sheet.max_row   #行数

int = sheet.max_column   #列数


from openpyxl.utils import (

    get_column_letter, 

    column_index_from_string

)

str = get_column_letter(45)   #第45列对应的字母

int = column_index_from_string(‘AAS’)   #第AAS列对应的数字



三、实例openpyxl转Pandas


import openpyxl

import pandas as pd



def openpyxl_to_pandas(data, sheet_index=None, header=None):

    """

    convert data from openpyxl to DataFrame in Pandas

    usage:

        load_workbook

        convert

        pd.

        pd.

        ...

    :param data: data from openpyxl, including workbook, worksheet and cell

        Workbook: return active sheet value

        Worksheet: return sheet value

        Cell: return cell value

    :param sheet_index: int, None; which sheet in workbook

    :param header: int, None, default None

        Row (0-indexed) to use for the column labels of the parsed DataFrame.

        If a list of integers is passed those row positions will be combined

        into a MultiIndex. Use None if there is no header.

    :return: DataFrame

    """


    _df = None

    title = None


    # deal workbook

    if isinstance(data, openpyxl.workbook.workbook.Workbook):

        if sheet_index is None:

            # active sheet not only 1st sheet

            _df = pd.DataFrame(data.active.values)

            title = [c.value for c in data.active[1]]

        if isinstance(sheet_index, int):

            name = data.get_sheet_names()[sheet_index]

            sh = data.get_sheet_by_name(name)

            _df = pd.DataFrame(sh.values)

            title = [c.value for c in sh[1]]


    # deal worksheet

    if isinstance(data, openpyxl.workbook.workbook.Worksheet):

        # 1.generate dataframe

        tmp = data.values

        _df = pd.DataFrame(tmp)

        title = [c.value for c in data[1]]


    # deal cell

    if isinstance(data, tuple):

        tmp = []

        for row in data:

            tmp.append([c.value for c in row])

        _df = pd.DataFrame(tmp)

        title = [c.value for c in data[0]]


    # deal header

    if header is None:

        return df

    if header == 0:

        _df = _df.drop([0], axis=0)

        _df.columns = title

        _df = _df.reset_index(drop=True)

        return df

    if header > 0 and isinstance(header, int):

        title = [c.value for c in data[header + 1]]

        _df = _df.drop([i for i in range(0, header)], axis=0)

        _df.columns = title

        _df = _df.reset_index(drop=True)

        return _df



四、openpyxl写入数据


4.1.创建工作薄

wb = openpyxl.Workbook()   #新建Workbook


4.2.表单

 #指定位置和表单名,若不指定index,则放在最后

wb.create_sheet(index=0, title=‘haha’)  


#通过表单名来删除

wb.remove_sheet(wb.get_sheet_by_name(‘name’))   


4.3.写数据


4.3.1.单个值

sheet[‘A1’] = 值  #可str,数值

or

sheet.cell(row=1, column=3, value=值)   #写入row行column列中

or

sheet.cell(row=1, column=3).value = 值


4.3.2.整行

sheet.append(list)   #整行,默认从A列,已有数据的下一行开始写入


4.3.3.列表

l = [[1,2],[3,4]]

for row in l:

    sheet.append(row)   #转换为按行写入


4.4.保存

wb.save(filename)   #保存,必传参,不会覆盖未修改的内容



五、实例:Pandas转openpyxl


import openpyxl

import pandas as pd

import copy



def pandas_to_openpyxl(df, whichExcel=None, whichSheet=None,

                               startRow=1, startCol=1,

                               index=False, header=True):

    """

    convert pandas data to openpyxl and write to excel

    usage:

        load_workbook

        convert(df, wb, 'sheet')

        convert(df, whichSheet=ws)

        ...

        save

    :param df: raw data

    :param whichExcel:

        openpyxl.workbook.workbook.Workbook or None

        target excel

    :param whichSheet:

        openpyxl.workbook.workbook.Worksheet or str

        target sheet

    :param startRow: start with 1

    :param startCol: start with 1 or A

    :param index: true means save pandas' dataframe index

    :param header: true means save pandas' dataframe header

    :return:

    """


    # 1.get target sheet

    _sh = None

    if whichExcel is None:

        if isinstance(whichSheet, openpyxl.workbook.workbook.Worksheet):

            _sh = whichSheet

        else:

            return 'ERROR: please input openpyxl.workbook.workbook.Worksheet'

    elif isinstance(whichExcel, openpyxl.workbook.workbook.Workbook):

        if isinstance(whichSheet, str):

            _sh = whichExcel[whichSheet]

        else:

            return 'ERROR: please input str sheet name'

    else:

        return 'ERROR: please input openpyxl.workbook.workbook.Workbook'


    # 2.reset row and col

    srow = startRow

    scol = None

    if isinstance(startCol, int):

        scol = startCol

    elif isinstance(startCol, str):

        from openpyxl.utils import column_index_from_string

        scol = column_index_from_string(startCol)


    # 3.backup df

    data = df.copy()


    # 4.write index and header

    if index and header:

        ind = data.index

        hea = data.columns

        row = srow + 1

        for i in ind:

            _sh.cell(row=row, column=scol, value=i)

            row = row + 1

        col = scol + 1

        for h in hea:

            _sh.cell(row=srow, column=col, value=h).style='Pandas'

            col = col + 1

        srow = srow + 1

        scol = scol + 1

    elif index:

        ind = data.index

        row = copy.copy(srow)

        for i in ind:

            _sh.cell(row=row, column=scol, value=i)

            row = row + 1

        scol = scol + 1

    elif header:

        hea = data.columns

        col = copy.copy(scol)

        for h in hea:

            _sh.cell(row=srow, column=col, value=h).style='Pandas'

            col = col + 1

        srow = srow + 1


    # 5.write data to excel

    tmp = data.values.tolist()

    for r in tmp:

        # write data

        col = copy.copy(scol)

        for c in r:

            _sh.cell(row=srow, column=col, value=c)

            col = col + 1

        srow = srow + 1

    return 'success'



六、openpyxl格式


可参考官方文档


python培训


七、实例:清除sheet中的数据



import openpyxl



def clear_sheet(whichExcel=None, whichSheet=None, data_only=True):

    """

    clear data from sheet

    :param whichExcel:

        openpyxl.workbook.workbook.Workbook or None

        target excel

    :param whichSheet:

        openpyxl.workbook.workbook.Worksheet or str

        target sheet

    :param data_only: true means only clear data

    :return: status

    """

    _sh = None

    if whichExcel is None:

        if isinstance(whichSheet, openpyxl.workbook.workbook.Worksheet):

            _sh = whichSheet

        else:

            return 'ERROR: please input openpyxl.workbook.workbook.Worksheet'

    elif isinstance(whichExcel, openpyxl.workbook.workbook.Workbook):

        if isinstance(whichSheet, str):

            _sh = whichExcel[whichSheet]

        else:

            return 'ERROR: please input str sheet name'

    else:

        return 'ERROR: please input openpyxl.workbook.workbook.Workbook'


    from openpyxl.styles import Border, Font

    if data_only:

        for r in _sh.iter_rows():

            for c in r:

                c.border = Border() # delete border

                c.value = ''

    else:

        for r in _sh.iter_rows():

            for c in r:

                c.border = Border() # delete border

                c.font = Font('宋体')

                c.value = ''



python培训:http://www.baizhiedu.com/python2019




上一篇:python培训 | 即学即用的 30 段 Python 实用代码

下一篇:应届生去公司找个Java程序员的职位需要什么技能?

相关推荐

www.baizhiedu.com

有位老师想和您聊一聊

关闭

立即申请