按xls标签替换docx及xls内容
WPSoffice环境下,需要批量替换doc文档及xls表格某些内容,在windows下,可以用VBA宏实现,先建一个标签表格,然后按标签批量替换。但在Linux下,WPS表格宏不能跨文档操作WPS文字,于是想用python实现。
1、标签文件与模板文件格式:
标签文件A列为标签名,B列为值。
模板文件关键词用#包围,如下:
2、基本功能
读入标签内容,然后打开模板文件,替换关键词为标签值,保存到新文件。
用到的python库:xlrd、xlutils、tkinter。读入的标签及文件列表使用Treeview展示,并提供修改、增加项、删除项功能。
3、几个重要代码片段
3.1 读xls内容
# 读入标签表# 打开一个 Excel 文件workbook = xlrd.open_workbook(fname)# 通过索引获取第一个工作表sheet = workbook.sheet_by_index(0)da = []# 遍历所有行和列输出for i in range(sheet.nrows):# 键值分布在1、2列key = str(sheet.cell_value(i, 0)).replace(' ', '') # 删除空格val = sheet.cell_value(i, 1)date_format = sheet.cell_type(i, 1)# 日期类型if date_format == xlrd.XL_CELL_DATE:date_tuple = xlrd.xldate_as_tuple(val, workbook.datemode)val = '' + str(date_tuple[0]) + "年" + str(date_tuple[1]) + "月" + str(date_tuple[2]) + "日"# 文本类型if date_format == xlrd.XL_CELL_TEXT:val = '' + sheet.cell_value(i, 1)if str(key) != '':da.append((key, val))# print(da)workbook.release_resources()del workbook
3.2 替换docx文档关键词
def procdocx(self, fname, lbitems, foutname):document = Document(fname) # 打开文档for paragraph in document.paragraphs:runs = paragraph.runs # 得到所有runfor i, run in enumerate(runs):key = run.text# 当前run不包含key值if key.count('#') == 0:continuecounter = i # 记录起始位置if key.count('#') == 1: # 如先导符与key值分在多个run中while counter < len(runs): # 将先导符与后续run组成key后再查找counter = counter + 1if counter >= len(runs):breakkey += runs[counter].texttmp = runs[counter].textruns[counter].clear() # 清空当前runs,本算法会改变文档内容格式if tmp.count('#') > 0: # 注:如果有多个连续#,且第二个key分在多个run中,会有可能出现替换错误break# 开始替换for item in lbitems:key1 = '#' + self.labeltree.item(item)['values'][0].replace(' ', '') + '#'if key.find(key1) >= 0:# for j in range(i, counter + 1):# runs[j].clear()key = key.replace(key1, self.labeltree.item(item)['values'][1])runs[i].text = key# 另存到输出文件夹document.save(foutname)
3.3 替换xls表格关键词
def procxls(self, fname, lbitems, foutname):modxls = xlrd.open_workbook(fname, formatting_info=True)# 将操作文件对象拷贝,变成可写newxls = copy(modxls)# 获取所有sheet页及数量sheets = modxls.sheet_names()for shn in range(len(sheets)):# 通过索引获取第shn个工作表modsheet = modxls.sheet_by_index(shn)newsheet = newxls.get_sheet(shn)# 遍历mod所有行和列输出for i in range(modsheet.nrows):for j in range(modsheet.ncols):val = modsheet.cell_value(i, j)val_format = modsheet.cell_type(i, j)if val_format != xlrd.XL_CELL_TEXT:continue# 开始替换for item in lbitems:key1 = '#' + self.labeltree.item(item)['values'][0].replace(' ', '') + '#'if val.find(key1) >= 0:newval = val.replace(key1, self.labeltree.item(item)['values'][1])newsheet.write(i, j, newval)newxls.save(foutname)
3.4 Treeview代码
1) 创建一个treeview列表
# 创建 self.labeltree 并添加到 self.mainframe 中columns1 = ("label", "value")self.labeltree = ttk.Treeview(frame51, show="headings", columns=columns1)self.labeltree.column('label', width=100, anchor='w')self.labeltree.heading('label', text="标签名")self.labeltree.column('value', width=300, anchor='w')self.labeltree.heading('value', text="标签值")self.labelscrollx = ttk.Scrollbar(frame51, orient="horizontal", command=self.labeltree.xview)self.labelscrolly = ttk.Scrollbar(frame51, orient="vertical", command=self.labeltree.yview)self.labeltree.configure(xscrollcommand=self.labelscrollx.set)self.labeltree.configure(yscrollcommand=self.labelscrolly.set)self.labelscrollx.pack(side="bottom", fill="x")self.labelscrolly.pack(side="right", fill="y")self.labeltree.pack(expand=1, fill='both', padx='1', pady='1')
2) 双击treeview编辑
# 双击标签库列表def set_lbcell_value2(self, event):# 获取选中的项目col = self.labeltree.identify_column(event.x) # 列row = self.labeltree.identify_row(event.y) # 行if col == '' or row == '':returnselected_item = self.labeltree.selection()[0]cn = int(str(col).replace('#', ''))rn = int(str(row).replace('I', ''))x, y, w, h = self.labeltree.bbox(selected_item, col)txt = self.labeltree.item(selected_item, 'values')[cn-1]edstr = StringVar()edstr.set(txt)entryedit = ttk.Entry(self.labeltree, width=w//8, textvariable=edstr)entryedit.place(x=x, y=y)def saveedit(even):self.labeltree.set(selected_item, column=col, value=edstr.get())entryedit.destroy()entryedit.bind("<FocusOut>", saveedit)# 按下时触发entryedit.bind('<Return>', saveedit) # 不是Enter而是Return
3) 右击treeview,弹出操作菜单
# 右击标签库def set_lbcel_popup(self, event):# 创建右键菜单self.right_click_menu = Menu(self.labeltree, tearoff=False)self.right_click_menu.add_command(label="增加", command=lambda: self.on_menu_click_add(event))if self.labeltree.selection():self.right_click_menu.add_command(label="删除", command=lambda: self.on_menu_click_del(event))self.right_click_menu.post(event.x_root, event.y_root)# 新增标签def on_menu_click_add(self, event):self.labeltree.insert('', 'end', values=('新标签', '值'))self.labeltree.update()# 删除标签def on_menu_click_del(self, event):if self.labeltree.selection():self.labeltree.delete(self.labeltree.selection())
4、几个不足之处:
1)python-docx库只能操作docx文件,对doc文档支持不好。
2)对模板文档中标签格式有一定要求,但基本功能已实现。
5、源代码已上传,链接:https://download.csdn.net/download/zhoury/89612390