# pip install python-docx
from docx import Document
import os
infos = [
['0001',2023,2,23,12,00,'闯红灯',300],
['0002',2023,3,23,12,00,'压线',300],
['0004',2021,2,23,10,00,'违章停车',300],
['0022',2020,2,23,12,00,'非法掉头',300],
]
def test():
doc = Document('./data/word.docx')
# 处理文字块文本内容 Document - Paragraph - Run三级结构
for p in doc.paragraphs:
for run in p.runs:
run.text = run.text.replace('{one}',info[0])
run.text = run.text.replace('{two}',str(info[1]))
run.text = run.text.replace('{three}',str(info[2]))
run.text = run.text.replace('{four}',str(info[3]))
run.text = run.text.replace('{five}',str(info[4]))
run.text = run.text.replace('{six}',str(info[5]))
run.text = run.text.replace('{seven}',info[6])
run.text = run.text.replace('{eight}',str(info[7]))
if not os.path.exists('./data/new'):
os.makedirs('./data/new')
doc.save(f'./data/new/auto_{info[0]}.docx')
# 处理表格中的文本内容 Document - Table - Row/Column - Cell四级结构单元格内容又有 Document - Paragraph - Run
## 按行遍历
#for table in doc.tables:
# for row in table.rows:
# for cell in row.cells:
# print(cell.text)
# 按列遍历
#for table in doc.tables:
# for column in table.columns:
# for cell in column.cells:
# print(cell.text)
# main
if __name__ == "__main__":
test()
# 替换字段
def sub_text(run,filename):
# 定义正则表达式的规则
ascii_pattern = re.compile(r"{\w+}", re.ASCII)
# 根据正则表达式找出内容
res = ascii_pattern.findall(run.text)
if res:
for r in res:
try:
# config_data is dict
run.text = run.text.replace(r, config_data[r])
except:
print(f'Error : {filename} 中出现了 {r} ,但Excl表中不存在与 {r} 对应的变量信息')
# 遍历段落,然后替换字段
def sub_paragraphs(doc,filename):
for paragraph in doc.paragraphs:
for run in paragraph.runs:
# print(run.text) # 检查文本中变量
sub_text(run,filename)
# 遍历表格,再替换表格中的字段
def sub_tables(doc,filename):
for table in doc.tables:
for row in table.rows:
for cell in row.cells:
for p in cell.paragraphs:
for run in p.runs:
# print(run.text) # 检查文本中变量
sub_text(run,filename)
# 处理并保存文档
def doc_txt_date(filename):
doc = Document(f'./data/{filename}')
sub_paragraphs(doc, filename)
sub_tables(doc, filename)
newdir = './new/']
if not os.path.exists(newdir):
os.makedirs(newdir)
doc.save(f'{newdir}/{filename}')