python-docx

学习笔记/个人随记/Python/assets/Pasted image 20230407165054.png

# pip install python-docx

from docx import Document
import os

infos = [
	['0001',2023,2,23,12,00,'闯红灯',300],
	['0002',2023,3,23,12,00,'压线',300],
	['0004',2021,2,23,10,00,'违章停车',300],
	['0022',2020,2,23,12,00,'非法掉头',300],
]

def test():
	doc = Document('./data/word.docx')
	# 处理文字块文本内容  Document - Paragraph - Run三级结构
	for p in doc.paragraphs:
		for run in p.runs:
			run.text = run.text.replace('{one}',info[0])
			run.text = run.text.replace('{two}',str(info[1]))
			run.text = run.text.replace('{three}',str(info[2]))
			run.text = run.text.replace('{four}',str(info[3]))
			run.text = run.text.replace('{five}',str(info[4]))
			run.text = run.text.replace('{six}',str(info[5]))
			run.text = run.text.replace('{seven}',info[6])
			run.text = run.text.replace('{eight}',str(info[7]))
	if not os.path.exists('./data/new'):
		os.makedirs('./data/new')
	doc.save(f'./data/new/auto_{info[0]}.docx')
	# 处理表格中的文本内容   Document - Table - Row/Column - Cell四级结构单元格内容又有 Document - Paragraph - Run
	## 按行遍历
	#for table in doc.tables:
	#	for row in table.rows:
	#		for cell in row.cells:
	#			print(cell.text)
		   
	# 按列遍历     
	#for table in doc.tables:
	#	for column in table.columns:
	#		for cell in column.cells:
	#			print(cell.text)


# main
if __name__ == "__main__":
	test()

# 替换字段
def sub_text(run,filename):
    # 定义正则表达式的规则
    ascii_pattern = re.compile(r"{\w+}", re.ASCII)
    # 根据正则表达式找出内容
    res = ascii_pattern.findall(run.text)
    if res:
        for r in res:
            try:
	            # config_data is dict
                run.text = run.text.replace(r, config_data[r])
            except:
                print(f'Error : {filename} 中出现了 {r}  ,但Excl表中不存在与 {r} 对应的变量信息')

# 遍历段落，然后替换字段  
def sub_paragraphs(doc,filename):  
    for paragraph in doc.paragraphs:  
        for run in paragraph.runs:  
            # print(run.text) # 检查文本中变量  
            sub_text(run,filename)  
  
# 遍历表格，再替换表格中的字段  
def sub_tables(doc,filename):  
    for table in doc.tables:  
        for row in table.rows:  
            for cell in row.cells:  
                for p in cell.paragraphs:  
                    for run in p.runs:  
                        # print(run.text)  # 检查文本中变量  
                        sub_text(run,filename)

# 处理并保存文档  
def doc_txt_date(filename):  
    doc = Document(f'./data/{filename}')  
    sub_paragraphs(doc, filename)  
    sub_tables(doc, filename)  
    newdir = './new/']  
    if not os.path.exists(newdir):  
        os.makedirs(newdir)  
    doc.save(f'{newdir}/{filename}')