编程 Python

python读取word文档,插入mysql数据库的示例代码

Posted in Python onNovember 07, 2018

表格内容如下：

1、实现批量导入word文档，取文档标题中的数字作为编号

2、除取上面打钩的内容需要匹配出来入库入库，其他内容全部直接入库mysql

# wuyanfeng
# -*- coding:utf-8 -*-
# 读取docx中的文本代码示例
import docx
import pymysql
import re
import os

# 创建数据库链接
conn = pymysql.connect(
 host='rm-bp1vu5d84dg12c6d59o.mysql.rds.aliyuncs.com',
 port=3306,
 user='root',
 passwd='wYf092415*',
 db='pays',
 charset='utf8',
)
# 创建游标
cursor = conn.cursor()

#切片函数
def section(info,key,len11):
 a = len(info)
 print(a, type(a))
 d = []
 e = 0
 g = -1
 i = 0
 task_class=[]
 while i < len(info):
  # for i in range(len(info)):
  # i+=1
  print("i::::", i)
  try:
   #c = info.index("a", e)
   #print("c:::::", c)
   c = info.index(key, e)
   #print("c:::::", c)

   print("c类型判断",type(c))
  except ValueError:
   print(ValueError)
  try:
   if (c != '') & (g < int(c)):
    d.append(c)
    g = c
    i = c + 1
    print("illlldddd:", i)
    e = c + 1
    continue

   elif (c == ''):
     break
  except UnboundLocalError:
   print(UnboundLocalError)

   return task_class
  break
 print("d", d, type(d))
 print(d[0], type(d[0]))
 print("d的长度：",len(d))
 #开始切片
 if len(d) != 0:
  for j in range(len(d)):
   print("info11:::", info, type(info))
   info = ''.join(info)
   print("info222:::",info,type(info))
   print("d[%d]"%j,d[j])
   #print("d[j]:5"%j,info[d[j]:5])

   llll = info[d[j]+1:d[j]+5]
   print("d[%d]:5" % j, llll)
   task_class.append(llll)
   print("task_class::11", task_class)

 task_class=",".join(task_class)
 print("str1112222",task_class)
 return task_class


def insettable(file):
 print("file：：：:::::::::::::::::", file)
 print("type：：：:::::::::::::::::", type(file))
 # file1 = file
 # file1 = str(file1)
 ddd = re.findall("知识库\\\(\d+)", file)
 print("ddd：::::::::：：", ddd)
 print("ddd[0]:::", ddd[0])
 ddd = int(ddd[0])
 print("ddd::::", type(ddd))

 file = docx.Document(file)
 # 读取表格：
 t = file.tables[0]
 print(t)
 print("1:", t.cell(0, 0).text) # 1
 cell1 = t.cell(0, 0).text
 print("tyep::::", type(t.cell(0, 0).text))

 print("2:", t.cell(0, 1).text) # 2
 cell2 = t.cell(0, 1).text

 print("2:", t.cell(0, 2).text) # 2
 cell3 = t.cell(0, 2).text

 print("2:", t.cell(0, 3).text) # 2
 cell4 = t.cell(0, 3).text
 print("cell4:::::::::", cell4)

 print("3:", t.cell(1, 0).text) # 3
 cell5 = t.cell(1, 0).text

 print("4:", t.cell(1, 2).text) # 4
 cell6 = t.cell(1, 2).text

 print("5:", t.cell(1, 3).text) # 5
 task_type = t.cell(1, 3).text
 # task_type = re.findall('.*[☑√](.*)$', cell7)
 # task_type = ''.join(cell7)
 print("task_type111111:", task_type)
 # task_class = task_class[0:4]
 '''低级处理方式
 a = int(task_type.count("☑"))
 print("a|||||||", a, type(a))
 b = int(task_type.count("√"))
 print("b|||||||", b, type(a))
 if (a == 1) | (b == 1):
  print("111111111111111111")
  # task_type = re.findall('.*[☑√](.*)$', task_type)
  task_type = re.findall('.*[☑√](.*)$', task_type)
  print("task_type1", task_type)
  task_type = ''.join(task_type)
  print("task_type2", task_type)
  task_type = task_type[0:4]
  print("task_type3:d:%s，b=%d" % (a, b), task_type)
 elif (a == 0) & (b == 0):
  print("2222222222222222222")
  task_type = '法定职责'
  print("a:%s，b=%s" % (a, b), task_type)
 elif (a == 2) | (b == 2):
  print("333333333333333333333")
  task_type = '法定职责,工作职责 '
  print("a:%s，b=%s" % (a, b), task_type)
 '''
 #调用切片函数
 task_type1 = section(task_type, "√", 4)
 task_type2 = section(task_type, "☑", 4)
 task_type1 = "".join(task_type1)
 task_type2 = "".join(task_type2)
 print("task_type1:::", task_type1,type(task_type1))
 print("task_type2:::", task_type2,type(task_type2))
 if task_type1.strip()!="":
  task_type = task_type1
  print("task_type111:::", task_type1)
 elif task_type2.strip()!="":
  task_type = task_type2
  print("task_type222:::", task_type2)

 print("6:", t.cell(1, 4).text) # 6
 cell8 = t.cell(1, 4).text

 print("7:", t.cell(2, 1).text) # 7
 cell9 = t.cell(2, 1).text

 # 获取文档对象
 # file = docx.Document("D:\\配置库\\公案APP\\1.2 系统规格\\知识库\\14人员死亡先期处置.docx")
 print("段落数:" + str(len(file.paragraphs))) # 段落数为13，每个回车隔离一段
 lenn = len(file.paragraphs)
 print("len:", lenn)
 # 输出每一段的内容
 for para in file.paragraphs:
  print(para.text)

 # 输出段落编号及段落内容
 for i in range(len(file.paragraphs)):
  print("第" + str(i) + "段的内容是：" + file.paragraphs[i].text)

 list6 = []
 for i in range(len(file.paragraphs)):
  if 0 == i:
   print("i:", i)
   lis0 = file.paragraphs[i].text
   print("list0:", lis0)
   print(type(lis0))

  elif 1 == i:
   print("i:", i)
   task_class = file.paragraphs[i].text
   print("lis1", task_class,type(task_class))
   '''低级处理方式
   print("task_class111111:", task_class)

   c = int(task_class.count("☑"))
   task_class = ''.join(task_class)
   #print(task_class.index('☑'))
   print("c|||||||", c, type(c))
   d = int(task_class.count("√"))
   print(task_class.index('√'))

   print("d|||||||", d, type(d))
   task_class = re.findall(r'[☑√](?:.*)', task_class)
   task_class = ''.join(task_class)
   task_class = task_class[1:5]
   print("task_class", task_class)
  '''
   #调用切片函数
   task_class1 = section(task_class, "√", 4)
   task_class2 = section(task_class, "☑", 4)
   task_class1 = "".join(task_class1)
   task_class2 = "".join(task_class2)
   print("task_class1:::", task_class1,type(task_class1))
   print("task_class2:::", task_class2,type(task_class2))
   if task_class1.strip()!="":
    task_class = task_class1
    print("task_class11:::", task_class1)
   elif task_class2.strip()!="":
    task_class = task_class2
    print("task_class22:::", task_class2)


  if 2 == i:
   print("i:", i)
   lis2 = file.paragraphs[i].text

   print("lis2", lis2)
   print(type(lis2))
   preparer = re.findall('填表单位：(.*?)$', lis2)
   preparer = ''.join(preparer)
   print("preparer:%s" % preparer)

  # elif 3 == i:
  #  print("i:", i)
  #  lis3 = file.paragraphs[i].text
  elif 3 == i:
   print("i:", i)
   lis4 = file.paragraphs[i].text
   print("lis4", lis4)
   print(type(lis4))
  elif 3 < i < lenn - 1:
   print("i:", i)
   print(file.paragraphs[i].text)
   print(type(file.paragraphs[i].text))
   # list6[i-5] = list6.append(file.paragraphs[i].text)
   list6.append(str(file.paragraphs[i].text).strip('\xa0'))
   # list6.append("%s\n" % str(file.paragraphs[i].text).strip('\xa0'))
   print(list6)
 key_steps = "\n".join(list6)
 # print("key_steps:\n",key_steps.strip('\n'))

 cursor.execute(
  "insert into `t_knowledge_base` (`no`, `preparer`, `task_class`, `task_name`, `task_specification`, `task_type`, `task_desc`, `task_basis`, `key_steps`) values ('%d','%s','%s','%s','%s','%s','%s',NULL,'%s')" % (
   ddd, preparer, task_class, cell2, cell4, task_type, cell9, key_steps))
 conn.commit()


def traverse(f):
 fs = os.listdir(f)
 for f1 in fs:
  tmp_path = os.path.join(f, f1)
  if not os.path.isdir(tmp_path):
   print('文件: %s' % tmp_path)
   insettable(tmp_path)
  else:
   print('文件夹：%s' % tmp_path)
   traverse(tmp_path)


path = 'D:\\配置库\公案APP\\1.2 系统规格\\知识库'
traverse(path)

#单文件调测
# path = 'D:\\配置库\\公案APP\\1.2 系统规格\\知识库\\14人员死亡先期处置.docx'
# insettable(path)

# 关闭游标
cursor.close()
# 关闭连接
conn.close()

以上这篇python读取word文档,插入mysql数据库的示例代码就是小编分享给大家的全部内容了，希望能给大家一个参考，也希望大家多多支持三水点靠木。

python读取word文档,插入mysql数据库的示例代码

- Author -

无所住心

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

Python实现的简单发送邮件脚本分享

Nov 07 Python

Python中利用sqrt()方法进行平方根计算的教程

May 15 Python

python实现数组插入新元素的方法

May 22 Python

浅谈Python中函数的参数传递

Jun 21 Python

django之session与分页(实例讲解)

Nov 13 Python

python批量导入数据进Elasticsearch的实例

May 30 Python

python使用Plotly绘图工具绘制散点图、线形图

Apr 02 Python

如何爬取通过ajax加载数据的网站

Aug 15 Python

Python pip配置国内源的方法

Feb 14 Python

Pandas的Apply函数具体使用

Jul 21 Python

Windows下Sqlmap环境安装教程详解

Aug 04 Python

利用python调用摄像头的实例分析

Jun 07 Python

pandas.DataFrame删除/选取含有特定数值的行或列实例

Nov 07 #Python

python 返回列表中某个值的索引方法

Nov 07 #Python

pandas 根据列的值选取所有行的示例

Nov 07 #Python

Pandas过滤dataframe中包含特定字符串的数据方法

Nov 07 #Python

pandas筛选某列出现编码错误的解决方法

Nov 07 #Python

python绘制中国大陆人口热力图

Nov 07 #Python

利用Python将数值型特征进行离散化操作的方法

Nov 06 #Python