大神论坛

找回密码
快速注册
查看: 550 | 回复: 0

[源码] 读取 ofd 内容 获取文字 python源码

主题

帖子

0

积分

初入江湖

UID
667
积分
0
精华
威望
0 点
违规
大神币
68 枚
注册时间
2023-10-14 10:49
发表于 2023-12-17 11:06
本帖最后由 a1100330 于 2023-12-17 11:06 编辑

下面是读取 OFD 文件内容并提取文字的 Python 源码:

import os
import shutil
import tempfile
import time
import zipfile
from tkinter.filedialog import askopenfilename

import xmltodict
def mygetofd(max_pages=35):
    """Let the user pick an OFD file and show its text and metadata in ``textPad``.

    Steps performed:

    1. Ask for a file with ``askopenfilename`` and unzip it into a fresh
       temporary directory (the file is opened with ``zipfile``).
    2. For each ``Doc_0/Pages/Page_<n>/Content.xml`` with ``n < max_pages``,
       append the text of its ``ofd:TextCode`` elements, each preceded by a
       space, to ``<cwd>/ofdtxt/<nn>.txt``.
    3. Collect the ``ofd:CustomDatas`` entries of ``OFD.xml`` into a dict and
       print it.
    4. Start ``<cwd>/allofdinone.bat``, wait two seconds, delete the per-page
       files and read ``<cwd>/ofdpgall.txt``.  Presumably the batch file
       merges the per-page files into ``ofdpgall.txt`` - verify against the
       batch file, which is not part of this module.
    5. Insert the merged text (with a line break after every ``。`` and
       ``:``), two newlines and the metadata into ``textPad``.

    :param max_pages: number of page indices (starting at 0) that are
        examined and whose temporary text files are cleaned up.  The default
        of 35 matches the previously hard-coded page count; NOTE(review):
        confirm the batch file can cope before raising it.
    :return: None

    Globals written: ``file_path`` (the temporary extraction directory, which
    has already been removed again when this function returns) and
    ``ofdinfo3`` (the metadata rendered as ``Name: value`` lines).

    Globals read: ``textPad`` - assumed to be a tkinter ``Text`` widget
    created elsewhere in the application (TODO confirm).

    Notes:
        * Only ``Doc_0`` is read; further documents in the archive are ignored.
        * ``os.startfile`` exists on Windows only.
        * Earlier revisions formatted the metadata by round-tripping it
          through the ``textPad2mail_log`` / ``textPad3fanyi`` widgets, which
          erased whatever those widgets contained.  The metadata is now
          formatted directly and those widgets are left untouched.
    """
    # Kept function-local, as the minidom import always was, so the module
    # namespace does not gain a generic ``parse`` name.
    from xml.dom.minidom import parse as parse_xml_file
    from xml.parsers.expat import ExpatError

    global ofdinfo3

    work_dir = os.getcwd()
    text_dir = os.path.join(work_dir, 'ofdtxt')
    merged_path = os.path.join(work_dir, 'ofdpgall.txt')

    # Everything that can go wrong while reading one page: unreadable file,
    # undecodable bytes, malformed XML, or an unexpected document layout.
    page_errors = (OSError, ValueError, ExpatError,
                   KeyError, AttributeError, TypeError)

    def unzip_file(zip_path, unzip_path=None):
        """Extract the archive ``zip_path`` and return the target directory.

        :param zip_path: path of the OFD (zip) file
        :param unzip_path: directory to extract into; defaults to the file
            path without its extension
        :return: unzip_path
        """
        if not unzip_path:
            # splitext() strips only the final extension, so dots in parent
            # directory names no longer truncate the path.
            unzip_path = os.path.splitext(zip_path)[0]
            if unzip_path == zip_path:
                # No extension: avoid using the file's own path as directory.
                unzip_path += '_unzipped'
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(unzip_path)
        return unzip_path

    def as_list(node):
        """Normalise an xmltodict value: missing -> [], single item -> [item].

        xmltodict yields a list only when a tag occurs more than once.
        """
        if node is None:
            return []
        return node if isinstance(node, list) else [node]

    def page_text_pieces(xml_path):
        """Return the non-empty ``ofd:TextCode`` strings of one page file."""
        with open(xml_path, 'r', encoding='utf-8') as page_file:
            tree = xmltodict.parse(page_file.read())
        content = tree['ofd:Page'].get('ofd:Content') or {}
        pieces = []
        for layer in as_list(content.get('ofd:Layer')):
            for text_object in as_list(layer.get('ofd:TextObject')):
                for code in as_list(text_object.get('ofd:TextCode')):
                    # An element without attributes is returned as a plain
                    # string instead of a dict with a '#text' entry.
                    text = code.get('#text') if isinstance(code, dict) else code
                    if text:
                        pieces.append(text)
        return pieces

    def read_custom_data(ofd_xml_path):
        """Return ``{Name: text}`` for the element children of ``ofd:CustomDatas``."""
        root = parse_xml_file(ofd_xml_path).documentElement
        info = {}
        for group in root.getElementsByTagName('ofd:CustomDatas'):
            for child in group.childNodes:
                # Pretty-printed XML has whitespace text nodes between the
                # elements; they carry no attributes and are skipped.
                if child.nodeType != child.ELEMENT_NODE:
                    continue
                info[child.getAttribute('Name')] = ''.join(
                    part.data
                    for part in child.childNodes
                    if part.nodeType in (part.TEXT_NODE,
                                         part.CDATA_SECTION_NODE)
                )
        return info

    def parse_ofd(path):
        """Unpack ``path``, dump each page's text and return the metadata dict.

        :param path: path of the OFD file chosen by the user
        :return: dict built from the ``ofd:CustomDatas`` section of OFD.xml
        """
        global file_path
        # A private temporary directory guarantees that the clean-up below
        # can never delete a folder that belongs to the user.
        file_path = tempfile.mkdtemp(prefix='ofd_')
        try:
            unzip_file(path, file_path)
            os.makedirs(text_dir, exist_ok=True)

            for page_no in range(max_pages):
                xml_path = os.path.join(
                    file_path, 'Doc_0', 'Pages', f'Page_{page_no}',
                    'Content.xml')
                if not os.path.isfile(xml_path):
                    continue
                try:
                    pieces = page_text_pieces(xml_path)
                except page_errors as exc:
                    print(f'Skipping page {page_no}: {exc!r}')
                    continue
                if not pieces:
                    continue
                page_txt = os.path.join(text_dir, f'{page_no:02d}.txt')
                # The platform default encoding is kept on purpose: the
                # merged file is read back below with the same default.
                # errors='replace' keeps one unencodable character from
                # discarding the rest of the page.
                with open(page_txt, 'a', errors='replace') as out:
                    out.write(''.join(' ' + piece for piece in pieces))

            try:
                info = read_custom_data(os.path.join(file_path, 'OFD.xml'))
            except (OSError, ExpatError) as exc:
                print(f'Could not read OFD.xml metadata: {exc!r}')
                info = {}
        finally:
            shutil.rmtree(file_path, ignore_errors=True)
        print(info)
        return info

    myofdfile = askopenfilename(defaultextension='.ofd')
    if not myofdfile:
        # The dialog was cancelled; there is nothing to process.
        return

    ofd_info = parse_ofd(myofdfile)
    ofdinfo3 = '\n'.join(
        f'{name}: {value}' for name, value in ofd_info.items())

    # os.startfile() does not wait for the batch file to finish; the fixed
    # delay is the only synchronisation with it.
    os.startfile(os.path.join(work_dir, 'allofdinone.bat'))
    time.sleep(2)

    for page_no in range(max_pages):
        try:
            os.remove(os.path.join(text_dir, f'{page_no:02d}.txt'))
        except OSError:
            # Best effort: most page files never existed, and one that is
            # still in use must not abort the run.
            pass

    with open(merged_path, 'r', errors='replace') as merged:
        ofdcontent = merged.read()
    new_ofdcontent = ofdcontent.replace('。', '。\n').replace(':', ':\n')
    print(new_ofdcontent)

    # NOTE(review): this second delay presumably gives the batch process
    # time to release the merged file before it is deleted - confirm.
    time.sleep(2)
    os.remove(merged_path)

    textPad.insert('end', new_ofdcontent)
    textPad.insert('end', '\n')
    textPad.insert('end', '\n')
    textPad.insert('end', ofdinfo3)


注:若转载,请注明来源为大神论坛(附本帖地址)并保留作者信息。

返回顶部