Commit c9143d32 authored by leeyena

241106

parent cf911f9e
......@@ -38,14 +38,9 @@ class print_xml :
data_file = open(xml_file_path,'r',encoding='utf-8')
soup = BeautifulSoup(data_file,"xml")
self.parsing(soup,extract_path)
#soup_xml = soup.prettify()
#with open(xml_file_path,'rb') as f:
# xml_content = f.read()
#root = etree.XML(xml_content)
#pretty_xml_string = etree.tostring(root,pretty_print=True).decode('utf-8')
'''
xml 원본 파일 text 로 확인하기
'''
'''
with open(f"fine{i}.txt", "w",encoding='utf-8') as file:
file.write(soup.prettify())
......@@ -53,20 +48,6 @@ class print_xml :
'''
return True
'''
xml_dict = xmltodict.parse(xml_content)
json_data = json.dumps(xml_dict, indent=4)
# JSON 데이터를 파일로 저장
with open('output.json', 'w') as json_file:
json.dump(xml_dict, json_file, indent=4)
print("JSON 파일로 저장 완료: output.json")
'''
#print(pretty_xml_string)
#exit()
#return self.parsing(soup,extract_path)
def table(self,tbl,zip_file_path) :
max = 0
......@@ -110,57 +91,32 @@ class print_xml :
continue
elif t.text is None:
continue
#text += t.text
if run.find("hp:tbl") :
data = self.table(run,zip_file_path)
make_table[int(row)][int(col)].append(data)
#print(text)
span = tc.find('hp:cellSpan')
colSpan = span.get('colSpan')
rowSpan = span.get('rowSpan')
#make_table = make_table.tolist()
if rowSpan != "1" :
for i in range(0,int(rowSpan)):
make_table[int(row)+i][int(col)].append(t.text)
else :
make_table[int(row)][int(col)].append(t.text)
''' cellSpan을 구해서 같은 머라해야하노.. 세부분야?로 나눠져 있는 것을 넣어준다'''
'''if colSpan != "1":
for i in range(0,int(colSpan)):
make_table[int(row)][int(col)+i] = text
'''
cellSpan을 구해서 셀 병합 처리
'''
#make_table = make_table.tolist()
data = {
"type": "table",
"content": make_table,
}
tbl.decompose()
#self.obj_list.append(data)
return data
'''
for i in make_table[0]:
if i is None :
print(i)
'''
'''
data = {
"type": "table",
"content": make_table,
}
'''
'''
self.obj_list.append(data)
return make_table
'''
def text(self,p) :
text_list = []
......@@ -176,24 +132,15 @@ class print_xml :
else:
continue
text += ''.join(t.stripped_strings)
if text.strip() :
text_list.append(text)
#print(text)
#text_list.append(text)
#for i in p.find_all("hp:t"):
#text_array = np.array(text)
#text_array = text_array.tolist()
data = {
"type": "text",
"content": text_list
}
#print(data)
self.obj_list.append(data)
......@@ -222,22 +169,12 @@ class print_xml :
data = self.image(tag_p,zip_file_path)
self.obj_list.append(data)
#elif tag_p.find_all('hp:drawText'):
# self.drawText(tag_p)
elif tag_p.find_all('hp:t'):
if not tag_p.find_parent('hp:tbl'):
self.text(tag_p)
#exit()
# for run in tag_p.findall('./hp:run',self.np):
# for tag in run :
# if tag.tag == f'{{{self.np["hp"]}}}t':
# self.text(tag)
# elif tag.tag == f'{{{self.np["hp"]}}}tbl':
# self.table(tag)
# elif tag.tag == f'{{{self.np["hp"]}}}pic':
# self.image(tag,zip_file_path)
def add(self,list):
result = []
......@@ -247,12 +184,6 @@ class print_xml :
if not item["content"] or item["content"] == [[]]:
continue # 빈 content일 경우 추가하지 않고 건너뜀
current_text.append(item["content"])
#elif item["type"] == "table":
# for i in item["content"]:
# print(i)
#for j in range(len(i)):
# if i[j]
#exit()
elif item["type"] != "text":
if current_text:
result.append({"type": "text", "content": current_text})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment