Commit c9143d32 authored by leeyena

241106

parent cf911f9e
...@@ -38,34 +38,15 @@ class print_xml : ...@@ -38,34 +38,15 @@ class print_xml :
data_file = open(xml_file_path,'r',encoding='utf-8') data_file = open(xml_file_path,'r',encoding='utf-8')
soup = BeautifulSoup(data_file,"xml") soup = BeautifulSoup(data_file,"xml")
self.parsing(soup,extract_path) self.parsing(soup,extract_path)
'''
#soup_xml = soup.prettify() xml 원본 파일 text 로 확인하기
'''
#with open(xml_file_path,'rb') as f:
# xml_content = f.read()
#root = etree.XML(xml_content)
#pretty_xml_string = etree.tostring(root,pretty_print=True).decode('utf-8')
''' '''
with open(f"fine{i}.txt", "w",encoding='utf-8') as file: with open(f"fine{i}.txt", "w",encoding='utf-8') as file:
file.write(soup.prettify()) file.write(soup.prettify())
exit() exit()
''' '''
return True return True
'''
xml_dict = xmltodict.parse(xml_content)
json_data = json.dumps(xml_dict, indent=4)
# JSON 데이터를 파일로 저장
with open('output.json', 'w') as json_file:
json.dump(xml_dict, json_file, indent=4)
print("JSON 파일로 저장 완료: output.json")
'''
#print(pretty_xml_string)
#exit()
#return self.parsing(soup,extract_path)
def table(self,tbl,zip_file_path) : def table(self,tbl,zip_file_path) :
...@@ -110,57 +91,32 @@ class print_xml : ...@@ -110,57 +91,32 @@ class print_xml :
continue continue
elif t.text is None: elif t.text is None:
continue continue
#text += t.text
if run.find("hp:tbl") : if run.find("hp:tbl") :
data = self.table(run,zip_file_path) data = self.table(run,zip_file_path)
make_table[int(row)][int(col)].append(data) make_table[int(row)][int(col)].append(data)
#print(text)
span = tc.find('hp:cellSpan') span = tc.find('hp:cellSpan')
colSpan = span.get('colSpan') colSpan = span.get('colSpan')
rowSpan = span.get('rowSpan') rowSpan = span.get('rowSpan')
#make_table = make_table.tolist()
if rowSpan != "1" : if rowSpan != "1" :
for i in range(0,int(rowSpan)): for i in range(0,int(rowSpan)):
make_table[int(row)+i][int(col)].append(t.text) make_table[int(row)+i][int(col)].append(t.text)
else : else :
make_table[int(row)][int(col)].append(t.text) make_table[int(row)][int(col)].append(t.text)
''' cellSpan을 구해서 같은 머라해야하노.. 세부분야?로 나눠져 있는 것을 넣어준다''' '''
cellSpan을 구해서 셀 병합 처리
'''if colSpan != "1":
for i in range(0,int(colSpan)):
make_table[int(row)][int(col)+i] = text
''' '''
#make_table = make_table.tolist()
data = { data = {
"type": "table", "type": "table",
"content": make_table, "content": make_table,
} }
tbl.decompose() tbl.decompose()
#self.obj_list.append(data)
return data return data
'''
for i in make_table[0]:
if i is None :
print(i)
'''
'''
data = {
"type": "table",
"content": make_table,
}
'''
'''
self.obj_list.append(data)
return make_table
'''
def text(self,p) : def text(self,p) :
text_list = [] text_list = []
...@@ -176,24 +132,15 @@ class print_xml : ...@@ -176,24 +132,15 @@ class print_xml :
else: else:
continue continue
text += ''.join(t.stripped_strings) text += ''.join(t.stripped_strings)
if text.strip() : if text.strip() :
text_list.append(text) text_list.append(text)
#print(text)
#text_list.append(text)
#for i in p.find_all("hp:t"):
#text_array = np.array(text)
#text_array = text_array.tolist()
data = { data = {
"type": "text", "type": "text",
"content": text_list "content": text_list
} }
#print(data)
self.obj_list.append(data) self.obj_list.append(data)
...@@ -222,23 +169,13 @@ class print_xml : ...@@ -222,23 +169,13 @@ class print_xml :
data = self.image(tag_p,zip_file_path) data = self.image(tag_p,zip_file_path)
self.obj_list.append(data) self.obj_list.append(data)
#elif tag_p.find_all('hp:drawText'): #elif tag_p.find_all('hp:drawText'):
# self.drawText(tag_p) # self.drawText(tag_p)
elif tag_p.find_all('hp:t'): elif tag_p.find_all('hp:t'):
if not tag_p.find_parent('hp:tbl'): if not tag_p.find_parent('hp:tbl'):
self.text(tag_p) self.text(tag_p)
#exit()
# for run in tag_p.findall('./hp:run',self.np):
# for tag in run :
# if tag.tag == f'{{{self.np["hp"]}}}t':
# self.text(tag)
# elif tag.tag == f'{{{self.np["hp"]}}}tbl':
# self.table(tag)
# elif tag.tag == f'{{{self.np["hp"]}}}pic':
# self.image(tag,zip_file_path)
def add(self,list): def add(self,list):
result = [] result = []
current_text = [] current_text = []
...@@ -247,12 +184,6 @@ class print_xml : ...@@ -247,12 +184,6 @@ class print_xml :
if not item["content"] or item["content"] == [[]]: if not item["content"] or item["content"] == [[]]:
continue # 빈 content일 경우 추가하지 않고 건너뜀 continue # 빈 content일 경우 추가하지 않고 건너뜀
current_text.append(item["content"]) current_text.append(item["content"])
#elif item["type"] == "table":
# for i in item["content"]:
# print(i)
#for j in range(len(i)):
# if i[j]
#exit()
elif item["type"] != "text": elif item["type"] != "text":
if current_text: if current_text:
result.append({"type": "text", "content": current_text}) result.append({"type": "text", "content": current_text})
...@@ -263,7 +194,7 @@ class print_xml : ...@@ -263,7 +194,7 @@ class print_xml :
result.append({"type": "text", "content": current_text}) result.append({"type": "text", "content": current_text})
return result return result
''' '''
def drawText (self,p): def drawText (self,p):
text = "" text = ""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment