241106

c9143d32 · leeyena · cf911f9e · c9143d32
Commit c9143d32 authored Nov 06, 2024 by leeyena
Show whitespace changes
Inline Side-by-side

Showing with 13 additions and 82 deletions

test05.py hwpx/test05.py +13 -82

No files found.
--- a/hwpx/test05.py
+++ b/hwpx/test05.py
@@ -38,14 +38,9 @@ class print_xml :
            data_file = open(xml_file_path,'r',encoding='utf-8')
            soup = BeautifulSoup(data_file,"xml")
            self.parsing(soup,extract_path)
-        
-        #soup_xml = soup.prettify()
-
-        #with open(xml_file_path,'rb') as f:
-        #   xml_content = f.read()
-        
-        #root = etree.XML(xml_content)
-        #pretty_xml_string = etree.tostring(root,pretty_print=True).decode('utf-8')
+        '''
+        xml 원본 파일 text 로 확인하기
+        '''
        '''
            with open(f"fine{i}.txt", "w",encoding='utf-8') as file:
                file.write(soup.prettify())
@@ -53,20 +48,6 @@ class print_xml :
        '''
        return True
               
-        
-        '''
-        xml_dict = xmltodict.parse(xml_content)
-        json_data = json.dumps(xml_dict, indent=4)
-        # JSON 데이터를 파일로 저장
-        with open('output.json', 'w') as json_file:
-            json.dump(xml_dict, json_file, indent=4)
-
-        print("JSON 파일로 저장 완료: output.json")
-        '''
-        #print(pretty_xml_string)
-        #exit()
-        #return self.parsing(soup,extract_path) 
-               
    def table(self,tbl,zip_file_path) :
        
        max = 0
@@ -110,57 +91,32 @@ class print_xml :
                                continue
                        elif t.text is None:
                            continue 
-                        #text += t.text 
        
                        if run.find("hp:tbl") :
                            data = self.table(run,zip_file_path)   
                            make_table[int(row)][int(col)].append(data)
     
-                       
-                        #print(text)
                        span = tc.find('hp:cellSpan')
                        colSpan = span.get('colSpan')
                        rowSpan = span.get('rowSpan')
-                            #make_table = make_table.tolist()
+                     
                        if rowSpan != "1" :
                            for i in range(0,int(rowSpan)):
                                make_table[int(row)+i][int(col)].append(t.text)
-                        
                        else :
                            make_table[int(row)][int(col)].append(t.text) 
        
-                            ''' cellSpan을 구해서 같은 머라해야하노.. 세부분야?로 나눠져 있는 것을 넣어준다'''
-                        
-                            '''if colSpan != "1":
-                            for i in range(0,int(colSpan)):
-                                make_table[int(row)][int(col)+i] = text 
+                            ''' 
+                            cellSpan을 구해서 셀 병합 처리
                            '''
  
-        #make_table = make_table.tolist()
        data = {
            "type": "table",
            "content": make_table,
        }
        tbl.decompose() 
-        #self.obj_list.append(data)
        return data
    
-        '''
-        for i in make_table[0]:
-            if i is None :
-                print(i)
-        '''
-        '''
-        data = {
-            "type": "table",
-            "content": make_table,
-        }
-        '''
-        '''
-        self.obj_list.append(data)
-         
-        return make_table 
-        '''

    def text(self,p) :
        text_list = []
@@ -176,24 +132,15 @@ class print_xml :
            else:
                continue
            text += ''.join(t.stripped_strings)
+            
        if text.strip() :
            text_list.append(text)
            
-        #print(text)
-        #text_list.append(text)
-     
-            
-        
-        #for i in p.find_all("hp:t"):
-       
-          
-        #text_array = np.array(text)
-        #text_array = text_array.tolist()
        data = {
            "type": "text",
            "content": text_list
        } 
-        #print(data)
+
        self.obj_list.append(data)


@@ -222,22 +169,12 @@ class print_xml :
                data = self.image(tag_p,zip_file_path)
                self.obj_list.append(data) 
                
-            
            #elif tag_p.find_all('hp:drawText'):
            #    self.drawText(tag_p)
            
            elif tag_p.find_all('hp:t'):
                if not tag_p.find_parent('hp:tbl'):
                    self.text(tag_p)    
-        #exit()
-            # for run in tag_p.findall('./hp:run',self.np):
-            #     for tag in run : 
-            #         if tag.tag == f'{{{self.np["hp"]}}}t':
-            #             self.text(tag)
-            #         elif tag.tag == f'{{{self.np["hp"]}}}tbl':
-            #             self.table(tag) 
-            #         elif tag.tag == f'{{{self.np["hp"]}}}pic': 
-            #             self.image(tag,zip_file_path)
                    
    def add(self,list):
        result = []
@@ -247,12 +184,6 @@ class print_xml :
                if not item["content"] or item["content"] == [[]]:
                    continue  # 빈 content일 경우 추가하지 않고 건너뜀
                current_text.append(item["content"])
-            #elif item["type"] == "table":
-            #    for i in item["content"]:
-            #        print(i)
-                    #for j in range(len(i)):
-                    #    if i[j]
-                #exit()    
            elif item["type"] != "text":
                if current_text:
                    result.append({"type": "text", "content": current_text})