# -*- coding: utf-8 -*-
# Source repository: xiaojincai/docxconvert
import re,os
import json,uuid
from bs4 import BeautifulSoup
from win32com.client import Dispatch
import pythoncom
from upload_to_oss import TedOSS
import threading 
import shutil
#from win32com.client.dynamic import Dispatch, ERRORS_BAD_CONTEXT

class DocxConverter(object):
    """Convert a Word document to PDF by automating Microsoft Word over COM.

    The instance keeps the path of the document to convert (``docpath``)
    and a ``TedOSS`` client (``oss``) used by :meth:`upload_imgfiles`.
    """

    # Value passed to ``Document.SaveAs`` to request PDF output
    # (``wdFormatPDF`` in Word's ``WdSaveFormat`` enumeration).
    WD_FORMAT_PDF = 17

    def __init__(self,docpath="test4.docx"):
        """Store the document path and create the OSS client.

        :param docpath: path of the Word document to convert.
        """
        self.docpath = docpath
        self.oss = TedOSS()

    def upload_imgfiles(self,uuidhtml):
        """Upload every entry of the ``<uuidhtml>.files`` directory to OSS.

        The directory is looked up next to ``self.docpath``. Each entry is
        sent with ``self.oss.upload_from_local`` under the remote key
        ``<uuidhtml>.files/<entry name>``. Nothing is uploaded when the
        directory does not exist.

        This method performs no COM work, so it does not touch the COM
        apartment; ``docx2pdf`` balances its own ``CoInitialize`` call.

        :param uuidhtml: base name (without the ``.files`` suffix) of the
            directory to upload.
        """
        imgdir = os.path.join(os.path.dirname(self.docpath), uuidhtml + ".files")
        if not os.path.exists(imgdir):
            return
        for name in os.listdir(imgdir):
            local_path = os.path.join(imgdir, name)
            # Remote keys always use "/" regardless of the local separator.
            remote_key = uuidhtml + ".files/" + name
            self.oss.upload_from_local(local_path, remote_key)

    def docx2pdf(self):
        """Save ``self.docpath`` as a PDF next to it and delete the original.

        A fresh UUID is stored in ``self.uuidhtml`` and used as the PDF base
        name. The source document is removed only after Word has saved the
        PDF and shut down without raising.

        :returns: absolute path of the generated ``<uuid>.pdf`` file.
        :raises: whatever the COM calls or ``os.remove`` raise; the document
            is closed, Word is quit and COM is uninitialized before the
            exception propagates.
        """
        # Word does not resolve relative paths against this process's working
        # directory, so hand it (and os.remove below) one absolute path.
        source = os.path.abspath(self.docpath)
        self.uuidhtml = str(uuid.uuid4())
        pdffile = os.path.join(os.path.dirname(source), self.uuidhtml + ".pdf")

        pythoncom.CoInitialize()
        try:
            self.word = Dispatch("Word.Application")
            try:
                self.word.Visible = 0
                self.doc = self.word.Documents.Open(source)
                try:
                    self.doc.SaveAs(pdffile, self.WD_FORMAT_PDF)
                finally:
                    # Always release the document, even if SaveAs failed.
                    self.doc.Close()
            finally:
                # Never leave a hidden Word process behind.
                self.word.Quit()
        finally:
            # Drop the COM references before tearing COM down so they are
            # not released later against an uninitialized apartment.
            self.doc = None
            self.word = None
            pythoncom.CoUninitialize()

        os.remove(source)

        return pdffile

if __name__ == "__main__":
    # Manual smoke run: convert one local document to PDF.
    # NOTE: docx2pdf() deletes the source .docx after a successful conversion.
    docpath = "E:\\WorkSpace\\docxconvert\\lasttest.docx"
    docxconverter = DocxConverter(docpath)
    # DocxConverter defines no docx2html(); docx2pdf() is its conversion
    # entry point, so the previous call raised AttributeError.
    docxconverter.docx2pdf()