xjc 3 years ago
parent
commit
de562078b0
4 changed files with 34 additions and 15 deletions
  1. +21 -2
      parsedocx.py
  2. BIN
      parsedocx.pyc
  3. BIN
      twainscan.pyc
  4. +13 -13
      xscanserver.py

+ 21 - 2
parsedocx.py

@@ -7,6 +7,7 @@ import pythoncom
 from upload_to_oss import TedOSS
 import threading 
 import shutil
+from win32com.client.dynamic import Dispatch, ERRORS_BAD_CONTEXT
 
 class DocxConverter(object):
     """
@@ -27,7 +28,7 @@ class DocxConverter(object):
                 imgfile = os.path.join(imgdir,file)
                 ossfile = uuidhtml+".files/"+file
                 self.oss.upload_from_local(imgfile,ossfile)
-            shutil.rmtree(imgdir)
+            #shutil.rmtree(imgdir)
         pythoncom.CoUninitialize()
         
     def docx2html(self):
@@ -38,8 +39,26 @@ class DocxConverter(object):
         self.word.Visible = 0
         self.doc = self.word.Documents.Open(self.docpath)
         self.uuidhtml = str(uuid.uuid4())
+        #html = os.path.join(os.path.dirname(self.docpath),self.uuidhtml+".html")
+        pdffile = os.path.join(os.path.dirname(self.docpath),self.uuidhtml+".pdf")
+        self.doc.SaveAs(pdffile,17)
         html = os.path.join(os.path.dirname(self.docpath),self.uuidhtml+".html")
-        self.doc.SaveAs(html,10)
+        
+        from win32com.client.dynamic import Dispatch, ERRORS_BAD_CONTEXT
+        
+        AvDoc = Dispatch("AcroExch.AVDoc")
+        try:
+            if AvDoc.Open(pdffile,""):
+                pdDoc = AvDoc.GetPDDoc()
+                jsObject = pdDoc.GetJSObject()
+                jsObject.SaveAs(html,"com.adobe.acrobat.html")
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+            AvDoc.close(True)
+        finally:
+            AvDoc.Close(True)
+            
         self.doc.Close()
         self.word.Quit()
         os.remove(self.docpath)

BIN
parsedocx.pyc


BIN
twainscan.pyc


+ 13 - 13
xscanserver.py

@@ -43,7 +43,7 @@ def parsedocx():
         html = docxconv.docx2html()
         parser = QuestionsParser(html)
         questions = parser.get_questions()
-        os.remove(html)
+        #os.remove(html)
         res = make_response(jsonify(questions))
         return res
         res = json.dumps(questions)
@@ -148,15 +148,15 @@ class XscanService(win32serviceutil.ServiceFramework):
         
  
 if __name__=='__main__': 
-    if len(sys.argv) == 1:
-        try:
-            evtsrc_dll = os.path.abspath(servicemanager.__file__)
-            servicemanager.PrepareToHostSingle(XscanService)
-            servicemanager.Initialize('XscanService', evtsrc_dll)
-            servicemanager.StartServiceCtrlDispatcher()
-        except win32service.error, details:
-            if details[0] == winerror.ERROR_FAILED_SERVICE_CONTROLLER_CONNECT:
-                win32serviceutil.usage()
-    else:
-        win32serviceutil.HandleCommandLine(XscanService)
-    #main()
+    #if len(sys.argv) == 1:
+    #    try:
+    #        evtsrc_dll = os.path.abspath(servicemanager.__file__)
+    #        servicemanager.PrepareToHostSingle(XscanService)
+    #        servicemanager.Initialize('XscanService', evtsrc_dll)
+    #        servicemanager.StartServiceCtrlDispatcher()
+    #    except win32service.error, details:
+    #        if details[0] == winerror.ERROR_FAILED_SERVICE_CONTROLLER_CONNECT:
+    #            win32serviceutil.usage()
+    #else:
+    #    win32serviceutil.HandleCommandLine(XscanService)
+    main()