git.neil.brown.name Git - edlib.git/commitdiff
pdf-to-text convert to delayed/async conversion
author NeilBrown <neil@brown.name>
Fri, 26 May 2023 09:57:24 +0000 (19:57 +1000)
committer NeilBrown <neil@brown.name>
Fri, 26 May 2023 22:18:43 +0000 (08:18 +1000)
pdf-to-text now happens asynchronously on demand

Signed-off-by: NeilBrown <neil@brown.name>
DOC/TODO.md
python/lib-pdf-to-text.py

index fb27b4ce371e6faef27e6d5ba51710ac764b6125..855e5a7e7d83e03373a4879773e01e5273238dc7 100644 (file)
@@ -56,10 +56,12 @@ Current priorities
 Bugs to be fixed
 ----------------
 
+- [ ] notmuch: don't mark up links in text created by w3m - they are
+      already marked if needed.
 - [X] notmuch: "reply" should clear unread/new flags.
 - [X] transparent images appear in email with horiz lines
 - [ ] Replying to w3m/html mail results in unsightly markup in reply
-- [ ] converting email parts with external code should be async
+- [X] converting email parts with external code should be async
 - [X] fill:end-re should default to fill:start-re
 - [X] md-mode should set the fill re-s don't rely on config - there
       isn't an md-mode yet!
index 0c1bfa2957e5c2ba9cf1bb60077fe62d1c51539f..9531d319df7bf7f73bed46f73de7f0eb07005e92 100644 (file)
@@ -8,25 +8,93 @@
 
 import subprocess
 
-def pdf_to_text(key, home, focus, comm2, **a):
+class pdf_pane(edlib.Pane):
+    def __init__(self, focus, content, delayed):
+        edlib.Pane.__init__(self, focus)
+        self.doc = focus
+        self.pipe = None
+        self.add_notify(focus, "Close")
+        self.content = content
+        self.have_converting = True
+        focus.call("doc:replace", 1, "(Converting content to text...)\n")
+        if delayed:
+            self.call("doc:request:convert-now")
+        else:
+            self.handle_visible("key", focus)
+
+    def handle_visible(self, key, focus, **a):
+        "handle:convert-now"
+
+        p = subprocess.Popen(["/usr/bin/pdftotext", "-layout", "-", "-"], close_fds=True,
+                             stdout=subprocess.PIPE, stderr = subprocess.PIPE,
+                             stdin =subprocess.PIPE)
+
+        self.pipe = p
+        # FIXME this could block if pipe fills
+        os.write(p.stdin.fileno(), self.content)
+        p.stdin.close()
+        p.stdin = None
+        fd = p.stdout.fileno()
+        fcntl.fcntl(fd, fcntl.F_SETFL,
+                    fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK)
+        self.call("event:read", fd, self.read)
+
+    def handle_close(self, key, **a):
+        "handle:Close"
+
+        if self.pipe:
+            self.pipe.kill()
+            self.pipe.communicate()
+        return 1
+
+    def handle_doc_close(self, key, focus, **a):
+        "handle:Notify:Close"
+        if focus == self.doc:
+            self.doc = None
+            if self.pipe:
+                self.pipe.kill()
+        return 1
+
+    def read(self, key, **a):
+        if not self.pipe:
+            return edlib.Efalse
+        try:
+            r = os.read(self.pipe.stdout.fileno(), 65536)
+        except IOError:
+            return 1
+
+        if not self.doc:
+            return edlib.Efalse
+
+        if r:
+            if self.have_converting:
+                m = edlib.Mark(self.doc)
+                m2 = m.dup()
+                m.step(1)
+                self.have_converting = False
+            else:
+                m = edlib.Mark(self.doc)
+                m2 = m
+            self.doc.call("doc:set-ref", m2)
+            self.doc.call("doc:replace", 1, r.decode('utf-8','ignore'),
+                          m, m2)
+            return 1
+        # EOF
+        if not self.pipe:
+            return edlib.Efalse
+        out, err = self.pipe.communicate()
+        self.pipe = None
+        if err:
+            edlib.LOG("pdf-to-text", err.decode('utf-8','ignore'))
+
+        self.close()
+        return edlib.Efalse
+
+def pdf_to_text(key, home, focus, num, comm2, **a):
     pdf = focus.call("doc:get-bytes", ret='bytes')
 
-    p = subprocess.Popen(["/usr/bin/pdftotext", "-layout", "-", "-"], close_fds=True,
-                         stdout=subprocess.PIPE, stderr = subprocess.PIPE,
-                         stdin =subprocess.PIPE)
-    out,err = p.communicate(pdf)
-    if err:
-        edlib.LOG("pdftotext:", err.decode("utf-8", 'ignore'))
-
-    if out:
-        doc = focus.call("doc:from-text", "pdf-document",
-                         out.decode("utf-8", 'ignore'),
-                         ret='pane')
-    else:
-        doc = focus.call("doc:from-text", "pdf-document",
-                         "PDF conversion failed\n" +
-                         err.decode('utf-8','ignore'),
-                         ret='pane')
+    doc = focus.call("doc:from-text", "pdf-document", "", ret='pane')
+    pdf_pane(doc, pdf, num)
     comm2("cb", doc)
     return 1