From: NeilBrown Date: Fri, 26 May 2023 09:57:24 +0000 (+1000) Subject: pdf-to-text convert to delayed/async conversion X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=5749fd39d5e1ec0291449f5abbfe0f8b9a95fdf0;p=edlib.git pdf-to-text convert to delayed/async conversion pdf-to-text now happens asynchronously on demand Signed-off-by: NeilBrown --- diff --git a/DOC/TODO.md b/DOC/TODO.md index fb27b4ce..855e5a7e 100644 --- a/DOC/TODO.md +++ b/DOC/TODO.md @@ -56,10 +56,12 @@ Current priorities Bugs to be fixed ---------------- +- [ ] notmuch: don't mark up links in text created by w3m - they are + already marked if needed. - [X] notmuch: "reply" should clear unread/new flags. - [X] transparent images appear in email with horiz lines - [ ] Replying to w3m/html mail results in unsightly markup in reply -- [ ] converting email parts with external code should be async +- [X] converting email parts with external code should be async - [X] fill:end-re should default to fill:start-re - [X] md-mode should set the fill re-s don't rely on config - there isn't an md-mode yet! 
# python/lib-pdf-to-text.py: asynchronous pdf-to-text conversion
# (the post-change state of the code touched by this patch hunk).

import codecs
import fcntl
import os
import subprocess


class pdf_pane(edlib.Pane):
    # Helper pane stacked on a (initially empty) text document.  It runs
    # /usr/bin/pdftotext on the PDF bytes in 'content' and feeds the
    # output into the document as it arrives, so the editor does not
    # block while the conversion runs.
    #
    # NOTE(review): the one-line docstrings of the form "handle:..." look
    # like the way edlib binds a method to a key, so they are runtime
    # behaviour and must stay byte-for-byte as they are.  Explanatory
    # text therefore lives in '#' comments throughout this class.

    def __init__(self, focus, content, delayed):
        # focus:   the document pane that receives the converted text.
        # content: the raw PDF bytes to convert.
        # delayed: when true, only request a "convert-now" notification
        #          and convert when it arrives; otherwise start at once.
        edlib.Pane.__init__(self, focus)
        self.doc = focus
        self.pipe = None
        self.add_notify(focus, "Close")
        self.content = content
        # True while the document still holds only the placeholder text.
        self.have_converting = True
        # Incremental decoder: a multi-byte UTF-8 sequence may be split
        # across two reads from the pipe; decoding each chunk on its own
        # with 'ignore' would silently drop such characters.
        self.utf8 = codecs.getincrementaldecoder('utf-8')('ignore')
        focus.call("doc:replace", 1, "(Converting content to text...)\n")
        if delayed:
            self.call("doc:request:convert-now")
        else:
            self.handle_visible("key", focus)

    def handle_visible(self, key, focus, **a):
        "handle:convert-now"

        # Start pdftotext, hand it the PDF on stdin, and register a
        # non-blocking reader for its stdout.
        if self.pipe:
            # A conversion is already running; starting another would
            # leak the first child process.
            return

        p = subprocess.Popen(["/usr/bin/pdftotext", "-layout", "-", "-"],
                             close_fds=True,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             stdin=subprocess.PIPE)

        self.pipe = p
        # FIXME this could block if pipe fills
        try:
            # The buffered write() delivers every byte; a bare os.write()
            # may write only part of the data.
            p.stdin.write(self.content)
            p.stdin.close()
        except BrokenPipeError:
            # The child exited without reading all of its input.  Its
            # stderr is collected and reported when stdout reaches EOF.
            pass
        # With stdin cleared, a later communicate() will not touch it.
        p.stdin = None
        fd = p.stdout.fileno()
        fcntl.fcntl(fd, fcntl.F_SETFL,
                    fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK)
        self.call("event:read", fd, self.read)

    def handle_close(self, key, **a):
        "handle:Close"

        # This pane is closing: stop and reap any running conversion.
        self._reap()
        return 1

    def handle_doc_close(self, key, focus, **a):
        "handle:Notify:Close"
        # The target document is closing: forget it and stop the child.
        if focus == self.doc:
            self.doc = None
            # Reap as well as kill, otherwise the child stays a zombie:
            # read() gives up as soon as the document is gone.
            self._reap()
        return 1

    def _reap(self):
        # Kill the converter, if any, and collect its exit status.
        p = self.pipe
        if not p:
            return
        # Clear first so the read callback never touches closed pipes.
        self.pipe = None
        p.kill()
        p.communicate()

    def _insert(self, text):
        # Put 'text' into the document.  The first insertion covers the
        # range from a mark at the start to a mark moved by "doc:set-ref",
        # replacing the placeholder; later insertions use a single mark.
        # NOTE(review): presumably "doc:set-ref" without a number moves
        # the mark to the end of the document - confirm against edlib.
        if self.have_converting:
            m = edlib.Mark(self.doc)
            m2 = m.dup()
            m.step(1)
            self.have_converting = False
        else:
            m = edlib.Mark(self.doc)
            m2 = m
        self.doc.call("doc:set-ref", m2)
        self.doc.call("doc:replace", 1, text, m, m2)

    def read(self, key, **a):
        # Callback registered with "event:read" for the child's stdout.
        # Returns 1 to keep being called, edlib.Efalse when finished.
        if not self.pipe:
            return edlib.Efalse
        try:
            r = os.read(self.pipe.stdout.fileno(), 65536)
        except (BlockingIOError, InterruptedError):
            # Nothing to read just now; wait for the next event.
            return 1
        except OSError as e:
            # A permanent error would otherwise be retried forever;
            # treat it like end-of-file so the child gets reaped.
            edlib.LOG("pdf-to-text", str(e))
            r = b''

        if not self.doc:
            self._reap()
            return edlib.Efalse

        if r:
            text = self.utf8.decode(r)
            if text:
                self._insert(text)
            return 1
        # EOF
        p = self.pipe
        self.pipe = None
        out, err = p.communicate()
        msg = err.decode('utf-8', 'ignore') if err else ""
        if msg:
            edlib.LOG("pdf-to-text", msg)
        if self.have_converting:
            # No text was ever produced: replace the placeholder with a
            # failure report, as the synchronous converter used to do.
            self._insert("PDF conversion failed\n" + msg)

        self.close()
        return edlib.Efalse


def pdf_to_text(key, home, focus, num, comm2, **a):
    # Create an empty "pdf-document" text document, attach a pdf_pane
    # that fills it asynchronously from the PDF bytes of 'focus', and
    # pass the new document to the caller through comm2.
    # 'num' is handed to pdf_pane as its 'delayed' flag.
    pdf = focus.call("doc:get-bytes", ret='bytes')

    doc = focus.call("doc:from-text", "pdf-document", "", ret='pane')
    pdf_pane(doc, pdf, num)
    comm2("cb", doc)
    return 1