already marked if needed. This was fixed by making conversion
async so:
- [ ] async email part converts need to do their own URL marking.
- PDF particularly, and html2md. Maybe others.
+ PDF particularly, and html2md. Maybe others. - LATER: pdf and doc
+ now do it by copying the code.
- [X] notmuch: "reply" should clear unread/new flags.
- [X] transparent images appear in email with horiz lines
- [X] Replying to w3m/html mail results in unsightly markup in reply
self.doc.call("doc:set-ref", m2)
self.doc.call("doc:replace", 1, out.decode("utf-8", 'ignore'),
m, m2)
+ self.mark_urls(self.doc)
self.close()
return edlib.Efalse
+ def mark_urls(self, doc):  # Find URLs in doc, tag each with a "render:url" attribute, and record its text on doc. NOTE(review): an identical copy exists in the other converter in this patch.
+ ms = edlib.Mark(doc)  # search cursor; presumably starts at the beginning of doc -- TODO confirm
+ me = ms.dup()  # limit passed to the search
+ doc.call("doc:set-ref", me)  # presumably moves me to the end of doc -- TODO confirm
+ while ms < me:  # one URL handled per iteration
+ try:
+ len = doc.call("text-search",  # NOTE(review): 'len' shadows the builtin inside this method
+ "(http|https|ftp|mail):[^][\\s\";<>]+", ms, me)
+ len -= 1  # the result is treated as match length + 1
+ except:  # NOTE(review): bare except; any failure (presumably "no match") silently ends the scan
+ return
+ # People sometimes put a period or ')' at the end of a URL; step ms back over them.
+ while doc.prior(ms) in '.)':
+ doc.prev(ms)
+ len -= 1  # keep the length in step with the shortened URL
+ m1 = ms.dup()  # m1 will become the start of the URL; ms is treated as its end
+ i = 0
+ while i < len:  # walk m1 back len characters from ms
+ doc.prev(m1)
+ i += 1
+ url = doc.call("doc:get-str", m1, ms, ret='str')  # text between m1 and ms, i.e. the URL
+ tag = doc['next-url-tag']  # doc attribute holding the next tag number as a string
+ if not tag:
+ tag = "1"  # nothing stored yet, so numbering starts at 1
+ doc.call("doc:set-attr", 1, m1, "render:url", "%d:%s"%(len,tag))  # value is "<length>:<tag>", set at m1
+ doc['next-url-tag'] = "%d" % (int(tag) + 1)  # advance the counter for the next URL
+ doc["url:" + tag] = url  # store the URL text under "url:<tag>"
+
def doc_to_text(key, home, focus, num, str1, comm2, **a):
if not str1 or '.' not in str1:
self.pipe = None
if err:
edlib.LOG("pdf-to-text", err.decode('utf-8','ignore'))
+ else:
+ self.mark_urls(self.doc)
self.close()
return edlib.Efalse
+ def mark_urls(self, doc):  # Find URLs in doc, tag each with a "render:url" attribute, and record its text on doc. NOTE(review): an identical copy exists in the other converter in this patch.
+ ms = edlib.Mark(doc)  # search cursor; presumably starts at the beginning of doc -- TODO confirm
+ me = ms.dup()  # limit passed to the search
+ doc.call("doc:set-ref", me)  # presumably moves me to the end of doc -- TODO confirm
+ while ms < me:  # one URL handled per iteration
+ try:
+ len = doc.call("text-search",  # NOTE(review): 'len' shadows the builtin inside this method
+ "(http|https|ftp|mail):[^][\\s\";<>]+", ms, me)
+ len -= 1  # the result is treated as match length + 1
+ except:  # NOTE(review): bare except; any failure (presumably "no match") silently ends the scan
+ return
+ # People sometimes put a period or ')' at the end of a URL; step ms back over them.
+ while doc.prior(ms) in '.)':
+ doc.prev(ms)
+ len -= 1  # keep the length in step with the shortened URL
+ m1 = ms.dup()  # m1 will become the start of the URL; ms is treated as its end
+ i = 0
+ while i < len:  # walk m1 back len characters from ms
+ doc.prev(m1)
+ i += 1
+ url = doc.call("doc:get-str", m1, ms, ret='str')  # text between m1 and ms, i.e. the URL
+ tag = doc['next-url-tag']  # doc attribute holding the next tag number as a string
+ if not tag:
+ tag = "1"  # nothing stored yet, so numbering starts at 1
+ doc.call("doc:set-attr", 1, m1, "render:url", "%d:%s"%(len,tag))  # value is "<length>:<tag>", set at m1
+ doc['next-url-tag'] = "%d" % (int(tag) + 1)  # advance the counter for the next URL
+ doc["url:" + tag] = url  # store the URL text under "url:<tag>"
+
def pdf_to_text(key, home, focus, num, comm2, **a):
pdf = focus.call("doc:get-bytes", ret='bytes')