]> git.neil.brown.name Git - edlib.git/commitdiff
Add lib-unicode-names and use it for emacs :C-q
author NeilBrown <neil@brown.name>
Sun, 2 Jul 2023 05:14:58 +0000 (15:14 +1000)
committer NeilBrown <neil@brown.name>
Wed, 12 Jul 2023 22:17:52 +0000 (08:17 +1000)
:C-q can now be given a unicode name from NamesList.txt and it will
insert the first match - which might not be the one you want.

Signed-off-by: NeilBrown <neil@brown.name>
DOC/TODO.md
Makefile
lib-unicode-names.c [new file with mode: 0644]
mode-emacs.c
modules.ini

index dc8c185834c5b402d95b71d478cab036a545c6e0..ec589cbb66c9b9239c18c9c63623d8429f4cf46f 100644 (file)
@@ -25,7 +25,7 @@ the file.
       lines containing current content.
 - [X] emacs: Num-C-l doesn't work if it would require part of a wrapped line
       off top of screen
-- [ ] emacs: :C-q to recognise names of Unicode chars: e.g. WASTEBASKET
+- [X] emacs: :C-q to recognise names of Unicode chars: e.g. WASTEBASKET
        Possibly matches a list which continued :C-q cycles through
 - [ ] linecount 'view' mode improvements
 - [ ] allocate pane->data together with pane.  A single allocation so
@@ -279,8 +279,12 @@ Module features
 
 - [X] Num-C-l doesn't work if it would require part of a wrapped line
       off top of screen
-- [ ] :C-q to recognise names of Unicode chars: e.g. WASTEBASKET
+- [X] :C-q to recognise names of Unicode chars: e.g. WASTEBASKET
        Possibly matches a list which continued :C-q cycles through
+- [ ] :CX-= to look-up unicode name of character if not ASCII
+- [ ] ":C-q ?" or "- :C-q" to open a find dialog with list of unicode
+      chars.
+- [ ] :C-q when finding unicode chars, allow repeated :C-q to find more
 - [ ] split some generic functionality like arrows and mouse clicks
       into a separate module to be shared with other edit modes.
 - [ ] sort the command names for command-completion?
index 60258e011113fa57d94e2abe22aa4313346ba46b..f16bbed476397d846b9a230056681841bac187cf 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -86,6 +86,7 @@ SHOBJ = O/doc-text.o O/doc-dir.o O/doc-docs.o \
        O/lib-renderline.o O/lib-x11selection-gtk.o O/lib-autosave.o \
        O/lib-x11selection-xcb.o O/display-x11-xcb.o \
        O/lib-linefilter.o O/lib-wiggle.o O/lib-aspell.o O/lib-calc.o \
+       O/lib-unicode-names.o \
        O/lang-python.o \
        O/mode-emacs.o O/emacs-search.o \
        O/display-ncurses.o
@@ -238,7 +239,7 @@ test-rexel: rexel
        ./rexel -T
 
 NamesList.txt:
-       wget http://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt
+       wget https://unicode.org/Public/UNIDATA/NamesList.txt
 
 checksym: edlib
        @nm edlib  | awk '$$2 == "T" {print $$3}' | while read a; do grep $$a *.h > /dev/null || echo  $$a; done | grep -vE '^(_.*|main)$$' ||:
diff --git a/lib-unicode-names.c b/lib-unicode-names.c
new file mode 100644 (file)
index 0000000..8c4f94e
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright Neil Brown ©2023 <neil@brown.name>
+ * May be distributed under terms of GPLv2 - see file:COPYING
+ *
+ * Parse the Unicode NamesList.txt file to find names for
+ * unicode characters.
+ */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "core.h"
+
+/* State behind the registered "Unicode-names" command. */
+struct unicode_data {
+       /* Embedded command; container_of() on it recovers this structure. */
+       struct command c;
+       /* Mapping of NamesList.txt created by mmap(); not NUL terminated. */
+       char *names;
+       /* Size of the mapping in bytes, as reported by lseek(). */
+       int len;
+};
+
+/*
+ * Search the mapped NamesList.txt for characters whose primary or
+ * secondary name contains a word starting with "name" (case ignored)
+ * and report each hit through "c" as a "cb" call carrying the code
+ * point as the number and the matched name as the string.
+ *
+ * ud:    mapping to search; nothing is reported if ud->names is NULL.
+ * name:  start of a word to look for.
+ * which: 0 to report every match, otherwise report only the which'th
+ *        match and stop.
+ * p:     pane used for "text-search", strconcat() and the callback.
+ * c:     command which receives each result.
+ */
+static void report_names(struct unicode_data *ud safe, const char *name safe,
+                        int which,
+                        struct pane *p safe, struct command *c safe)
+{
+       /* name must be start of a word, as either primary or secondary
+        * name.  Ignore case.
+        * If "which" is zero, return them all, else only return the
+        * nth one where which==n
+        *
+        * The field separators are written as explicit "\t" escapes so
+        * that the pattern agrees with the '\t' tests made below.
+        */
+       char *ptn = strconcat(p, "?i:^([0-9A-F]{4,5}\t|\t= ).*\\b", name);
+       int i;
+
+       if (!ud->names)
+               return;
+
+       for (i = 0; i < ud->len; ) {
+               int ch, s;
+               char *cp, *n, *eol, *end;
+
+               s = call("text-search", p, 0, NULL, ptn,
+                        ud->len - i, NULL, ud->names + i);
+               if (s <= 0)
+                       break;
+               i += s-1;
+               /* i is now the start of the match */
+               if (i >= ud->len)
+                       break;
+               cp = ud->names + i;
+               /* The mapping is not NUL terminated, so the search for
+                * the end of line must be bounded by the bytes that
+                * remain rather than relying on strchr().
+                */
+               eol = memchr(cp, '\n', ud->len - i);
+               if (!eol)
+                       break;
+               /* Next search resumes on the following line */
+               i = (eol - ud->names) + 1;
+               if (eol > cp && eol[-1] == '\r')
+                       eol -= 1;
+               if (*cp == '\t') {
+                       /* secondary name "\t= "*/
+                       n = strndup(cp+3, eol-cp-3);
+                       /* find number: walk back to the start of the
+                        * nearest earlier line that doesn't begin
+                        * with a tab.
+                        */
+                       while (cp > ud->names &&
+                              (cp[-1] != '\n' || cp[0] == '\t'))
+                               cp -= 1;
+               } else {
+                       /* primary name "XXXXX?\t" */
+                       if (cp[4] == '\t')
+                               n = strndup(cp+5, eol-cp-5);
+                       else
+                               n = strndup(cp+6, eol-cp-6);
+               }
+               ch = strtoul(cp, &end, 16);
+               /* Only accept a 4 or 5 digit hex code point */
+               if (end == cp+4 || end == cp+5) {
+                       if (which == 0)
+                               comm_call(c, "cb", p, ch, NULL, n);
+                       else {
+                               which -= 1;
+                               if (which == 0) {
+                                       comm_call(c, "cb", p, ch, NULL, n);
+                                       /* Requested match reported: stop */
+                                       i = ud->len;
+                               }
+                       }
+               }
+               free(n);
+       }
+}
+
+/*
+ * "free" hook for the registered command: release the NamesList.txt
+ * mapping, if there is one.
+ */
+static void unicode_free(struct command *c safe)
+{
+       struct unicode_data *data = container_of(c, struct unicode_data, c);
+
+       if (!data->names)
+               return;
+       munmap(data->names, data->len);
+}
+
+/*
+ * "Unicode-names" command.
+ *
+ * When invoked through the static unicode_names command this is the
+ * first call: storage is allocated, a per-instance command is
+ * registered under the same name, and NamesList.txt is located and
+ * mapped.  Later calls arrive through the registered command and find
+ * their data with container_of().
+ *
+ * If ci->str and ci->comm2 are both given, matches for ci->str are
+ * reported to ci->comm2, with ci->num selecting which match (0 for
+ * all).  Returns Efail if the names file is not available, else 1.
+ */
+DEF_CMD(unicode_names)
+{
+       struct unicode_data *ud;
+       if (ci->comm == &unicode_names) {
+               /* This is the first call - need to allocate storage,
+                * load the NamesList file, and register a new command.
+                */
+               char *p;
+               int fd;
+               off_t size;
+               void *map;
+
+               alloc(ud, pane);
+               ud->c = unicode_names;
+               ud->c.free = unicode_free;
+               call_comm("global-set-command", ci->home, &ud->c, 0, NULL,
+                         "Unicode-names");
+               p = call_ret(str, "xdg-find-edlib-file", ci->focus, 0, NULL,
+                            "NamesList.txt", 0, NULL, "data");
+               if (!p)
+                       return Efail;
+               fd = open(p, O_RDONLY);
+               free(p);
+               if (fd < 0)
+                       return Efail;
+               size = lseek(fd, 0, SEEK_END);
+               if (size <= 0) {
+                       /* lseek failed, or there is nothing to map */
+                       close(fd);
+                       return Efail;
+               }
+               map = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+               close(fd);
+               /* mmap() reports failure with MAP_FAILED, not NULL, so
+                * only record the mapping once it is known to be valid.
+                */
+               if (map == MAP_FAILED)
+                       return Efail;
+               ud->names = map;
+               ud->len = size;
+       } else {
+               ud = container_of(ci->comm, struct unicode_data, c);
+       }
+       if (!ud->names)
+               return Efail;
+       if (ci->str && ci->comm2)
+               report_names(ud, ci->str, ci->num, ci->focus, ci->comm2);
+       return 1;
+}
+
+/*
+ * Module entry point: register the static unicode_names command as
+ * "Unicode-names" on the given pane.
+ */
+void edlib_init(struct pane *ed safe)
+{
+       call_comm("global-set-command", ed, &unicode_names, 0, NULL,
+                 "Unicode-names");
+}
index db146a5073d013a8b1ebe30367571bb9ef13e70f..af23ff05678fad39a082b22754858474f790dba2 100644 (file)
@@ -3109,10 +3109,30 @@ DEF_CMD(emacs_quote)
                 (str = call_ret(strsave, "doc:get-str", ci->focus,
                                 0, NULL, NULL, 0, mk)) != NULL) {
                int x;
+               char *ep;
                if (*str == '#')
                        str ++;
-               if (sscanf(str, "%x", &x) == 1)
+               x = strtoul(str, &ep, 16);
+               if (ep && *ep == 0) {
                        wch = x;
+                       call("Message", ci->focus, 0, NULL,
+                            strconcat(ci->focus, "Hex code 0x", str));
+               } else {
+                       struct call_return cr;
+                       cr = call_ret(all, "Unicode-names", ci->focus,
+                                     1, NULL, str);
+                       if (cr.s && cr.i) {
+                               wch = cr.i;
+                               call("Message", ci->focus, 0, NULL,
+                                    strconcat(ci->focus,
+                                              "Unicode char <", cr.s, ">"));
+                       } else {
+                               call("Message", ci->focus, 0, NULL,
+                                    strconcat(ci->focus,
+                                              "Cannot find character <", str, ">"));
+                               return Efail;
+                       }
+               }
        }
        if (wch == WEOF) {
                call("Mode:set-all", ci->focus, ci->num, NULL, ":CQ", ci->num2);
index c188cb41cfb0263aa10821e4b37a6b910821f8d0..bf41b5174790b485b218165d334b1b0c5db4b936 100644 (file)
@@ -151,3 +151,5 @@ lib-view = attach-view
 lib-keymap = attach-global-keymap
 lib-glibevents = attach-glibevents
 lib-shellcmd = attach-shellcmd
+
+lib-unicode-names = Unicode-names