From: NeilBrown Date: Sun, 2 Jul 2023 05:14:58 +0000 (+1000) Subject: Add lib-unicode-names and use it for emacs :C-q X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=a281ac36f02cc792da69db9ee49218a22475314f;p=edlib.git Add lib-unicode-names and use it for emacs :C-q :C-q can now be given a unicode name from NamesList.txt and it will insert the first match - which might not be the one you want. Signed-off-by: NeilBrown --- diff --git a/DOC/TODO.md b/DOC/TODO.md index dc8c1858..ec589cbb 100644 --- a/DOC/TODO.md +++ b/DOC/TODO.md @@ -25,7 +25,7 @@ the file. lines containing current content. - [X] emacs: Num-C-l doesn't work if it would require part of a wrapped line off top of screen -- [ ] emacs: :C-q to recognise names of Unicode chars: e.g. WASTEBASKET +- [X] emacs: :C-q to recognise names of Unicode chars: e.g. WASTEBASKET Possibly matches a list which continued :C-q cycles through - [ ] linecount 'view' mode improvements - [ ] allocate pane->data together with pane. A single allocation so @@ -279,8 +279,12 @@ Module features - [X] Num-C-l doesn't work if it would require part of a wrapped line off top of screen -- [ ] :C-q to recognise names of Unicode chars: e.g. WASTEBASKET +- [X] :C-q to recognise names of Unicode chars: e.g. WASTEBASKET Possibly matches a list which continued :C-q cycles through +- [ ] :CX-= to look-up unicode name of character if not ASCII +- [ ] ":C-q ?" or "- :C-q" to open a find dialog with list of unicode + chars. +- [ ] :C-q when finding unicode chars, allow repeated :C-q to find more - [ ] split some generic functionality like arrows and mouse clicks into a separate module to be shared with other edit modes. - [ ] sort the command names for command-completion? 
diff --git a/Makefile b/Makefile index 60258e01..f16bbed4 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,7 @@ SHOBJ = O/doc-text.o O/doc-dir.o O/doc-docs.o \ O/lib-renderline.o O/lib-x11selection-gtk.o O/lib-autosave.o \ O/lib-x11selection-xcb.o O/display-x11-xcb.o \ O/lib-linefilter.o O/lib-wiggle.o O/lib-aspell.o O/lib-calc.o \ + O/lib-unicode-names.o \ O/lang-python.o \ O/mode-emacs.o O/emacs-search.o \ O/display-ncurses.o @@ -238,7 +239,7 @@ test-rexel: rexel ./rexel -T NamesList.txt: - wget http://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt + wget https://unicode.org/Public/UNIDATA/NamesList.txt checksym: edlib @nm edlib | awk '$$2 == "T" {print $$3}' | while read a; do grep $$a *.h > /dev/null || echo $$a; done | grep -vE '^(_.*|main)$$' ||: diff --git a/lib-unicode-names.c b/lib-unicode-names.c new file mode 100644 index 00000000..8c4f94e4 --- /dev/null +++ b/lib-unicode-names.c @@ -0,0 +1,130 @@ +/* + * Copyright Neil Brown ©2023 + * May be distributed under terms of GPLv2 - see file:COPYING + * + * Parse the Unicode NamesList.txt file to find names for + * unicode characters. + * + * NOTE(review): in this copy the three #include directives below have + * no header name (apparently lost in extraction). The code uses + * open(), lseek(), close(), mmap() and munmap(), so presumably + * fcntl.h, unistd.h and sys/mman.h - confirm against the repository. + */ + +#include +#include +#include +#include "core.h" + +struct unicode_data { + struct command c; /* embedded command; container_of() maps it back to this struct */ + char *names; /* mmap()ed NamesList.txt data, set in unicode_names */ + int len; /* length given to mmap() and munmap(): result of lseek(fd, 0, 2) */ +}; + +static void report_names(struct unicode_data *ud safe, const char *name safe, + int which, + struct pane *p safe, struct command *c safe) +{ + /* Search the mapped NamesList data for lines matching "name" and + * report each hit with comm_call(c, "cb", p, ch, NULL, n), where ch is + * the value parsed from the hex number at the start of the entry and + * n is a strndup() copy of the rest of the matched line after its + * prefix (without a trailing carriage return), freed once the + * callback has returned. + * NOTE(review): "name" is spliced into the pattern unescaped, and + * strchr() scans the mapping with no length bound - confirm both + * are acceptable. + * + * name must be start of a word, as either primary or secondary + * name. Ignore case. 
+ * If "which" is zero, return them all, else only return the + * nth one where which==n + */ + char *ptn = strconcat(p, "?i:^([0-9A-F]{4,5} | = ).*\\b", name); + int i; + + if (!ud->names) + return; + + for (i = 0; i < ud->len; ) { + int ch, s; + char *cp, *n, *eol; + + s = call("text-search", p, 0, NULL, ptn, + ud->len - i, NULL, ud->names + i); + if (s <= 0) + break; + i += s-1; + /* i is now the start of the match + * (so s is presumably the match offset plus one - TODO confirm + * against the text-search command) */ + cp = ud->names + i; + eol = strchr(cp, '\n'); + if (!eol) + break; + /* resume the next search on the line after this one */ i = (eol - ud->names) + 1; + if (eol[-1] == '\r') + eol -= 1; + if (*cp == '\t') { + /* secondary name "\t= "*/ + n = strndup(cp+3, eol-cp-3); + /* find number: step back to the start of the nearest + * earlier line that does not begin with a tab */ + while (cp > ud->names && + (cp[-1] != '\n' || cp[0] == '\t')) + cp -= 1; + } else { + /* primary name "XXXXX?\t" + * a tab at cp[4] means 4 hex digits, otherwise 5 are assumed */ + if (cp[4] == '\t') + n = strndup(cp+5, eol-cp-5); + else + n = strndup(cp+6, eol-cp-6); + } + /* eol is reused as the strtoul() end pointer; the entry is only + * reported when exactly 4 or 5 characters were consumed */ ch = strtoul(cp, &eol, 16); + if (eol == cp+4 || eol == cp+5) { + if (which == 0) + comm_call(c, "cb", p, ch, NULL, n); + else { + /* count down to the requested match; once it is + * reported, setting i to ud->len ends the loop */ which -= 1; + if (which == 0) { + comm_call(c, "cb", p, ch, NULL, n); + i = ud->len; + } + } + } + free(n); + } +} + +static void unicode_free(struct command *c safe) +{ + /* Installed as ud->c.free by unicode_names: unmaps the NamesList + * data if a mapping was recorded. It does not release ud itself. */ struct unicode_data *ud = container_of(c, struct unicode_data, c); + + if (ud->names) + munmap(ud->names, ud->len); +} + +DEF_CMD(unicode_names) +{ + struct unicode_data *ud; + if (ci->comm == &unicode_names) { + /* This is the first call - need to allocate storage, + * load the NamesList file, and register a new command. + * The copy in ud->c is registered under "Unicode-names" before + * the file is opened, so the early Efail returns below leave it + * registered with no data loaded. + * NOTE(review): presumably alloc() zero-fills ud so that + * ud->names starts as NULL - TODO confirm. + * NOTE(review): the lseek() result is not checked, and mmap() + * reports failure as MAP_FAILED rather than NULL, so the + * !ud->names test after this block would not notice a failed + * mapping - worth confirming and handling. 
+ */ + char *p; + int fd; + + alloc(ud, pane); + ud->c = unicode_names; + ud->c.free = unicode_free; + call_comm("global-set-command", ci->home, &ud->c, 0, NULL, + "Unicode-names"); + p = call_ret(str, "xdg-find-edlib-file", ci->focus, 0, NULL, + "NamesList.txt", 0, NULL, "data"); + if (!p) + return Efail; + fd = open(p, O_RDONLY); + free(p); + if (fd < 0) + return Efail; + ud->len = lseek(fd, 0, 2); + ud->names = mmap(NULL, ud->len, PROT_READ, MAP_SHARED, fd, 0); + close(fd); + } else { + /* Called through a registered copy: recover the unicode_data + * that embeds this command */ ud = container_of(ci->comm, struct unicode_data, c); + } + if (!ud->names) + return Efail; + /* Only search when both a name (ci->str) and a callback (ci->comm2) + * were supplied; ci->num is passed as the "which" argument */ if (ci->str && ci->comm2) + report_names(ud, ci->str, ci->num, ci->focus, ci->comm2); + return 1; +} + +void edlib_init(struct pane *ed safe) +{ + /* Register the bootstrap command under the global name + * "Unicode-names"; its first invocation registers a per-instance + * copy under the same name (see unicode_names). + * Presumably called when the module is loaded - TODO confirm. */ call_comm("global-set-command", ed, &unicode_names, + 0, NULL, "Unicode-names"); +} diff --git a/mode-emacs.c b/mode-emacs.c index db146a50..af23ff05 100644 --- a/mode-emacs.c +++ b/mode-emacs.c @@ -3109,10 +3109,30 @@ DEF_CMD(emacs_quote) (str = call_ret(strsave, "doc:get-str", ci->focus, 0, NULL, NULL, 0, mk)) != NULL) { int x; + char *ep; if (*str == '#') str ++; - if (sscanf(str, "%x", &x) == 1) + x = strtoul(str, &ep, 16); + if (ep && *ep == 0) { wch = x; + call("Message", ci->focus, 0, NULL, + strconcat(ci->focus, "Hex code 0x", str)); + } else { + struct call_return cr; + cr = call_ret(all, "Unicode-names", ci->focus, + 1, NULL, str); + if (cr.s && cr.i) { + wch = cr.i; + call("Message", ci->focus, 0, NULL, + strconcat(ci->focus, + "Unicode char <", cr.s, ">")); + } else { + call("Message", ci->focus, 0, NULL, + strconcat(ci->focus, + "Cannot find character <", str, ">")); + return Efail; + } + } } if (wch == WEOF) { call("Mode:set-all", ci->focus, ci->num, NULL, ":CQ", ci->num2); diff --git a/modules.ini b/modules.ini index c188cb41..bf41b517 100644 --- a/modules.ini +++ b/modules.ini @@ -151,3 +151,5 @@ lib-view = attach-view lib-keymap = attach-global-keymap lib-glibevents = attach-glibevents lib-shellcmd = attach-shellcmd + 
+lib-unicode-names = Unicode-names