Fix up entry of multi-byte UTF-8

author NeilBrown <neil@brown.name>

Sun, 29 Nov 2015 21:50:01 +0000 (08:50 +1100)

committer NeilBrown <neil@brown.name>

Tue, 1 Dec 2015 05:23:49 +0000 (16:23 +1100)
author NeilBrown <neil@brown.name>
Sun, 29 Nov 2015 21:50:01 +0000 (08:50 +1100)
committer NeilBrown <neil@brown.name>
Tue, 1 Dec 2015 05:23:49 +0000 (16:23 +1100)
diff --git a/display-ncurses.c b/display-ncurses.c

index d4165e8ce8cb01fcf09eb26adb3bcb80008e5813..ade0a9e899e96f1ec71618c5054d2ea6a64736ba 100644 (file)
--- a/display-ncurses.c
+++ b/display-ncurses.c
@@ -345,7 +345,7 @@ static void send_key(int keytype, wint_t c, struct pane *p)
                 else if (c < ' ')
                         sprintf(k, "C-Chr-%c", c+64);
                 else
-                       sprintf(k, "Chr-%c", c);
+                       sprintf(k, "Chr-%lc", c);
         }
  
         ci.key = buf;
diff --git a/doc-text.c b/doc-text.c

index 59ab212a01f82c8b8f0605a601934cc5d46226c5..7557d1da9be700469aefb411fcced3760ac00af9 100644 (file)
--- a/doc-text.c
+++ b/doc-text.c
@@ -147,23 +147,24 @@ static struct map *text_map;
   * Unicode points.  This particularly affects adding new strings to
   * allocations.
   * There is no guarantee that a byte string is UTF-8 though, so
- * We only adjust the length if we can find an end-of-code-point in
+ * We only adjust the length if we can find a start-of-code-point in
   * the last 4 bytes. (longest UTF-8 encoding of 21bit unicode is 4 bytes).
+ * A start of codepoint starts with 0b0 or 0b11, not 0b10.
   */
  static int text_round_len(char *text, int len)
  {
         /* The string at 'text' is *longer* than 'len', or
          * at least text[len] is defined - it can be nul.  If
-        * len doesn't mark the end of a UTF-8 codepoint,
-        * and there is an end marker in the previous 4 bytes,
+        * [len] isn't the start of a new codepoint, and there
+        * is a start marker in the previous 4 bytes,
          * move back to there.
          */
         int i = 0;
-       while (i+1 < len && i <=4)
-               if ((text[len-i] & 0xC0) == 0x80 &&
-                   (text[len-i-1] & 0x80) == 0x80)
-               /* next byte is inside a UTF-8 code point, so this isn't a good
-                * spot to end. Try further back */
+       while (i <= len && i <=4)
+               if ((text[len-i] & 0xC0) == 0x80)
+                       /* next byte is inside a UTF-8 code point, so
+                        * this isn't a good spot to end. Try further
+                        * back */
                         i += 1;
                 else
                         return len-i;
diff --git a/mode-emacs.c b/mode-emacs.c

index 07051379e2cb22285b263c419c2b4d213af0a87b..a1b7e6e0f41af7d0b55a613d2cead2689d56b66b 100644 (file)
--- a/mode-emacs.c
+++ b/mode-emacs.c
@@ -622,6 +622,7 @@ static void emacs_init(void)
         }
  
         key_add_range(m, "Chr- ", "Chr-~", &emacs_insert);
+       key_add_range(m, "Chr-\200", "Chr-\377\377\377\377", &emacs_insert);
         key_add(m, "Tab", &emacs_insert_other);
         key_add(m, "LF", &emacs_insert_other);
         key_add(m, "Return", &emacs_insert_other);
author	NeilBrown <neil@brown.name>
	Sun, 29 Nov 2015 21:50:01 +0000 (08:50 +1100)
committer	NeilBrown <neil@brown.name>
	Tue, 1 Dec 2015 05:23:49 +0000 (16:23 +1100)
display-ncurses.c		patch | blob | history
doc-text.c		patch | blob | history
mode-emacs.c		patch | blob | history