git.neil.brown.name Git - edlib.git/commitdiff
search: various fixes for searching a string in a string
author NeilBrown <neil@brown.name>
Sun, 2 Jul 2023 05:11:28 +0000 (15:11 +1000)
committer NeilBrown <neil@brown.name>
Wed, 12 Jul 2023 22:17:49 +0000 (08:17 +1000)
I guess I hadn't tested this, but searching for a string in a string
didn't work for various reasons.

Also allow max length to search to be given.

Signed-off-by: NeilBrown <neil@brown.name>
lib-search.c
rexel.h

index da7fa8620072644165d9756eac0daa4145a4f5b4..5c96a8291c1e9c6333bb7920a780ad848bd5d00e 100644 (file)
@@ -393,13 +393,13 @@ DEF_CMD(text_search)
                        rxl, strcmp(ci->key, "text-match") == 0 ? RXLF_ANCHORED : 0);
                int flags = RXL_SOL|RXL_SOD;
                const char *t = ci->str2;
-               int thelen, start;
+               int thelen = -1, start = 0;
                enum rxl_found r;
                wint_t prev_ch = WEOF;
 
                do {
                        wint_t wc = get_utf8(&t, NULL);
-                       if (wc >= WERR) {
+                       if (wc >= WERR|| (ci->num2 > 0 && t > ci->str2 + ci->num2)) {
                                rxl_advance(st, RXL_EOL|RXL_EOD);
                                break;
                        }
@@ -415,10 +415,26 @@ DEF_CMD(text_search)
                                flags |= RXL_EOW;
                                break;
                        }
+                       if (is_eol(wc))
+                               flags |= RXL_EOL;
+                       if (prev_ch == WEOF || is_eol(prev_ch))
+                               flags |= RXL_SOL;
+                       prev_ch = wc;
                        r = rxl_advance(st, wc | flags);
                        flags = 0;
+                       if (r >= RXL_MATCH) {
+                               /* "start" is in chars, not bytes, so we cannot
+                                * use it.  Need since_start and then count
+                                * back.
+                                */
+                               rxl_info(st, &thelen, NULL, NULL, &since_start);
+                               start = t - ci->str2;
+                               while (since_start > 0) {
+                                       start = utf8_round_len(ci->str2, start-1);
+                                       since_start -= 1;
+                               }
+                       }
                } while (r != RXL_DONE);
-               rxl_info(st, &thelen, NULL, &start, NULL);
                rxl_free_state(st);
                if (thelen < 0)
                        ret = Efalse;
diff --git a/rexel.h b/rexel.h
index a1d56f0f1ed335d9456f75d1a3dedb90f81491f8..6dc193de160ff98938fc1b28fa48b4e7d28088da 100644 (file)
--- a/rexel.h
+++ b/rexel.h
@@ -7,7 +7,7 @@ enum rxl_found {
        RXL_NOMATCH,    /* No match has been found at all */
        RXL_CONTINUE,   /* No match here, but it is worth looking further */
        RXL_DONE,       /* A match was previously reported, but no further match
-                        * can be found was we are anchored on that match.
+                        * can be found as we are anchored on that match.
                         */
        RXL_MATCH,      /* There was a match once the char was processed */
        RXL_MATCH_FLAG, /* A match was found due to flags, but not once char was