From: NeilBrown Date: Sat, 29 Aug 2020 08:15:52 +0000 (+1000) Subject: Introduce --non-space option X-Git-Tag: v1.3~14 X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=b78945cad0c9901a2b7b2792343cca5bca575187;p=wiggle.git Introduce --non-space option This can significantly reduce the number of words by treating punctuation as part of the surrounding word, rather than as single-char words. Fewer words can mean much faster comparisons. Signed-off-by: NeilBrown --- diff --git a/ReadMe.c b/ReadMe.c index 05d8ec2..847e697 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -58,9 +58,10 @@ struct option long_options[] = { {"no-ignore", 0, 0, 'i'}, {"show-wiggles",0, 0, 'W'}, {"ignore-blanks",0,0, 'b'}, - {"no-backup", 0, 0, NO_BACKUP }, - {"self-test", 0, 0, SELF_TEST}, + {"no-backup", 0, 0, NO_BACKUP }, + {"self-test", 0, 0, SELF_TEST}, {"report-wiggles", 0, 0, REPORT_WIGGLES}, + {"non-space", 0, 0, NON_SPACE}, {0, 0, 0, 0} }; @@ -84,6 +85,7 @@ char Help[] = "\n" "\n" " --words -w : word-wise diff and merge.\n" " --lines -l : line-wise diff and merge.\n" +" --non-space : words are separated by spaces.\n" "\n" " --patch -p : treat last file as a patch file.\n" " -1 -2 -3 : select which component of patch or merge to use.\n" diff --git a/split.c b/split.c index 27f9b7e..a9d66b0 100644 --- a/split.c +++ b/split.c @@ -58,7 +58,7 @@ static int split_internal(char *start, char *end, int type, char *cp2; int prefix = 0; - if (type == (ByWord | IgnoreBlanks)) + if ((type & ByWord) && (type & IgnoreBlanks)) while (cp < end && (*cp == ' ' || *cp == '\t')) { prefix++; @@ -79,24 +79,28 @@ static int split_internal(char *start, char *end, int type, cp++; break; case ByWord: - if (isalnum(*cp) || *cp == '_') { + if (*cp == ' ' || *cp == '\t') { do cp++; while (cp < end - && (isalnum(*cp) - || *cp == '_')); - } else if (*cp == ' ' || *cp == '\t') { + && (*cp == ' ' + || *cp == '\t')); + } else if ((type & WholeWord) || + isalnum(*cp) || *cp == '_') { do cp++; while (cp < end - && (*cp == ' ' - || *cp == '\t')); + && (((type & WholeWord) + && *cp != ' ' && *cp != '\t' + && *cp != '\n') + || isalnum(*cp) + || *cp == '_')); } else cp++; break; } cp2 = cp; - if (type == (ByWord | IgnoreBlanks) && + if ((type & ByWord) && (type & IgnoreBlanks) && *start && *start != '\n') while (cp2 < end && (*cp2 == ' ' || *cp2 == '\t' || *cp2 == '\n')) { diff --git a/wiggle.1 b/wiggle.1 index f14166f..89047cb 100644 --- a/wiggle.1 +++ b/wiggle.1 @@ -144,6 +144,11 @@ patch to be wiggled in to place. Request that all operations and display be word based. This is the default for the "diff" function. .TP +.BR \-\-non\-space +Request that words be defined as sequences of non-white-space. Without +this flag words are sequences of alphanumerics or single non-white-space +characters. +.TP .BR \-l ", " \-\-lines Request that all operations and display be line based. .TP diff --git a/wiggle.c b/wiggle.c index aef2d75..2347ffe 100644 --- a/wiggle.c +++ b/wiggle.c @@ -759,6 +759,10 @@ int main(int argc, char *argv[]) Cmd, mode, opt); exit(2); + case NON_SPACE: + ignore_blanks |= WholeWord; + continue; + case 'w': case 'l': if (obj == 0 || obj == opt) { @@ -784,7 +788,7 @@ int main(int argc, char *argv[]) continue; case 'b': - ignore_blanks = IgnoreBlanks; + ignore_blanks |= IgnoreBlanks; continue; case 'i': diff --git a/wiggle.h b/wiggle.h index 1befdd5..dfde4b7 100644 --- a/wiggle.h +++ b/wiggle.h @@ -199,8 +199,9 @@ extern char short_options[]; extern struct option long_options[]; enum other_options { SELF_TEST = 300, - REPORT_WIGGLES = 301, + REPORT_WIGGLES, NO_BACKUP, + NON_SPACE, }; extern char Usage[]; extern char Help[]; @@ -216,4 +217,5 @@ enum { ByWord = 1, ByMask = 3, IgnoreBlanks = 8, /* 'or'ed in */ + WholeWord = 16, };