From: Neil Brown Date: Wed, 24 Aug 2011 11:39:47 +0000 (+1000) Subject: Clean up split.c X-Git-Tag: v0.9~60 X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=a584162354dfc1f9e70139f3f1f1708d54c81ffa;p=wiggle.git Clean up split.c Add/fix comments, remove unused function parameter. Signed-off-by: NeilBrown --- diff --git a/split.c b/split.c index f21c642..0562310 100644 --- a/split.c +++ b/split.c @@ -23,21 +23,18 @@ */ /* - * split a stream into words or line - * When splitting into words we can either be approximate or precise. - * Precise mode includes every char in a word. - * Approximate mode excluses white-space words and might unite some special chars + * Split a stream into words or line * - * In general, a word is one of: + * A word is one of: * string of [A-Za-z0-9_] * or string of [ \t] - * or single char. + * or single char (i.e. punctuation and newlines). * * A line is any string that ends with \n * * As a special case to allow proper aligning of multiple chunks - * in a patch, a word starting \0 will include 5 chars and a newline - * + * in a patch, a word starting \0 will include 20 chars with a newline + * second from the end. * * We make two passes through the stream. * Firstly we count the number of item so an array can be allocated, @@ -53,7 +50,8 @@ #include "hash.h" -static int split_internal(char *start, char *end, int type, struct elmnt *list, int reverse) +static int split_internal(char *start, char *end, int type, + struct elmnt *list) { int cnt = 0; @@ -63,49 +61,48 @@ static int split_internal(char *start, char *end, int type, struct elmnt *list, if (*cp == '\0' && cp+19 < end && cp[18] == '\n') { /* special word */ cp += 20; - } else switch (type) { - case ByLine: - while (cp < end && *cp != '\n') - cp++; - if (cp < end) - cp++; - break; - case ByWord: - case ApproxWord: - if (isalnum(*cp) || *cp == '_') { - do + } else + switch (type) { + case ByLine: + while (cp < end && *cp != '\n') cp++; - while (cp < end && (isalnum(*cp) || *cp == '_')); - } else if (*cp == ' ' || *cp == '\t') { - do + if (cp < end) cp++; - while (cp < end && (*cp == ' ' || *cp == '\t')); - } else - cp++; - break; - } - if (type != ApproxWord || *start == '\0' || - (isalnum(*start) || *start == '_')) { - if (list) { - if (reverse) - list--; - list->start = start; - list->len = cp-start; - if (*start) - list->hash = hash_mem(start, list->len, BITS_PER_LONG); - else - list->hash = atoi(start+1); - if (!reverse) - list++; + break; + case ByWord: + if (isalnum(*cp) || *cp == '_') { + do + cp++; + while (cp < end + && (isalnum(*cp) + || *cp == '_')); + } else if (*cp == ' ' || *cp == '\t') { + do + cp++; + while (cp < end + && (*cp == ' ' + || *cp == '\t')); + } else + cp++; + break; } - cnt++; + if (list) { + list->start = start; + list->len = cp-start; + if (*start) + list->hash = hash_mem(start, list->len, + BITS_PER_LONG); + else + list->hash = atoi(start+1); + list++; } + cnt++; start = cp; } return cnt; } -struct file split_stream(struct stream s, int type, int reverse) +struct file split_stream(struct stream s, int type) { int cnt; struct file f; @@ -115,10 +112,9 @@ struct file split_stream(struct stream s, int type, int reverse) end = s.body+s.len; c = s.body; - cnt = split_internal(c, end, type, NULL, reverse); -/* fprintf(stderr, "cnt %d\n", cnt);*/ + cnt = split_internal(c, end, type, NULL); f.list = malloc(cnt*sizeof(struct elmnt)); - f.elcnt = split_internal(c, end, type, f.list + reverse*cnt, reverse); + f.elcnt = split_internal(c, end, type, f.list); return f; } diff --git a/vpatch.c b/vpatch.c index 26b4756..eb3fd02 100644 --- a/vpatch.c +++ b/vpatch.c @@ -1115,9 +1115,9 @@ static void merge_window(struct plist *p, FILE *f, int reverse) sm = load_file(p->file); } } - fm = split_stream(sm, ByWord, 0); - fb = split_stream(sb, ByWord, 0); - fa = split_stream(sa, ByWord, 0); + fm = split_stream(sm, ByWord); + fb = split_stream(sb, ByWord); + fa = split_stream(sa, ByWord); if (ch) csl1 = pdiff(fm, fb, ch); @@ -2014,9 +2014,9 @@ static void calc_one(struct plist *pl, FILE *f, int reverse) struct file ff, fp1, fp2; struct csl *csl1, *csl2; struct ci ci; - ff = split_stream(sf, ByWord, 0); - fp1 = split_stream(s1, ByWord, 0); - fp2 = split_stream(s2, ByWord, 0); + ff = split_stream(sf, ByWord); + fp1 = split_stream(s1, ByWord); + fp2 = split_stream(s2, ByWord); if (pl->chunks) csl1 = pdiff(ff, fp1, pl->chunks); else diff --git a/wiggle.c b/wiggle.c index 4810e9c..d9b26b6 100644 --- a/wiggle.c +++ b/wiggle.c @@ -430,8 +430,8 @@ int main(int argc, char *argv[]) } if (obj == 'l') { int a, b; - fl[0] = split_stream(flist[0], ByLine, 0); - fl[1] = split_stream(flist[1], ByLine, 0); + fl[0] = split_stream(flist[0], ByLine); + fl[1] = split_stream(flist[1], ByLine); if (chunks2 && !chunks1) csl1 = pdiff(fl[0], fl[1], chunks2); else @@ -476,8 +476,8 @@ int main(int argc, char *argv[]) } else { int a, b; int sol = 1; /* start of line */ - fl[0] = split_stream(flist[0], ByWord, 0); - fl[1] = split_stream(flist[1], ByWord, 0); + fl[0] = split_stream(flist[0], ByWord); + fl[1] = split_stream(flist[1], ByWord); if (chunks2 && !chunks1) csl1 = pdiff(fl[0], fl[1], chunks2); else @@ -692,13 +692,13 @@ int main(int argc, char *argv[]) } if (obj == 'l') { - fl[0] = split_stream(flist[0], ByLine, 0); - fl[1] = split_stream(flist[1], ByLine, 0); - fl[2] = split_stream(flist[2], ByLine, 0); + fl[0] = split_stream(flist[0], ByLine); + fl[1] = split_stream(flist[1], ByLine); + fl[2] = split_stream(flist[2], ByLine); } else { - fl[0] = split_stream(flist[0], ByWord, 0); - fl[1] = split_stream(flist[1], ByWord, 0); - fl[2] = split_stream(flist[2], ByWord, 0); + fl[0] = split_stream(flist[0], ByWord); + fl[1] = split_stream(flist[1], ByWord); + fl[2] = split_stream(flist[2], ByWord); } if (chunks2 && !chunks1) csl1 = pdiff(fl[0], fl[1], chunks2); diff --git a/wiggle.h b/wiggle.h index 1006ed3..a58918a 100644 --- a/wiggle.h +++ b/wiggle.h @@ -108,7 +108,7 @@ extern struct stream load_file(char *name); extern int split_patch(struct stream, struct stream*, struct stream*); extern int split_merge(struct stream, struct stream*, struct stream*, struct stream*); -extern struct file split_stream(struct stream s, int type, int reverse); +extern struct file split_stream(struct stream s, int type); extern struct csl *pdiff(struct file a, struct file b, int chunks); extern struct csl *diff(struct file a, struct file b); extern struct csl *diff_partial(struct file a, struct file b, @@ -158,4 +158,3 @@ extern void cleanlist(struct file a, struct file b, struct csl *list); #define ByLine 0 #define ByWord 1 -#define ApproxWord 2