Add/fix comments, remove unused function parameter.
Signed-off-by: NeilBrown <neilb@suse.de>
*/
/*
- * split a stream into words or line
- * When splitting into words we can either be approximate or precise.
- * Precise mode includes every char in a word.
- * Approximate mode excluses white-space words and might unite some special chars
+ * Split a stream into words or lines
*
- * In general, a word is one of:
+ * A word is one of:
* string of [A-Za-z0-9_]
* or string of [ \t]
- * or single char.
+ * or single char (i.e. punctuation and newlines).
*
* A line is any string that ends with \n
*
* As a special case to allow proper aligning of multiple chunks
- * in a patch, a word starting \0 will include 5 chars and a newline
- *
+ * in a patch, a word starting with \0 will include 20 chars, with a newline
+ * second from the end.
*
* We make two passes through the stream.
* Firstly we count the number of item so an array can be allocated,
#include "hash.h"
-static int split_internal(char *start, char *end, int type, struct elmnt *list, int reverse)
+static int split_internal(char *start, char *end, int type,
+ struct elmnt *list)
{
int cnt = 0;
if (*cp == '\0' && cp+19 < end && cp[18] == '\n') {
/* special word */
cp += 20;
- } else switch (type) {
- case ByLine:
- while (cp < end && *cp != '\n')
- cp++;
- if (cp < end)
- cp++;
- break;
- case ByWord:
- case ApproxWord:
- if (isalnum(*cp) || *cp == '_') {
- do
+ } else
+ switch (type) {
+ case ByLine:
+ while (cp < end && *cp != '\n')
cp++;
- while (cp < end && (isalnum(*cp) || *cp == '_'));
- } else if (*cp == ' ' || *cp == '\t') {
- do
+ if (cp < end)
cp++;
- while (cp < end && (*cp == ' ' || *cp == '\t'));
- } else
- cp++;
- break;
- }
- if (type != ApproxWord || *start == '\0' ||
- (isalnum(*start) || *start == '_')) {
- if (list) {
- if (reverse)
- list--;
- list->start = start;
- list->len = cp-start;
- if (*start)
- list->hash = hash_mem(start, list->len, BITS_PER_LONG);
- else
- list->hash = atoi(start+1);
- if (!reverse)
- list++;
+ break;
+ case ByWord:
+ if (isalnum(*cp) || *cp == '_') {
+ do
+ cp++;
+ while (cp < end
+ && (isalnum(*cp)
+ || *cp == '_'));
+ } else if (*cp == ' ' || *cp == '\t') {
+ do
+ cp++;
+ while (cp < end
+ && (*cp == ' '
+ || *cp == '\t'));
+ } else
+ cp++;
+ break;
}
- cnt++;
+ if (list) {
+ list->start = start;
+ list->len = cp-start;
+ if (*start)
+ list->hash = hash_mem(start, list->len,
+ BITS_PER_LONG);
+ else
+ list->hash = atoi(start+1);
+ list++;
}
+ cnt++;
start = cp;
}
return cnt;
}
-struct file split_stream(struct stream s, int type, int reverse)
+struct file split_stream(struct stream s, int type)
{
int cnt;
struct file f;
end = s.body+s.len;
c = s.body;
- cnt = split_internal(c, end, type, NULL, reverse);
-/* fprintf(stderr, "cnt %d\n", cnt);*/
+ cnt = split_internal(c, end, type, NULL);
f.list = malloc(cnt*sizeof(struct elmnt));
- f.elcnt = split_internal(c, end, type, f.list + reverse*cnt, reverse);
+ f.elcnt = split_internal(c, end, type, f.list);
return f;
}
sm = load_file(p->file);
}
}
- fm = split_stream(sm, ByWord, 0);
- fb = split_stream(sb, ByWord, 0);
- fa = split_stream(sa, ByWord, 0);
+ fm = split_stream(sm, ByWord);
+ fb = split_stream(sb, ByWord);
+ fa = split_stream(sa, ByWord);
if (ch)
csl1 = pdiff(fm, fb, ch);
struct file ff, fp1, fp2;
struct csl *csl1, *csl2;
struct ci ci;
- ff = split_stream(sf, ByWord, 0);
- fp1 = split_stream(s1, ByWord, 0);
- fp2 = split_stream(s2, ByWord, 0);
+ ff = split_stream(sf, ByWord);
+ fp1 = split_stream(s1, ByWord);
+ fp2 = split_stream(s2, ByWord);
if (pl->chunks)
csl1 = pdiff(ff, fp1, pl->chunks);
else
}
if (obj == 'l') {
int a, b;
- fl[0] = split_stream(flist[0], ByLine, 0);
- fl[1] = split_stream(flist[1], ByLine, 0);
+ fl[0] = split_stream(flist[0], ByLine);
+ fl[1] = split_stream(flist[1], ByLine);
if (chunks2 && !chunks1)
csl1 = pdiff(fl[0], fl[1], chunks2);
else
} else {
int a, b;
int sol = 1; /* start of line */
- fl[0] = split_stream(flist[0], ByWord, 0);
- fl[1] = split_stream(flist[1], ByWord, 0);
+ fl[0] = split_stream(flist[0], ByWord);
+ fl[1] = split_stream(flist[1], ByWord);
if (chunks2 && !chunks1)
csl1 = pdiff(fl[0], fl[1], chunks2);
else
}
if (obj == 'l') {
- fl[0] = split_stream(flist[0], ByLine, 0);
- fl[1] = split_stream(flist[1], ByLine, 0);
- fl[2] = split_stream(flist[2], ByLine, 0);
+ fl[0] = split_stream(flist[0], ByLine);
+ fl[1] = split_stream(flist[1], ByLine);
+ fl[2] = split_stream(flist[2], ByLine);
} else {
- fl[0] = split_stream(flist[0], ByWord, 0);
- fl[1] = split_stream(flist[1], ByWord, 0);
- fl[2] = split_stream(flist[2], ByWord, 0);
+ fl[0] = split_stream(flist[0], ByWord);
+ fl[1] = split_stream(flist[1], ByWord);
+ fl[2] = split_stream(flist[2], ByWord);
}
if (chunks2 && !chunks1)
csl1 = pdiff(fl[0], fl[1], chunks2);
extern int split_patch(struct stream, struct stream*, struct stream*);
extern int split_merge(struct stream, struct stream*, struct stream*,
struct stream*);
-extern struct file split_stream(struct stream s, int type, int reverse);
+extern struct file split_stream(struct stream s, int type);
extern struct csl *pdiff(struct file a, struct file b, int chunks);
extern struct csl *diff(struct file a, struct file b);
extern struct csl *diff_partial(struct file a, struct file b,
#define ByLine 0
#define ByWord 1
-#define ApproxWord 2