Clean up split.c

author Neil Brown <neilb@suse.de>

Wed, 24 Aug 2011 11:39:47 +0000 (21:39 +1000)

committer Neil Brown <neilb@suse.de>

Wed, 24 Aug 2011 11:39:47 +0000 (21:39 +1000)
author Neil Brown <neilb@suse.de>
Wed, 24 Aug 2011 11:39:47 +0000 (21:39 +1000)
committer Neil Brown <neilb@suse.de>
Wed, 24 Aug 2011 11:39:47 +0000 (21:39 +1000)
diff --git a/split.c b/split.c

index f21c642aac3a069375f575c47594696dad8011b1..0562310f74ce8c6a5b840f9395ab31b678769faa 100644 (file)
--- a/split.c
+++ b/split.c
@@ -23,21 +23,18 @@
   */
  
  /*
- * split a stream into words or line
- * When splitting into words we can either be approximate or precise.
- *  Precise mode includes every char in a word.
- *  Approximate mode excluses white-space words and might unite some special chars
+ * Split a stream into words or lines
   *
- * In general, a word is one of:
+ * A word is one of:
   *    string of [A-Za-z0-9_]
   *    or string of [ \t]
- *    or single char.
+ *    or single char (i.e. punctuation and newlines).
   *
   * A line is any string that ends with \n
   *
   * As a special case to allow proper aligning of multiple chunks
- * in a patch, a word starting \0 will include 5 chars and a newline
- *
+ * in a patch, a word starting \0 will include 20 chars with a newline
+ * second from the end.
   *
   * We make two passes through the stream.
   * Firstly we count the number of item so an array can be allocated,
@@ -53,7 +50,8 @@
  
  #include "hash.h"
  
-static int split_internal(char *start, char *end, int type, struct elmnt *list, int reverse)
+static int split_internal(char *start, char *end, int type,
+                         struct elmnt *list)
  {
         int cnt = 0;
  
@@ -63,49 +61,48 @@ static int split_internal(char *start, char *end, int type, struct elmnt *list,
                 if (*cp == '\0' && cp+19 < end && cp[18] == '\n') {
                         /* special word */
                         cp += 20;
-               } else switch (type) {
-               case ByLine:
-                       while (cp < end && *cp != '\n')
-                               cp++;
-                       if (cp < end)
-                               cp++;
-                       break;
-               case ByWord:
-               case ApproxWord:
-                       if (isalnum(*cp) || *cp == '_') {
-                               do
+               } else
+                       switch (type) {
+                       case ByLine:
+                               while (cp < end && *cp != '\n')
                                         cp++;
-                               while (cp < end && (isalnum(*cp)  || *cp == '_'));
-                       } else if (*cp == ' ' || *cp == '\t') {
-                               do
+                               if (cp < end)
                                         cp++;
-                               while (cp < end && (*cp == ' ' || *cp == '\t'));
-                       } else
-                               cp++;
-                       break;
-               }
-               if (type != ApproxWord || *start == '\0' ||
-                   (isalnum(*start) || *start == '_')) {
-                       if (list) {
-                               if (reverse)
-                                       list--;
-                               list->start = start;
-                               list->len = cp-start;
-                               if (*start)
-                                       list->hash = hash_mem(start, list->len, BITS_PER_LONG);
-                               else
-                                       list->hash = atoi(start+1);
-                               if (!reverse)
-                                       list++;
+                               break;
+                       case ByWord:
+                               if (isalnum(*cp) || *cp == '_') {
+                                       do
+                                               cp++;
+                                       while (cp < end
+                                              && (isalnum(*cp)
+                                                  || *cp == '_'));
+                               } else if (*cp == ' ' || *cp == '\t') {
+                                       do
+                                               cp++;
+                                       while (cp < end
+                                              && (*cp == ' '
+                                                  || *cp == '\t'));
+                               } else
+                                       cp++;
+                               break;
                         }
-                       cnt++;
+               if (list) {
+                       list->start = start;
+                       list->len = cp-start;
+                       if (*start)
+                               list->hash = hash_mem(start, list->len,
+                                                     BITS_PER_LONG);
+                       else
+                               list->hash = atoi(start+1);
+                       list++;
                 }
+               cnt++;
                 start = cp;
         }
         return cnt;
  }
  
-struct file split_stream(struct stream s, int type, int reverse)
+struct file split_stream(struct stream s, int type)
  {
         int cnt;
         struct file f;
@@ -115,10 +112,9 @@ struct file split_stream(struct stream s, int type, int reverse)
         end = s.body+s.len;
         c = s.body;
  
-       cnt = split_internal(c, end, type, NULL, reverse);
-/*     fprintf(stderr, "cnt %d\n", cnt);*/
+       cnt = split_internal(c, end, type, NULL);
         f.list = malloc(cnt*sizeof(struct elmnt));
  
-       f.elcnt = split_internal(c, end, type, f.list + reverse*cnt, reverse);
+       f.elcnt = split_internal(c, end, type, f.list);
         return f;
  }
diff --git a/vpatch.c b/vpatch.c

index 26b4756604d59ac8008b66e42c166521a22d6267..eb3fd02d31ca139457fa259d339287b635382972 100644 (file)
--- a/vpatch.c
+++ b/vpatch.c
@@ -1115,9 +1115,9 @@ static void merge_window(struct plist *p, FILE *f, int reverse)
                         sm = load_file(p->file);
                 }
         }
-       fm = split_stream(sm, ByWord, 0);
-       fb = split_stream(sb, ByWord, 0);
-       fa = split_stream(sa, ByWord, 0);
+       fm = split_stream(sm, ByWord);
+       fb = split_stream(sb, ByWord);
+       fa = split_stream(sa, ByWord);
  
         if (ch)
                 csl1 = pdiff(fm, fb, ch);
@@ -2014,9 +2014,9 @@ static void calc_one(struct plist *pl, FILE *f, int reverse)
                 struct file ff, fp1, fp2;
                 struct csl *csl1, *csl2;
                 struct ci ci;
-               ff = split_stream(sf, ByWord, 0);
-               fp1 = split_stream(s1, ByWord, 0);
-               fp2 = split_stream(s2, ByWord, 0);
+               ff = split_stream(sf, ByWord);
+               fp1 = split_stream(s1, ByWord);
+               fp2 = split_stream(s2, ByWord);
                 if (pl->chunks)
                         csl1 = pdiff(ff, fp1, pl->chunks);
                 else
diff --git a/wiggle.c b/wiggle.c

index 4810e9c57a329f86873e167c983ac182fec262b9..d9b26b6176e0a2359df86b945226ce2e3aff76e7 100644 (file)
--- a/wiggle.c
+++ b/wiggle.c
@@ -430,8 +430,8 @@ int main(int argc, char *argv[])
                 }
                 if (obj == 'l') {
                         int a, b;
-                       fl[0] = split_stream(flist[0], ByLine, 0);
-                       fl[1] = split_stream(flist[1], ByLine, 0);
+                       fl[0] = split_stream(flist[0], ByLine);
+                       fl[1] = split_stream(flist[1], ByLine);
                         if (chunks2 && !chunks1)
                                 csl1 = pdiff(fl[0], fl[1], chunks2);
                         else
@@ -476,8 +476,8 @@ int main(int argc, char *argv[])
                 } else {
                         int a, b;
                         int sol = 1; /* start of line */
-                       fl[0] = split_stream(flist[0], ByWord, 0);
-                       fl[1] = split_stream(flist[1], ByWord, 0);
+                       fl[0] = split_stream(flist[0], ByWord);
+                       fl[1] = split_stream(flist[1], ByWord);
                         if (chunks2 && !chunks1)
                                 csl1 = pdiff(fl[0], fl[1], chunks2);
                         else
@@ -692,13 +692,13 @@ int main(int argc, char *argv[])
                 }
  
                 if (obj == 'l') {
-                       fl[0] = split_stream(flist[0], ByLine, 0);
-                       fl[1] = split_stream(flist[1], ByLine, 0);
-                       fl[2] = split_stream(flist[2], ByLine, 0);
+                       fl[0] = split_stream(flist[0], ByLine);
+                       fl[1] = split_stream(flist[1], ByLine);
+                       fl[2] = split_stream(flist[2], ByLine);
                 } else {
-                       fl[0] = split_stream(flist[0], ByWord, 0);
-                       fl[1] = split_stream(flist[1], ByWord, 0);
-                       fl[2] = split_stream(flist[2], ByWord, 0);
+                       fl[0] = split_stream(flist[0], ByWord);
+                       fl[1] = split_stream(flist[1], ByWord);
+                       fl[2] = split_stream(flist[2], ByWord);
                 }
                 if (chunks2 && !chunks1)
                         csl1 = pdiff(fl[0], fl[1], chunks2);
diff --git a/wiggle.h b/wiggle.h

index 1006ed38b3c5f9e8dab9022b79ebbbbd77facc52..a58918a870757a51dd4f07481f8affc43627743f 100644 (file)
--- a/wiggle.h
+++ b/wiggle.h
@@ -108,7 +108,7 @@ extern struct stream load_file(char *name);
  extern int split_patch(struct stream, struct stream*, struct stream*);
  extern int split_merge(struct stream, struct stream*, struct stream*,
                        struct stream*);
-extern struct file split_stream(struct stream s, int type, int reverse);
+extern struct file split_stream(struct stream s, int type);
  extern struct csl *pdiff(struct file a, struct file b, int chunks);
  extern struct csl *diff(struct file a, struct file b);
  extern struct csl *diff_partial(struct file a, struct file b,
@@ -158,4 +158,3 @@ extern void cleanlist(struct file a, struct file b, struct csl *list);
  
  #define        ByLine  0
  #define        ByWord  1
-#define        ApproxWord 2
author	Neil Brown <neilb@suse.de>
	Wed, 24 Aug 2011 11:39:47 +0000 (21:39 +1000)
committer	Neil Brown <neilb@suse.de>
	Wed, 24 Aug 2011 11:39:47 +0000 (21:39 +1000)
split.c		patch | blob | history
vpatch.c		patch | blob | history
wiggle.c		patch | blob | history
wiggle.h		patch | blob | history