There are several memory leaks in regparse.c. They all relate to incomplete pattern branches, where a branch node is not freed when branch parsing is aborted. I discovered this by successively shortening all patterns in my test suite (which is not in Ruby, nor in C, unfortunately) down to zero length.

Such leaks can accumulate in applications with allow users to entered their own regular expressions, where it is likely that regexes are wrong or incomplete. Maybe the Ruby test suite could be expanded to cover incomplete patterns or even throwing fuzzy patterns at the engine to make sure it does not choke?

Below is a patch against rev. 24387 which adresses all leaks uncovered and also improves code indentation in a few place.

Ralf

---------------------------------

--- regparse.c	Wed Aug 12 09:55:04 2009
+++ regparse.c	Fri Aug 14 18:11:38 2009
@@ -4437,8 +4437,9 @@
 	acc = NCCLASS(anode);
 	r = or_cclass(cc, acc, env);
 
+    cc_open_err: /* DI moved here from below. */
 	onig_node_free(anode);
-      cc_open_err:
+    /* DI moved above. cc_open_err: */
 	if (r != 0) goto err;
       }
       break;
@@ -4527,7 +4528,7 @@
  err:
   if (cc != NCCLASS(*np))
     bbuf_free(cc->mbuf);
-  onig_node_free(*np);
+  /* DI removed. onig_node_free(*np); */
   return r;
 }
 
@@ -4761,7 +4762,10 @@
   r = fetch_token(tok, &p, end, env);
   if (r < 0) return r;
   r = parse_subexp(&target, tok, term, &p, end, env);
-  if (r < 0) return r;
+  if (r < 0) {
+    onig_node_free(target); /* DI added */
+    return r;
+  }
 
   if (NTYPE(*np) == NT_ANCHOR)
     NANCHOR(*np)->target = target;
@@ -5101,7 +5105,10 @@
       if (r < 0) return r;
       r = parse_subexp(&target, tok, term, src, end, env);
       env->option = prev;
-      if (r < 0) return r;
+      if (r < 0) {
+	    /* DI added */ onig_node_free(target); /* DI */
+	    return r;
+	  }
       NENCLOSE(*np)->target = target;
       return tok->type;
     }
@@ -5122,12 +5129,12 @@
       CHECK_NULL_RETURN_MEMERR(*np);
 
       while (1) {
-	r = fetch_token(tok, src, end, env);
-	if (r < 0) return r;
-	if (r != TK_STRING) break;
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_STRING) break;
 
-	r = onig_node_str_cat(*np, tok->backp, *src);
-	if (r < 0) return r;
+        r = onig_node_str_cat(*np, tok->backp, *src);
+        if (r < 0) return r;
       }
 
     string_end:
@@ -5459,7 +5466,10 @@
 
   *top = NULL;
   r = parse_exp(&node, tok, term, src, end, env);
-  if (r < 0) return r;
+  if (r < 0) {
+    onig_node_free(node); /* DI added */
+    return r;
+  }
 
   if (r == TK_EOT || r == term || r == TK_ALT) {
     *top = node;
@@ -5469,16 +5479,19 @@
     headp = &(NCDR(*top));
     while (r != TK_EOT && r != term && r != TK_ALT) {
       r = parse_exp(&node, tok, term, src, end, env);
-      if (r < 0) return r;
+      if (r < 0) {
+        onig_node_free(node); /* DI added */
+        return r;
+      };
 
       if (NTYPE(node) == NT_LIST) {
-	*headp = node;
-	while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
-	headp = &(NCDR(node));
+        *headp = node;
+        while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
+        headp = &(NCDR(node));
       }
       else {
-	*headp = node_new_list(node, NULL);
-	headp = &(NCDR(*headp));
+        *headp = node_new_list(node, NULL);
+        headp = &(NCDR(*headp));
       }
     }
   }
@@ -5511,7 +5524,10 @@
       r = fetch_token(tok, src, end, env);
       if (r < 0) return r;
       r = parse_branch(&node, tok, term, src, end, env);
-      if (r < 0) return r;
+      if (r < 0) {
+        onig_node_free(node); /* DI added DI */
+        return r;
+      }
 
       *headp = onig_node_new_alt(node, NULL);
       headp = &(NCDR(*headp));
@@ -5521,7 +5537,8 @@
       goto err;
   }
   else {
-  err:
+    onig_node_free(node); /* DI added */
+    err:
     if (term == TK_SUBEXP_CLOSE)
       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
     else