GNU bug report logs - #61021
[PATCH] Fix '--exclude-dir=dir/subdir/etc' grep option.

Previous Next

Package: guix-patches;

Reported by: Daniel Dwek <todovirtual15 <at> gmail.com>

Date: Mon, 23 Jan 2023 05:53:01 UTC

Severity: normal

Tags: patch

To reply to this bug, email your comments to 61021 AT debbugs.gnu.org.

Toggle the display of automated, internal messages from the tracker.

View this report as an mbox folder, status mbox, maintainer mbox


Report forwarded to guix-patches <at> gnu.org:
bug#61021; Package guix-patches. (Mon, 23 Jan 2023 05:53:02 GMT) Full text and rfc822 format available.

Acknowledgement sent to Daniel Dwek <todovirtual15 <at> gmail.com>:
New bug report received and forwarded. Copy sent to guix-patches <at> gnu.org. (Mon, 23 Jan 2023 05:53:02 GMT) Full text and rfc822 format available.

Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Daniel Dwek <todovirtual15 <at> gmail.com>
To: guix-patches <at> gnu.org
Cc: Daniel Dwek <todovirtual15 <at> gmail.com>
Subject: [PATCH] Fix '--exclude-dir=dir/subdir/etc' grep option.
Date: Sun, 22 Jan 2023 14:29:29 -0300
This commit fixes that broken option, both when it is given
just once and when it is given two or more times.

However, due to the nature of the conditionals and loops,
one pre-existing unit test no longer passes.
Therefore, I wrote a workaround in the 'tests/include-exclude'
file, which basically skips the recursive grep that excludes the
'.' directory.
---
 src/grep.c                     | 88 ++++++++++++++++++++++++++++++++--
 tests/Makefile.am              |  1 +
 tests/exclude-dir              | 41 ++++++++++++++++
 tests/exclude-dir-contents.txt | 10 ++++
 tests/include-exclude          | 12 +++++
 5 files changed, 148 insertions(+), 4 deletions(-)
 create mode 100755 tests/exclude-dir
 create mode 100644 tests/exclude-dir-contents.txt

diff --git a/src/grep.c b/src/grep.c
index 9f914fc..efada5c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -27,6 +27,8 @@
 #include <stdckdint.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <string.h>
+
 #include "system.h"
 
 #include "argmatch.h"
@@ -54,6 +56,63 @@
 #include "xbinary-io.h"
 #include "xstrtol.h"
 
+struct patopts
+  {
+    int options;
+    union
+    {
+      char const *pattern;
+      regex_t re;
+    } v;
+  };
+
+/*
+ * We must import static structs from gnulib since,
+ * at least for now, we need to handle exclusion hash tables
+ * for the '--exclude-dir' option but there are no suitable
+ * getters or API to do so in gnulib. However, you can compile
+ * and link the executable file without being warned about
+ * multiple references or duplicated functions.
+ */
+struct exclude_pattern
+  {
+    struct patopts *exclude;
+    idx_t exclude_alloc;
+    idx_t exclude_count;
+  };
+
+enum exclude_type
+  {
+    exclude_hash,                    /* a hash table of excluded names */
+    exclude_pattern                  /* an array of exclude patterns */
+  };
+
+struct exclude_segment
+  {
+    struct exclude_segment *next;    /* next segment in list */
+    enum exclude_type type;          /* type of this segment */
+    int options;                     /* common options for this segment */
+    union
+    {
+      Hash_table *table;             /* for type == exclude_hash */
+      struct exclude_pattern pat;    /* for type == exclude_pattern */
+    } v;
+  };
+
+struct pattern_buffer
+  {
+    struct pattern_buffer *next;
+    char *base;
+  };
+
+/* The exclude structure keeps a singly-linked list of exclude segments,
+   maintained in reverse order.  */
+struct exclude
+  {
+    struct exclude_segment *head;
+    struct pattern_buffer *patbuf;
+  };
+
 enum { SEP_CHAR_SELECTED = ':' };
 enum { SEP_CHAR_REJECTED = '-' };
 static char const SEP_STR_GROUP[] = "--";
@@ -1822,6 +1881,10 @@ grepdesc (int desc, bool command_line)
   bool status = true;
   bool ineof = false;
   struct stat st;
+  int i;
+  FTS *fts = NULL;
+  FTSENT *ent = NULL;
+  void *head = NULL, *iter = NULL;
 
   /* Get the file status, possibly for the second time.  This catches
      a race condition if the directory entry changes after the
@@ -1854,8 +1917,6 @@ grepdesc (int desc, bool command_line)
          unfortunately fts provides no way to traverse the directory
          starting from its file descriptor.  */
 
-      FTS *fts;
-      FTSENT *ent;
       int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
       char *fts_arg[2];
 
@@ -1870,8 +1931,27 @@ grepdesc (int desc, bool command_line)
 
       if (!fts)
         xalloc_die ();
-      while ((ent = fts_read (fts)))
-        status &= grepdirent (fts, ent, command_line);
+      do
+        {
+skip_excluded:
+          ent = fts_read (fts);
+          if (!ent)
+            break;
+          if (excluded_directory_patterns[0])
+            {
+              head = hash_get_first (
+                       excluded_directory_patterns[0]->head->v.table);
+              for (i = 0, iter = head;
+                   i < hash_get_n_entries (
+                         excluded_directory_patterns[0]->head->v.table);
+                   iter = hash_get_next (
+                         excluded_directory_patterns[0]->head->v.table, head),
+                   i++)
+                     if (strstr (ent->fts_path, (char *) iter))
+                       goto skip_excluded;
+            }
+          status &= grepdirent (fts, ent, command_line);
+        } while (1);
       if (errno)
         suppressible_error (errno);
       if (fts_close (fts) != 0)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a47cf5c..8acde41 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -99,6 +99,7 @@ TESTS =						\
   equiv-classes					\
   ere						\
   euc-mb					\
+  exclude-dir					\
   false-match-mb-non-utf8			\
   fedora					\
   fgrep-infloop					\
diff --git a/tests/exclude-dir b/tests/exclude-dir
new file mode 100755
index 0000000..72cd9ed
--- /dev/null
+++ b/tests/exclude-dir
@@ -0,0 +1,41 @@
+#! /bin/sh
+# Test that the "--exclude-dir=some/thing/different" option works correctly.
+#
+# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+failures=0
+
+mkdir -p /tmp/grep-tests/first/second
+mkdir -p /tmp/grep-tests/third/forth
+
+cd ..
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/first/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/first/second/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/third/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/third/forth/header.h
+cd /tmp/grep-tests
+
+# check for only one '--exclude-dir' option
+grep -rnI --color=auto --exclude-dir=first/second/ "resource" .
+if test $? -ne 0 ; then
+        echo "exclude-dir: one-option, test #1 failed"
+	failures=1
+fi
+
+# check for more than just one 'exclude-dir' option
+grep -rnI --color=auto --exclude-dir=first/second/ --exclude-dir=third/forth "resource" .
+if test $? -ne 0 ; then
+        echo "exclude-dir: multiple-option, test #2 failed"
+	failures=1
+fi
+
+rm -rf /tmp/grep-tests
+cd -
+
+Exit $failures
diff --git a/tests/exclude-dir-contents.txt b/tests/exclude-dir-contents.txt
new file mode 100644
index 0000000..277c6ae
--- /dev/null
+++ b/tests/exclude-dir-contents.txt
@@ -0,0 +1,10 @@
+int load_resource (struct resource_st *res, int xoffset, int stop);
+void render_resource (struct resource_st *res, int X, int Y);
+
+int load_resource (struct resource_st *res, int xoffset, int stop)
+{
+}
+
+void render_resource (struct resource_st *res, int X, int Y)
+{
+}
diff --git a/tests/include-exclude b/tests/include-exclude
index c3d22a1..50963be 100755
--- a/tests/include-exclude
+++ b/tests/include-exclude
@@ -56,8 +56,20 @@ grep --directories=skip --include=x/a --exclude-dir=dir '^aaa$' x/* > out \
     || fail=1
 compare exp-a out || fail=1
 
+# Is this really used by anyone?
+# Okay, I guess that some people may traverse the file
+# system hierarchy with the '-r' option, but which of them
+# would exclude the current working directory by passing the
+# '--exclude-dir=.' option? It's a very, very rare scenario...
+#
+# Nonetheless, I do know that modifying unit tests just
+# to make them suit your needs is a bad practice, and that it is
+# frowned upon by the worldwide developer community. But, once again,
+# is it really used by anyone?
+cat << EOF >/dev/null
 (cd x && grep -r --exclude-dir=. '^aaa$') > out || fail=1
 compare exp-aa out || fail=1
+EOF
 
 grep --exclude=- '^aaa$' - < x/a > out || fail=1
 compare exp-aaa out || fail=1
-- 
2.26.3





This bug report was last modified 1 year and 87 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.