GNU bug report logs - #37859
[PATCH] shuf: fix bug with ‘-r -n 0’

Previous Next

Package: coreutils;

Reported by: Paul Eggert <eggert <at> cs.ucla.edu>

Date: Mon, 21 Oct 2019 20:22:02 UTC

Severity: normal

Tags: patch

Done: Paul Eggert <eggert <at> cs.ucla.edu>

Bug is archived. No further changes may be made.

To add a comment to this bug, you must first unarchive it, by sending
a message to control AT debbugs.gnu.org, with unarchive 37859 in the body.
You can then email your comments to 37859 AT debbugs.gnu.org in the normal way.

Toggle the display of automated, internal messages from the tracker.

View this report as an mbox folder, status mbox, maintainer mbox


Report forwarded to bug-coreutils <at> gnu.org:
bug#37859; Package coreutils. (Mon, 21 Oct 2019 20:22:03 GMT) Full text and rfc822 format available.

Acknowledgement sent to Paul Eggert <eggert <at> cs.ucla.edu>:
New bug report received and forwarded. Copy sent to bug-coreutils <at> gnu.org. (Mon, 21 Oct 2019 20:22:03 GMT) Full text and rfc822 format available.

Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Paul Eggert <eggert <at> cs.ucla.edu>
To: bug-coreutils <at> gnu.org
Cc: Paul Eggert <eggert <at> cs.ucla.edu>
Subject: [PATCH] shuf: fix bug with ‘-r -n 0’
Date: Mon, 21 Oct 2019 13:21:08 -0700
‘shuf -r -n 0 file’ would mistakenly read from standard input.
Problem reported by my student Jingnong Qu while reimplementing a
shuf subset in Python as an exercise in UCLA Computer Science 35L:
https://web.cs.ucla.edu/classes/fall19/cs35L/assign/assign3.html
* NEWS: Mention the fix.  Also, ASCIIfy a previous item.
* src/shuf.c (main): Fix bug.
* tests/misc/shuf.sh: Add a test case for the bug.
---
 NEWS               |  5 ++++-
 src/shuf.c         | 53 +++++++++++++++++++++++++---------------------
 tests/misc/shuf.sh |  4 ++++
 3 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/NEWS b/NEWS
index fe38e80d4..476c02aed 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   (like Solaris 10 and Solaris 11).
   [bug introduced in coreutils-8.31]
 
+  'shuf -r -n 0 file' no longer mistakenly reads from standard input.
+  [bug introduced with the --repeat feature in coreutils-8.22]
+
   split no longer reports a "output file suffixes exhausted" error
   when the specified number of files is evenly divisible by 10, 16, 26,
   for --numeric, --hex, or default alphabetic suffixes respectively.
@@ -210,7 +213,7 @@ GNU coreutils NEWS                                    -*- outline -*-
   'mv -n A B' no longer suffers from a race condition that can
   overwrite a simultaneously-created B.  This bug fix requires
   platform support for the renameat2 or renameatx_np syscalls, found
-  in recent Linux and macOS kernels.  As a side effect, ‘mv -n A A’
+  in recent Linux and macOS kernels.  As a side effect, 'mv -n A A'
   now silently does nothing if A exists.
   [bug introduced with coreutils-7.1]
 
diff --git a/src/shuf.c b/src/shuf.c
index 968d3641c..6a1aa0158 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -493,7 +493,12 @@ main (int argc, char **argv)
     }
 
   /* Prepare input.  */
-  if (echo)
+  if (head_lines == 0)
+    {
+      n_lines = 0;
+      line = NULL;
+    }
+  else if (echo)
     {
       input_from_argv (operand, n_operands, eolbyte);
       n_lines = n_operands;
@@ -507,54 +512,54 @@ main (int argc, char **argv)
   else
     {
       /* If an input file is specified, re-open it as stdin.  */
-      if (n_operands == 1)
-        if (! (STREQ (operand[0], "-") || ! head_lines
-               || freopen (operand[0], "r", stdin)))
-          die (EXIT_FAILURE, errno, "%s", quotef (operand[0]));
+      if (n_operands == 1
+          && ! (STREQ (operand[0], "-")
+                || freopen (operand[0], "r", stdin)))
+        die (EXIT_FAILURE, errno, "%s", quotef (operand[0]));
 
       fadvise (stdin, FADVISE_SEQUENTIAL);
 
-      if (! repeat && head_lines != SIZE_MAX
-          && (! head_lines || input_size () > RESERVOIR_MIN_INPUT))
+      if (repeat || head_lines == SIZE_MAX
+          || input_size () <= RESERVOIR_MIN_INPUT)
         {
-          use_reservoir_sampling = true;
-          n_lines = SIZE_MAX;   /* unknown number of input lines, for now.  */
+          n_lines = read_input (stdin, eolbyte, &input_lines);
+          line = input_lines;
         }
       else
         {
-          n_lines = read_input (stdin, eolbyte, &input_lines);
-          line = input_lines;
+          use_reservoir_sampling = true;
+          n_lines = SIZE_MAX;   /* unknown number of input lines, for now.  */
         }
     }
 
-  if (! repeat)
-    head_lines = MIN (head_lines, n_lines);
+  /* The adjusted head line count; can be less than HEAD_LINES if the
+     input is small and if not repeating.  */
+  size_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines;
 
   randint_source = randint_all_new (random_source,
                                     (use_reservoir_sampling || repeat
                                      ? SIZE_MAX
-                                     : randperm_bound (head_lines, n_lines)));
+                                     : randperm_bound (ahead_lines, n_lines)));
   if (! randint_source)
     die (EXIT_FAILURE, errno, "%s", quotef (random_source));
 
   if (use_reservoir_sampling)
     {
       /* Instead of reading the entire file into 'line',
-         use reservoir-sampling to store just "head_lines" random lines.  */
-      n_lines = read_input_reservoir_sampling (stdin, eolbyte, head_lines,
+         use reservoir-sampling to store just AHEAD_LINES random lines.  */
+      n_lines = read_input_reservoir_sampling (stdin, eolbyte, ahead_lines,
                                                randint_source, &reservoir);
-      head_lines = n_lines;
+      ahead_lines = n_lines;
     }
 
   /* Close stdin now, rather than earlier, so that randint_all_new
      doesn't have to worry about opening something other than
      stdin.  */
-  if (! (echo || input_range)
-      && (fclose (stdin) != 0))
+  if (! (head_lines == 0 || echo || input_range || fclose (stdin) == 0))
     die (EXIT_FAILURE, errno, _("read error"));
 
   if (!repeat)
-    permutation = randperm_new (randint_source, head_lines, n_lines);
+    permutation = randperm_new (randint_source, ahead_lines, n_lines);
 
   if (outfile && ! freopen (outfile, "w", stdout))
     die (EXIT_FAILURE, errno, "%s", quotef (outfile));
@@ -569,10 +574,10 @@ main (int argc, char **argv)
           if (n_lines == 0)
             die (EXIT_FAILURE, 0, _("no lines to repeat"));
           if (input_range)
-            i = write_random_numbers (randint_source, head_lines,
+            i = write_random_numbers (randint_source, ahead_lines,
                                       lo_input, hi_input, eolbyte);
           else
-            i = write_random_lines (randint_source, head_lines, line, n_lines);
+            i = write_random_lines (randint_source, ahead_lines, line, n_lines);
         }
     }
   else
@@ -580,10 +585,10 @@ main (int argc, char **argv)
       if (use_reservoir_sampling)
         i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
       else if (input_range)
-        i = write_permuted_numbers (head_lines, lo_input,
+        i = write_permuted_numbers (ahead_lines, lo_input,
                                     permutation, eolbyte);
       else
-        i = write_permuted_lines (head_lines, line, permutation);
+        i = write_permuted_lines (ahead_lines, line, permutation);
     }
 
   if (i != 0)
diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index bbc54a9d2..c866f5de5 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -39,6 +39,10 @@ compare in out > /dev/null && { fail=1; echo "not random?" 1>&2; }
 sort -n out > out1
 compare in out1 || { fail=1; echo "not a permutation" 1>&2; }
 
+# Exercize shuf's -r -n 0 options, with no standard input.
+shuf -r -n 0 in <&- >out || fail=1
+compare /dev/null out || fail=1
+
 # Exercise shuf's -e option.
 t=$(shuf -e a b c d e | sort | fmt)
 test "$t" = 'a b c d e' || { fail=1; echo "not a permutation" 1>&2; }
-- 
2.21.0





bug closed, send any further explanations to 37859 <at> debbugs.gnu.org and Paul Eggert <eggert <at> cs.ucla.edu> Request was from Paul Eggert <eggert <at> cs.ucla.edu> to control <at> debbugs.gnu.org. (Mon, 21 Oct 2019 20:29:03 GMT) Full text and rfc822 format available.

bug archived. Request was from Debbugs Internal Request <help-debbugs <at> gnu.org> to internal_control <at> debbugs.gnu.org. (Tue, 19 Nov 2019 12:24:08 GMT) Full text and rfc822 format available.

This bug report was last modified 4 years and 158 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.