GNU bug report logs - #9428
Emacs mishandles file offsets > 2**29 bytes

Previous Next

Package: emacs;

Reported by: Paul Eggert <eggert <at> cs.ucla.edu>

Date: Sat, 3 Sep 2011 05:34:01 UTC

Severity: normal

Tags: patch

Found in version 24.0.50

Done: Paul Eggert <eggert <at> cs.ucla.edu>

Bug is archived. No further changes may be made.

To add a comment to this bug, you must first unarchive it, by sending
a message to control AT debbugs.gnu.org, with unarchive 9428 in the body.
You can then email your comments to 9428 AT debbugs.gnu.org in the normal way.

Toggle the display of automated, internal messages from the tracker.

View this report as an mbox folder, status mbox, maintainer mbox


Report forwarded to owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org:
bug#9428; Package emacs. (Sat, 03 Sep 2011 05:34:01 GMT) Full text and rfc822 format available.

Acknowledgement sent to Paul Eggert <eggert <at> cs.ucla.edu>:
New bug report received and forwarded. Copy sent to bug-gnu-emacs <at> gnu.org. (Sat, 03 Sep 2011 05:34:01 GMT) Full text and rfc822 format available.

Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Paul Eggert <eggert <at> cs.ucla.edu>
To: bug-gnu-emacs <at> gnu.org
Subject: Emacs mishandles file offsets > 2**29 bytes
Date: Fri, 02 Sep 2011 22:29:36 -0700
Package: Emacs
Version: 24.0.50
Tags: patch

This patch fixes some integer-overflow issues with large files, mostly
on 32-bit hosts.  I plan to test it a bit more before installing.

=== modified file 'src/ChangeLog'
--- src/ChangeLog	2011-08-30 22:43:43 +0000
+++ src/ChangeLog	2011-09-03 05:23:17 +0000
@@ -1,3 +1,22 @@
+2011-09-03  Paul Eggert  <eggert <at> cs.ucla.edu>
+
+	* fileio.c: Fix bugs with large file offsets.
+	The previous code assumed that file offsets (off_t values) fit in
+	EMACS_INT variables, which is not true on typical 32-bit hosts.
+	The code messed up by falsely reporting buffer overflow in cases
+	such as (insert-file-contents "big" nil 1 2) into an empty buffer
+	when "big" contains more than 2**29 bytes, even though this
+	inserts just one byte and does not overflow the buffer.
+	(Finsert_file_contents): Store file offsets as off_t
+	values, not as EMACS_INT values.  Check for overflow when
+	converting between EMACS_INT and off_t.  When checking for
+	buffer overflow or for overlap, take the offsets into account.
+	Don't use EMACS_INT for small values where int suffices.
+	When checking for overlap, fix a typo: ZV was used where
+	ZV_BYTE was intended.
+	(Fwrite_region): Don't assume off_t fits into 'long'.
+	* buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT.
+
 2011-08-30  Chong Yidong  <cyd <at> stupidchicken.com>
 
 	* syntax.c (find_defun_start): Update all cache variables if

=== modified file 'src/buffer.h'
--- src/buffer.h	2011-07-06 21:53:56 +0000
+++ src/buffer.h	2011-09-03 05:23:17 +0000
@@ -559,7 +559,7 @@
      is still the same (since it's rounded up to seconds) but we're actually
      not up-to-date.  -1 means the size is unknown.  Only meaningful if
      modtime is actually set.  */
-  EMACS_INT modtime_size;
+  off_t modtime_size;
   /* The value of text->modiff at the last auto-save.  */
   int auto_save_modified;
   /* The value of text->modiff at the last display error.

=== modified file 'src/fileio.c'
--- src/fileio.c	2011-07-19 20:37:27 +0000
+++ src/fileio.c	2011-09-03 05:23:17 +0000
@@ -3179,6 +3179,7 @@
   EMACS_INT inserted = 0;
   int nochange = 0;
   register EMACS_INT how_much;
+  off_t beg_offset, end_offset;
   register EMACS_INT unprocessed;
   int count = SPECPDL_INDEX ();
   struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
@@ -3284,15 +3285,6 @@
   record_unwind_protect (close_file_unwind, make_number (fd));
 
 
-  /* Check whether the size is too large or negative, which can happen on a
-     platform that allows file sizes greater than the maximum off_t value.  */
-  if (! not_regular
-      && ! (0 <= st.st_size && st.st_size <= BUF_BYTES_MAX))
-    buffer_overflow ();
-
-  /* Prevent redisplay optimizations.  */
-  current_buffer->clip_changed = 1;
-
   if (!NILP (visit))
     {
       if (!NILP (beg) || !NILP (end))
@@ -3302,25 +3294,63 @@
     }
 
   if (!NILP (beg))
-    CHECK_NUMBER (beg);
+    {
+      if (! (RANGED_INTEGERP (0, beg, TYPE_MAXIMUM (off_t))))
+	wrong_type_argument (intern ("file-offset"), beg);
+      beg_offset = XFASTINT (beg);
+    }
   else
-    XSETFASTINT (beg, 0);
+    beg_offset = 0;
 
   if (!NILP (end))
-    CHECK_NUMBER (end);
+    {
+      if (! (RANGED_INTEGERP (0, end, TYPE_MAXIMUM (off_t))))
+	wrong_type_argument (intern ("file-offset"), end);
+      end_offset = XFASTINT (end);
+    }
   else
     {
-      if (! not_regular)
+      if (not_regular)
+	end_offset = TYPE_MAXIMUM (off_t);
+      else
 	{
-	  XSETINT (end, st.st_size);
+	  end_offset = st.st_size;
+
+	  /* A negative size can happen on a platform that allows file
+	     sizes greater than the maximum off_t value.  */
+	  if (end_offset < 0)
+	    buffer_overflow ();
 
 	  /* The file size returned from stat may be zero, but data
 	     may be readable nonetheless, for example when this is a
 	     file in the /proc filesystem.  */
-	  if (st.st_size == 0)
-	    XSETINT (end, READ_BUF_SIZE);
-	}
-    }
+	  if (end_offset == 0)
+	    end_offset = READ_BUF_SIZE;
+	}
+    }
+
+  /* Check now whether the buffer will become too large,
+     in the likely case where the file's length is not changing.
+     This saves a lot of needless work before a buffer overflow.  */
+  if (! not_regular)
+    {
+      /* The likely offset where we will stop reading.  We could read
+	 more (or less), if the file grows (or shrinks) as we read it.  */
+      off_t likely_end = min (end_offset, st.st_size);
+
+      if (beg_offset < likely_end)
+	{
+	  ptrdiff_t buf_bytes =
+	    Z_BYTE - (!NILP (replace) ? ZV_BYTE - BEGV_BYTE  : 0);
+	  ptrdiff_t buf_growth_max = BUF_BYTES_MAX - buf_bytes;
+	  off_t likely_growth = likely_end - beg_offset;
+	  if (buf_growth_max < likely_growth)
+	    buffer_overflow ();
+	}
+    }
+
+  /* Prevent redisplay optimizations.  */
+  current_buffer->clip_changed = 1;
 
   if (EQ (Vcoding_system_for_read, Qauto_save_coding))
     {
@@ -3465,9 +3495,9 @@
 	 give up on handling REPLACE in the optimized way.  */
       int giveup_match_end = 0;
 
-      if (XINT (beg) != 0)
+      if (beg_offset != 0)
 	{
-	  if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0)
+	  if (lseek (fd, beg_offset, SEEK_SET) < 0)
 	    report_file_error ("Setting file position",
 			       Fcons (orig_filename, Qnil));
 	}
@@ -3515,7 +3545,7 @@
       immediate_quit = 0;
       /* If the file matches the buffer completely,
 	 there's no need to replace anything.  */
-      if (same_at_start - BEGV_BYTE == XINT (end))
+      if (same_at_start - BEGV_BYTE == end_offset)
 	{
 	  emacs_close (fd);
 	  specpdl_ptr--;
@@ -3530,16 +3560,17 @@
 	 already found that decoding is necessary, don't waste time.  */
       while (!giveup_match_end)
 	{
-	  EMACS_INT total_read, nread, bufpos, curpos, trial;
+	  int total_read, nread, bufpos, trial;
+	  off_t curpos;
 
 	  /* At what file position are we now scanning?  */
-	  curpos = XINT (end) - (ZV_BYTE - same_at_end);
+	  curpos = end_offset - (ZV_BYTE - same_at_end);
 	  /* If the entire file matches the buffer tail, stop the scan.  */
 	  if (curpos == 0)
 	    break;
 	  /* How much can we scan in the next step?  */
 	  trial = min (curpos, sizeof buffer);
-	  if (emacs_lseek (fd, curpos - trial, SEEK_SET) < 0)
+	  if (lseek (fd, curpos - trial, SEEK_SET) < 0)
 	    report_file_error ("Setting file position",
 			       Fcons (orig_filename, Qnil));
 
@@ -3606,13 +3637,14 @@
 
 	  /* Don't try to reuse the same piece of text twice.  */
 	  overlap = (same_at_start - BEGV_BYTE
-		     - (same_at_end + st.st_size - ZV));
+		     - (same_at_end
+			+ (! NILP (end) ? end_offset : st.st_size) - ZV_BYTE));
 	  if (overlap > 0)
 	    same_at_end += overlap;
 
 	  /* Arrange to read only the nonmatching middle part of the file.  */
-	  XSETFASTINT (beg, XINT (beg) + (same_at_start - BEGV_BYTE));
-	  XSETFASTINT (end, XINT (end) - (ZV_BYTE - same_at_end));
+	  beg_offset += same_at_start - BEGV_BYTE;
+	  end_offset -= ZV_BYTE - same_at_end;
 
 	  del_range_byte (same_at_start, same_at_end, 0);
 	  /* Insert from the file at the proper position.  */
@@ -3657,7 +3689,7 @@
       /* First read the whole file, performing code conversion into
 	 CONVERSION_BUFFER.  */
 
-      if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0)
+      if (lseek (fd, beg_offset, SEEK_SET) < 0)
 	report_file_error ("Setting file position",
 			   Fcons (orig_filename, Qnil));
 
@@ -3824,7 +3856,7 @@
     }
 
   if (! not_regular)
-    total = XINT (end) - XINT (beg);
+    total = end_offset - beg_offset;
   else
     /* For a special file, all we can do is guess.  */
     total = READ_BUF_SIZE;
@@ -3845,9 +3877,9 @@
   if (GAP_SIZE < total)
     make_gap (total - GAP_SIZE);
 
-  if (XINT (beg) != 0 || !NILP (replace))
+  if (beg_offset != 0 || !NILP (replace))
     {
-      if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0)
+      if (lseek (fd, beg_offset, SEEK_SET) < 0)
 	report_file_error ("Setting file position",
 			   Fcons (orig_filename, Qnil));
     }
@@ -4576,7 +4608,7 @@
 
   if (!NILP (append) && !NILP (Ffile_regular_p (filename)))
     {
-      long ret;
+      off_t ret;
 
       if (NUMBERP (append))
 	ret = emacs_lseek (desc, XINT (append), SEEK_CUR);




Reply sent to Paul Eggert <eggert <at> cs.ucla.edu>:
You have taken responsibility. (Tue, 06 Sep 2011 15:45:02 GMT) Full text and rfc822 format available.

Notification sent to Paul Eggert <eggert <at> cs.ucla.edu>:
bug acknowledged by developer. (Tue, 06 Sep 2011 15:45:03 GMT) Full text and rfc822 format available.

Message #10 received at 9428-done <at> debbugs.gnu.org (full text, mbox):

From: Paul Eggert <eggert <at> cs.ucla.edu>
To: 9428-done <at> debbugs.gnu.org
Subject: fix committed to trunk
Date: Tue, 06 Sep 2011 08:40:55 -0700
I committed the fix into the trunk
as bzr 105665 and am marking this as done.




bug archived. Request was from Debbugs Internal Request <help-debbugs <at> gnu.org> to internal_control <at> debbugs.gnu.org. (Wed, 05 Oct 2011 11:24:04 GMT) Full text and rfc822 format available.

This bug report was last modified 12 years and 113 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.