GNU bug report logs - #48758
[PATCH] Elisp reader does not support non-decimal integers

Previous Next

Package: guile;

Reported by: Ivan Sokolov <ivan-p-sokolov <at> ya.ru>

Date: Mon, 31 May 2021 12:06:01 UTC

Severity: normal

Tags: patch

To reply to this bug, email your comments to 48758 AT debbugs.gnu.org.

Toggle the display of automated, internal messages from the tracker.

View this report as an mbox folder, status mbox, maintainer mbox


Report forwarded to bug-guile <at> gnu.org:
bug#48758; Package guile. (Mon, 31 May 2021 12:06:02 GMT) Full text and rfc822 format available.

Acknowledgement sent to Ivan Sokolov <ivan-p-sokolov <at> ya.ru>:
New bug report received and forwarded. Copy sent to bug-guile <at> gnu.org. (Mon, 31 May 2021 12:06:02 GMT) Full text and rfc822 format available.

Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Ivan Sokolov <ivan-p-sokolov <at> ya.ru>
To: bug-guile <at> gnu.org
Subject: [PATCH] Elisp reader does not support non-decimal integers
Date: Mon, 31 May 2021 15:04:42 +0300
[Message part 1 (text/plain, inline)]
This patch improves Elisp compatibility.

I added support for binary, octal, hexadecimal and arbitrary radix
integer literals as described in Elisp manual [1], except for the final
period. It should not take long to add support for the final period, but
in this patch I did not do this because Emacs itself does not support
final period for non-decimal integers.

[1]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Integer-Basics.html

[elisp-numbers.diff (text/x-patch, inline)]
diff --git a/module/language/elisp/lexer.scm b/module/language/elisp/lexer.scm
index 5a0e6b3ff..1066ed0c2 100644
--- a/module/language/elisp/lexer.scm
+++ b/module/language/elisp/lexer.scm
@@ -186,9 +186,22 @@
 ;;; against regular expressions to determine if it is possibly an
 ;;; integer or a float.
 
-(define integer-regex (make-regexp "^[+-]?[0-9]+\\.?$"))
-
-(define float-regex
+(define make-integer-regexp
+  (let ((alphabet "0123456789abcdefghijklmnopqrstuvwxyz"))
+    (lambda (radix)
+      (unless (<= 2 radix 36)
+        (error "invalid radix" radix))
+      (let ((pat (string-append "^[+-]?[" (string-take alphabet radix) "]+$")))
+        (make-regexp pat regexp/icase)))))
+
+(define get-integer-regexp
+  (let ((ht (make-hash-table)))
+    (hash-set! ht 10 (make-regexp "^[+-]?[0-9]+\\.?$"))
+    (lambda (radix)
+      (or (hash-ref ht radix)
+          (hash-set! ht radix (make-integer-regexp radix))))))
+
+(define float-regexp
   (make-regexp
    "^[+-]?([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e[+-]?[0-9]+)?$"))
 
@@ -197,7 +210,9 @@
 
 (define no-escape-punctuation (string->char-set "-+=*/_~!@$%^&:<>{}?."))
 
-(define (get-symbol-or-number port)
+(define (get-symbol-or-number port radix)
+  (define integer-regexp
+    (and=> radix get-integer-regexp))
   (let iterate ((result-chars '())
                 (had-escape #f))
     (let* ((c (read-char port))
@@ -207,10 +222,11 @@
                        (values
                         (cond
                          ((and (not had-escape)
-                               (regexp-exec integer-regex result))
+                               (regexp? integer-regexp)
+                               (regexp-exec integer-regexp result))
                           'integer)
                          ((and (not had-escape)
-                               (regexp-exec float-regex result))
+                               (regexp-exec float-regexp result))
                           'float)
                          (else 'symbol))
                         result))))
@@ -228,11 +244,20 @@
         (unread-char c port)
         (finish))))))
 
+(define (string->integer str radix)
+  (let* ((num (string->number str radix))
+         (exact (and=> num inexact->exact)))
+    (unless (number? num)
+      (error "expected number" str))
+    (unless (integer? exact)
+      (error "expected integer" str))
+    exact))
+
 ;;; Parse a circular structure marker without the leading # (which was
 ;;; already read and recognized), that is, a number as identifier and
 ;;; then either = or #.
 
-(define (get-circular-marker port)
+(define (get-circular-marker-or-number port)
   (call-with-values
       (lambda ()
         (let iterate ((result 0))
@@ -241,13 +266,20 @@
                 (let ((val (- (char->integer cur) (char->integer #\0))))
                   (iterate (+ (* result 10) val)))
                 (values result cur)))))
-    (lambda (id type)
-      (case type
-        ((#\#) `(circular-ref . ,id))
-        ((#\=) `(circular-def . ,id))
-        (else (lexer-error port
-                           "invalid circular marker character"
-                           type))))))
+    (lambda (result last-ch)
+      (case last-ch
+        ((#\#) `(circular-ref . ,result))
+        ((#\=) `(circular-def . ,result))
+        ((#\r)
+         (call-with-values
+             (lambda ()
+               (get-symbol-or-number port result))
+           (lambda (type str)
+             (unless (eq? type 'integer)
+               (error "invalid integer read syntax" type str result))
+             `(integer . ,(string->integer str result)))))
+        (else
+         (lexer-error port "invalid circular marker character" last-ch))))))
 
 ;;; Main lexer routine, which is given a port and does look for the next
 ;;; token.
@@ -334,13 +366,21 @@
           (case c
             ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
              (unread-char c port)
-             (let ((mark (get-circular-marker port)))
+             (let ((mark (get-circular-marker-or-number port)))
                (return (car mark) (cdr mark))))
             ((#\')
              (return 'function #f))
+            ((#\b #\o #\x)
+             (let ((radix (case c ((#\b) 2) ((#\o) 8) ((#\x) 16))))
+               (call-with-values
+                   (lambda () (get-symbol-or-number port radix))
+                 (lambda (type str)
+                   (unless (eq? type 'integer)
+                     (error "invalid integer read syntax" type str radix))
+                   (return 'integer (string->integer str radix))))))
             ((#\:)
              (call-with-values
-                 (lambda () (get-symbol-or-number port))
+                 (lambda () (get-symbol-or-number port #f))
                (lambda (type str)
                  (return 'symbol (make-symbol str))))))))
         ;; Parentheses and other special-meaning single characters.
@@ -363,7 +403,7 @@
         (else
          (unread-char c port)
          (call-with-values
-             (lambda () (get-symbol-or-number port))
+             (lambda () (get-symbol-or-number port 10))
            (lambda (type str)
              (case type
                ((symbol)
@@ -384,12 +424,7 @@
                 ;; string->number returns an inexact real.  Thus we need
                 ;; a conversion here, but it should always result in an
                 ;; integer!
-                (return
-                 'integer
-                 (let ((num (inexact->exact (string->number str))))
-                   (if (not (integer? num))
-                       (error "expected integer" str num))
-                   num)))
+                (return 'integer (string->integer str 10)))
                ((float)
                 (return 'float (let ((num (string->number str)))
                                  (if (exact? num)
diff --git a/test-suite/tests/elisp-reader.test b/test-suite/tests/elisp-reader.test
index 669c4d592..b84e4756f 100644
--- a/test-suite/tests/elisp-reader.test
+++ b/test-suite/tests/elisp-reader.test
@@ -75,21 +75,45 @@
               (symbol . abc) (paren-open . #f) (symbol . def)
               (paren-close . #f) (symbol . ghi) (symbol . .e5))))
 
-  ; Here we make use of the property that exact/inexact numbers are not equal?
-  ; even when they have the same numeric value!
-  (pass-if "integers (decimal)"
-    (equal? (lex-string "-1 1 1. +1 01234")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1)
-              (integer . 1234))))
-  (pass-if "integers (binary)"
-    (equal? (lex-string "#b-1 #b1 #b+1 #b10011010010")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
-  (pass-if "integers (octal)"
-    (equal? (lex-string "#o-1 #o1 #o+1 #o2322")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
-  (pass-if "integers (hexadecimal)"
-    (equal? (lex-string "#x-1 #x1 #x+1 #x4d2")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
+  (with-test-prefix "integers"
+    ;; Here we make use of the property that exact/inexact numbers are not equal?
+    ;; even when they have the same numeric value!
+
+    (with-test-prefix "decimal"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "-1"))
+      (pass-if-equal "trailing dot" '((integer . 1)) (lex-string "1."))
+      (pass-if-equal "all digits"
+          '((integer . 123456789)) (lex-string "0123456789")))
+
+    (with-test-prefix "binary"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "#b1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "#b+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "#b-1"))
+      (pass-if-equal "all digits"
+          '((integer . #b010101)) (lex-string "#b010101")))
+
+    (with-test-prefix "octal"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "#o1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "#o+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "#o-1"))
+      (pass-if-equal "all digits"
+          '((integer . #o01234567)) (lex-string "#o01234567")))
+
+    (with-test-prefix "hexadecimal"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "#x1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "#x+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "#x-1"))
+      (pass-if-equal "all digits"
+          '((integer . #x0123456789abcdef)) (lex-string "#x0123456789aBcDeF")))
+
+    (with-test-prefix "arbitrary radix"
+      (pass-if-equal "simple" '((integer . 44)) (lex-string "#24r1k"))
+      (pass-if-equal "positive" '((integer . 44)) (lex-string "#24r+1k"))
+      (pass-if-equal "negative" '((integer . -44)) (lex-string "#24r-1k"))
+      (pass-if-equal "max radix" '((integer . 35)) (lex-string "#36rz"))))
+
   (pass-if "floats"
     (equal? (lex-string "1500.0 15e2 15.e2 1.5e3 .15e4 -.345e-2")
             '((float . 1500.0) (float . 1500.0) (float . 1500.0)

This bug report was last modified 2 years and 330 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.