commit e18c24b35a7cf9bb1b91288b706fa448ed28a7c2 (HEAD, refs/remotes/origin/master) Author: Mattias Engdegård Date: Thu Apr 9 16:19:14 2020 +0200 Set last-coding-system-used upon ASCII conversion bypass (bug#40407) Spotted by Kazuhiro Ito. * src/coding.c (code_convert_string): Set Vlast_coding_system if appropriate. * test/src/coding-tests.el (coding-nocopy-ascii): Add test. diff --git a/src/coding.c b/src/coding.c index 450c498f1e..9848f983a8 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9527,11 +9527,15 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) || inhibit_eol_conversion || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes))) - return (nocopy - ? string - : (encodep - ? make_unibyte_string (SSDATA (string), bytes) - : make_multibyte_string (SSDATA (string), bytes, bytes))); + { + if (! norecord) + Vlast_coding_system_used = coding_system; + return (nocopy + ? string + : (encodep + ? make_unibyte_string (SSDATA (string), bytes) + : make_multibyte_string (SSDATA (string), bytes, bytes))); + } } else if (BUFFERP (dst_object)) { diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el index 9f6fac3edd..a741e233d3 100644 --- a/test/src/coding-tests.el +++ b/test/src/coding-tests.el @@ -393,7 +393,8 @@ (should-not (eq (decode-coding-string s coding nil) s)) (should-not (eq (encode-coding-string s coding nil) s)) (should (eq (decode-coding-string s coding t) s)) - (should (eq (encode-coding-string s coding t) s))) + (should (eq (encode-coding-string s coding t) s)) + (should (eq last-coding-system-used coding))) ;; With EOL conversion inhibited. (let ((inhibit-eol-conversion t)) commit 50618cc30381521f0e16c7813d48d0e5c821a076 Author: Eli Zaretskii Date: Thu Apr 9 16:38:02 2020 +0300 Fix display of boxed header-line * src/xdisp.c (init_iterator): Set IT->face_box_p flag for a boxed mode/header/tab line. (Bug#40521) diff --git a/src/xdisp.c b/src/xdisp.c index cbdef7ad11..193cc372b0 100644 --- a/src/xdisp.c +++ b/src/xdisp.c @@ -3218,7 +3218,10 @@ init_iterator (struct it *it, struct window *w, with a left box line. */ face = FACE_FROM_ID_OR_NULL (it->f, remapped_base_face_id); if (face && face->box != FACE_NO_BOX) - it->start_of_box_run_p = true; + { + it->face_box_p = true; + it->start_of_box_run_p = true; + } } /* If a buffer position was specified, set the iterator there, commit d3e2c88041b4844422bda64b1ee51678dc8a2e88 Author: Mattias Engdegård Date: Thu Apr 9 12:04:22 2020 +0200 Fix ASCII-only conversion logic (bug#40407) To sidestep conversion altogether when EOL conversion applies, we must either be encoding a string without NL, or decoding without CR. * src/coding.c (string_ascii_p): Revert to a pure predicate. (code_convert_string): Fix logic. Don't use uninitialised ascii_p (removed). Use memchr to detect CR or LF in string when needed. * test/src/coding-tests.el (coding-nocopy-ascii): Update tests to include encodings with explicit EOL conversions. diff --git a/src/coding.c b/src/coding.c index ffcb9cf0a1..450c498f1e 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9474,22 +9474,15 @@ not fully specified.) */) return code_convert_region (start, end, coding_system, destination, 1, 0); } -/* Non-zero if STR contains only characters in the 0..127 range. - Positive if STR includes characters that don't need EOL conversion - on decoding, negative otherwise. */ -static int -string_ascii_p (Lisp_Object str) +/* Whether STRING only contains chars in the 0..127 range. */ +static bool +string_ascii_p (Lisp_Object string) { - ptrdiff_t nbytes = SBYTES (str); - bool CR_Seen = false; + ptrdiff_t nbytes = SBYTES (string); for (ptrdiff_t i = 0; i < nbytes; i++) - { - if (SREF (str, i) > 127) - return 0; - if (SREF (str, i) == '\r') - CR_Seen = true; - } - return CR_Seen ? -1 : 1; + if (SREF (string, i) > 127) + return false; + return true; } Lisp_Object @@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, if (EQ (dst_object, Qt)) { /* Fast path for ASCII-only input and an ASCII-compatible coding: - act as identity if no EOL conversion is neede. */ - int ascii_p; + act as identity if no EOL conversion is needed. */ Lisp_Object attrs = CODING_ID_ATTRS (coding.id); if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) && (STRING_MULTIBYTE (string) - ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) - { - if (ascii_p > 0 - || (ascii_p < 0 - && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) - || inhibit_eol_conversion))) - return (nocopy - ? string - : (encodep - ? make_unibyte_string (SSDATA (string), bytes) - : make_multibyte_string (SSDATA (string), - bytes, bytes))); - } + ? (chars == bytes) : string_ascii_p (string)) + && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) + || inhibit_eol_conversion + || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes))) + return (nocopy + ? string + : (encodep + ? make_unibyte_string (SSDATA (string), bytes) + : make_multibyte_string (SSDATA (string), bytes, bytes))); } else if (BUFFERP (dst_object)) { diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el index 8d92bcdcd1..9f6fac3edd 100644 --- a/test/src/coding-tests.el +++ b/test/src/coding-tests.el @@ -388,29 +388,38 @@ (let* ((uni (apply #'string (number-sequence 0 127))) (multi (string-to-multibyte uni))) (dolist (s (list uni multi)) + ;; Encodings without EOL conversion. (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) (should-not (eq (decode-coding-string s coding nil) s)) (should-not (eq (encode-coding-string s coding nil) s)) (should (eq (decode-coding-string s coding t) s)) - (should (eq (encode-coding-string s coding t) s))))) - (let* ((uni (apply #'string (number-sequence 15 127))) + (should (eq (encode-coding-string s coding t) s))) + + ;; With EOL conversion inhibited. + (let ((inhibit-eol-conversion t)) + (dolist (coding '(us-ascii iso-latin-1 utf-8)) + (should-not (eq (decode-coding-string s coding nil) s)) + (should-not (eq (encode-coding-string s coding nil) s)) + (should (eq (decode-coding-string s coding t) s)) + (should (eq (encode-coding-string s coding t) s)))))) + + ;; Check identity decoding with EOL conversion for ASCII except CR. + (let* ((uni (apply #'string (delq ?\r (number-sequence 0 127)))) (multi (string-to-multibyte uni))) (dolist (s (list uni multi)) - (dolist (coding '(us-ascii iso-latin-1 utf-8)) + (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac)) (should-not (eq (decode-coding-string s coding nil) s)) - (should-not (eq (encode-coding-string s coding nil) s)) - (should (eq (decode-coding-string s coding t) s)) - (should (eq (encode-coding-string s coding t) s))))) - (let* ((uni (apply #'string (number-sequence 0 127))) - (multi (string-to-multibyte uni)) - (inhibit-eol-conversion t)) + (should (eq (decode-coding-string s coding t) s))))) + + ;; Check identity encoding with EOL conversion for ASCII except LF. + (let* ((uni (apply #'string (delq ?\n (number-sequence 0 127)))) + (multi (string-to-multibyte uni))) (dolist (s (list uni multi)) - (dolist (coding '(us-ascii iso-latin-1 utf-8)) - (should-not (eq (decode-coding-string s coding nil) s)) + (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac)) (should-not (eq (encode-coding-string s coding nil) s)) - (should (eq (decode-coding-string s coding t) s)) (should (eq (encode-coding-string s coding t) s)))))) + (ert-deftest coding-check-coding-systems-region () (should (equal (check-coding-systems-region "aå" nil '(utf-8)) nil)) commit 95dd8de1df19a8529efb66257ac78789be62ca37 Author: Mattias Engdegård Date: Thu Apr 9 11:29:51 2020 +0200 chinese-hz is not ASCII compatible (bug#40407) * lisp/language/chinese.el: Disable :ascii-compatible-p for chinese-hz. * test/lisp/international/mule-tests.el (mule-hz): New test. diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el index bc6969c139..4389db961d 100644 --- a/lisp/language/chinese.el +++ b/lisp/language/chinese.el @@ -103,6 +103,11 @@ (define-coding-system-alias 'hz-gb-2312 'chinese-hz) (define-coding-system-alias 'hz 'chinese-hz) +;; FIXME: 'define-coding-system' automatically sets :ascii-compatible-p, +;; to any encoding whose :coding-type is 'utf-8', but UTF-7 is not ASCII +;; compatible, so we override that here (bug#40407). +(coding-system-put 'chinese-hz :ascii-compatible-p nil) + (set-language-info-alist "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) (iso639-language . zh) diff --git a/test/lisp/international/mule-tests.el b/test/lisp/international/mule-tests.el index bb96943888..5f8e653d7c 100644 --- a/test/lisp/international/mule-tests.el +++ b/test/lisp/international/mule-tests.el @@ -65,6 +65,9 @@ (should (equal (encode-coding-string "あ" 'utf-7-imap) "&MEI-")) (should (equal (decode-coding-string "&MEI-" 'utf-7-imap) "あ"))) +(ert-deftest mule-hz () + ;; The chinese-hz encoding is not ASCII compatible. + (should-not (coding-system-get 'chinese-hz :ascii-compatible-p))) ;; Stop "Local Variables" above causing confusion when visiting this file. commit 786887cf439450ce7d8d6fbe624e8c434e50d469 Author: Mattias Engdegård Date: Wed Apr 8 17:13:39 2020 +0200 Don't crash with invalid argument in check-coding-systems-region * src/coding.c (Fcheck_coding_systems_region): Don't crash if the third arg contains something that isn't a coding system. * test/src/coding-tests.el (coding-check-coding-systems-region): New test. diff --git a/src/coding.c b/src/coding.c index c24c70c089..ffcb9cf0a1 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9302,7 +9302,10 @@ is nil. */) for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail)) { elt = XCAR (tail); - attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); + Lisp_Object spec = CODING_SYSTEM_SPEC (elt); + if (!VECTORP (spec)) + xsignal1 (Qcoding_system_error, elt); + attrs = AREF (spec, 0); ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1, NULL)); list = Fcons (list2 (elt, attrs), list); diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el index 83a06b8179..8d92bcdcd1 100644 --- a/test/src/coding-tests.el +++ b/test/src/coding-tests.el @@ -411,6 +411,14 @@ (should (eq (decode-coding-string s coding t) s)) (should (eq (encode-coding-string s coding t) s)))))) +(ert-deftest coding-check-coding-systems-region () + (should (equal (check-coding-systems-region "aå" nil '(utf-8)) + nil)) + (should (equal (check-coding-systems-region "aåbγc" nil + '(utf-8 iso-latin-1 us-ascii)) + '((iso-latin-1 3) (us-ascii 1 3)))) + (should-error (check-coding-systems-region "å" nil '(bad-coding-system)))) + ;; Local Variables: ;; byte-compile-warnings: (not obsolete) ;; End: commit 402cbc5be26827244075dbe14288e7722290f83a Author: Mattias Engdegård Date: Mon Apr 6 15:20:08 2020 +0200 Don't rely on copying in {EN,DE}CODE_FILE Callers of ENCODE_FILE and DECODE_FILE should not assume that these functions always return a new string (bug#40407). * src/w32fns.c (Fw32_shell_execute): * src/w32proc.c (Fw32_application_type): Sink taking the address of a Lisp string past GC points. Copy values returned from ENCODE_FILE before mutating them. diff --git a/src/w32fns.c b/src/w32fns.c index 9bb4e27b01..8d714f0b8d 100644 --- a/src/w32fns.c +++ b/src/w32fns.c @@ -8258,7 +8258,6 @@ a ShowWindow flag: /* Encode filename, current directory and parameters. */ current_dir = GUI_ENCODE_FILE (current_dir); document = GUI_ENCODE_FILE (document); - doc_w = GUI_SDATA (document); if (STRINGP (parameters)) { parameters = GUI_ENCODE_SYSTEM (parameters); @@ -8269,6 +8268,7 @@ a ShowWindow flag: operation = GUI_ENCODE_SYSTEM (operation); ops_w = GUI_SDATA (operation); } + doc_w = GUI_SDATA (document); result = (intptr_t) ShellExecuteW (NULL, ops_w, doc_w, params_w, GUI_SDATA (current_dir), (FIXNUMP (show_flag) @@ -8353,7 +8353,7 @@ a ShowWindow flag: handler = Ffind_file_name_handler (absdoc, Qfile_exists_p); if (NILP (handler)) { - Lisp_Object absdoc_encoded = ENCODE_FILE (absdoc); + Lisp_Object absdoc_encoded = Fcopy_sequence (ENCODE_FILE (absdoc)); if (faccessat (AT_FDCWD, SSDATA (absdoc_encoded), F_OK, AT_EACCESS) == 0) { diff --git a/src/w32proc.c b/src/w32proc.c index de33726905..16e32e4c58 100644 --- a/src/w32proc.c +++ b/src/w32proc.c @@ -3231,7 +3231,7 @@ such programs cannot be invoked by Emacs anyway. */) char *progname, progname_a[MAX_PATH]; program = Fexpand_file_name (program, Qnil); - encoded_progname = ENCODE_FILE (program); + encoded_progname = Fcopy_sequence (ENCODE_FILE (program)); progname = SSDATA (encoded_progname); unixtodos_filename (progname); filename_to_ansi (progname, progname_a); commit 20eb4247cdc2eeada43c3de8abf9c577be38c98f Author: Eli Zaretskii Date: Thu Apr 9 12:20:24 2020 +0300 Fix decoding of ASCII strings with embedded CR characters * src/coding.c (string_ascii_p): Return a negative value if an all-ASCII string STR includes the CR character, otherwise a positive value. (code_convert_string): If the string is ASCII, but includes CR characters, use the fast path only if EOL doesn't need to be decoded. (Bug#40519) * test/src/coding-tests.el (coding-nocopy-ascii): Add more tests for bug#40519. diff --git a/src/coding.c b/src/coding.c index 24a832ff3e..c24c70c089 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9471,7 +9471,7 @@ not fully specified.) */) return code_convert_region (start, end, coding_system, destination, 1, 0); } -/* Non-zero if STR contains only characterss in the 0..127 range. +/* Non-zero if STR contains only characters in the 0..127 range. Positive if STR includes characters that don't need EOL conversion on decoding, negative otherwise. */ static int @@ -9523,7 +9523,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, if (EQ (dst_object, Qt)) { /* Fast path for ASCII-only input and an ASCII-compatible coding: - act as identity. */ + act as identity if no EOL conversion is neede. */ int ascii_p; Lisp_Object attrs = CODING_ID_ATTRS (coding.id); if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) commit faf996dc6e963a8dd74e9e794ded0467dd78ea18 Author: Eli Zaretskii Date: Thu Apr 9 12:18:30 2020 +0300 Fix decoding ASCII strings with embedded CR characters * src/coding.c (string_ascii_p): Return a negative value if an all-ASCII string STR includes the CR character, otherwise a positive value. (code_convert_string): If the string is ASCII, but includes CR characters, use the fast path only if EOL doesn't need to be decoded. (Bug#40519) * test/src/coding-tests.el (coding-nocopy-ascii): Add tests for bug#40519. diff --git a/src/coding.c b/src/coding.c index 49c1e625d5..24a832ff3e 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9471,15 +9471,22 @@ not fully specified.) */) return code_convert_region (start, end, coding_system, destination, 1, 0); } -/* Whether a string only contains chars in the 0..127 range. */ -static bool +/* Non-zero if STR contains only characterss in the 0..127 range. + Positive if STR includes characters that don't need EOL conversion + on decoding, negative otherwise. */ +static int string_ascii_p (Lisp_Object str) { ptrdiff_t nbytes = SBYTES (str); + bool CR_Seen = false; for (ptrdiff_t i = 0; i < nbytes; i++) - if (SREF (str, i) > 127) - return false; - return true; + { + if (SREF (str, i) > 127) + return 0; + if (SREF (str, i) == '\r') + CR_Seen = true; + } + return CR_Seen ? -1 : 1; } Lisp_Object @@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, { /* Fast path for ASCII-only input and an ASCII-compatible coding: act as identity. */ + int ascii_p; Lisp_Object attrs = CODING_ID_ATTRS (coding.id); if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) && (STRING_MULTIBYTE (string) - ? (chars == bytes) : string_ascii_p (string))) - return (nocopy - ? string - : (encodep - ? make_unibyte_string (SSDATA (string), bytes) - : make_multibyte_string (SSDATA (string), bytes, bytes))); + ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) + { + if (ascii_p > 0 + || (ascii_p < 0 + && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) + || inhibit_eol_conversion))) + return (nocopy + ? string + : (encodep + ? make_unibyte_string (SSDATA (string), bytes) + : make_multibyte_string (SSDATA (string), + bytes, bytes))); + } } else if (BUFFERP (dst_object)) { diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el index 93e6709d44..83a06b8179 100644 --- a/test/src/coding-tests.el +++ b/test/src/coding-tests.el @@ -387,6 +387,23 @@ "Check that the NOCOPY parameter works for ASCII-only strings." (let* ((uni (apply #'string (number-sequence 0 127))) (multi (string-to-multibyte uni))) + (dolist (s (list uni multi)) + (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) + (should-not (eq (decode-coding-string s coding nil) s)) + (should-not (eq (encode-coding-string s coding nil) s)) + (should (eq (decode-coding-string s coding t) s)) + (should (eq (encode-coding-string s coding t) s))))) + (let* ((uni (apply #'string (number-sequence 15 127))) + (multi (string-to-multibyte uni))) + (dolist (s (list uni multi)) + (dolist (coding '(us-ascii iso-latin-1 utf-8)) + (should-not (eq (decode-coding-string s coding nil) s)) + (should-not (eq (encode-coding-string s coding nil) s)) + (should (eq (decode-coding-string s coding t) s)) + (should (eq (encode-coding-string s coding t) s))))) + (let* ((uni (apply #'string (number-sequence 0 127))) + (multi (string-to-multibyte uni)) + (inhibit-eol-conversion t)) (dolist (s (list uni multi)) (dolist (coding '(us-ascii iso-latin-1 utf-8)) (should-not (eq (decode-coding-string s coding nil) s))