commit 21af3a9d9706baa417298e70260efa3fce72c6f1 (HEAD, refs/remotes/origin/master) Author: Po Lu Date: Sat Mar 30 15:05:30 2024 +0800 Disable VC in special directories on Android * lisp/vc/vc-hooks.el (vc-registered, vc-backend): Return nil without invoking any backend if FILE or FILE-OR-LIST sits within /content or /assets. diff --git a/lisp/vc/vc-hooks.el b/lisp/vc/vc-hooks.el index 75f68dd80d1..8f212e96933 100644 --- a/lisp/vc/vc-hooks.el +++ b/lisp/vc/vc-hooks.el @@ -326,30 +326,37 @@ This function performs the check each time it is called. To rely on the result of a previous call, use `vc-backend' instead. If the file was previously registered under a certain backend, then that backend is tried first." - (let (handler) - (cond - ((and (file-name-directory file) - (string-match vc-ignore-dir-regexp (file-name-directory file))) - nil) - ((setq handler (find-file-name-handler file 'vc-registered)) - ;; handler should set vc-backend and return t if registered - (funcall handler 'vc-registered file)) - (t - ;; There is no file name handler. - ;; Try vc-BACKEND-registered for each handled BACKEND. - (catch 'found - (let ((backend (vc-file-getprop file 'vc-backend))) - (mapc - (lambda (b) - (and (vc-call-backend b 'registered file) - (vc-file-setprop file 'vc-backend b) - (throw 'found t))) - (if (or (not backend) (eq backend 'none)) - vc-handled-backends - (cons backend vc-handled-backends)))) - ;; File is not registered. - (vc-file-setprop file 'vc-backend 'none) - nil))))) + ;; Subprocesses (and with them, VC backends) can't run from /contents + ;; or /actions, which are fictions maintained by Emacs that do not + ;; exist in the filesystem. + (if (and (eq system-type 'android) + (string-match-p "/\\(content\\|assets\\)[/$]" + (expand-file-name file))) + nil + (let (handler) + (cond + ((and (file-name-directory file) + (string-match vc-ignore-dir-regexp (file-name-directory file))) + nil) + ((setq handler (find-file-name-handler file 'vc-registered)) + ;; handler should set vc-backend and return t if registered + (funcall handler 'vc-registered file)) + (t + ;; There is no file name handler. + ;; Try vc-BACKEND-registered for each handled BACKEND. + (catch 'found + (let ((backend (vc-file-getprop file 'vc-backend))) + (mapc + (lambda (b) + (and (vc-call-backend b 'registered file) + (vc-file-setprop file 'vc-backend b) + (throw 'found t))) + (if (or (not backend) (eq backend 'none)) + vc-handled-backends + (cons backend vc-handled-backends)))) + ;; File is not registered. + (vc-file-setprop file 'vc-backend 'none) + nil)))))) (defun vc-backend (file-or-list) "Return the version control type of FILE-OR-LIST, nil if it's not registered. @@ -357,15 +364,22 @@ If the argument is a list, the files must all have the same back end." ;; `file' can be nil in several places (typically due to the use of ;; code like (vc-backend buffer-file-name)). (cond ((stringp file-or-list) - (let ((property (vc-file-getprop file-or-list 'vc-backend))) - ;; Note that internally, Emacs remembers unregistered - ;; files by setting the property to `none'. - (cond ((eq property 'none) nil) - (property) - ;; vc-registered sets the vc-backend property - (t (if (vc-registered file-or-list) - (vc-file-getprop file-or-list 'vc-backend) - nil))))) + ;; Subprocesses (and with them, VC backends) can't run from + ;; /contents or /actions, which are fictions maintained by + ;; Emacs that do not exist in the filesystem. + (if (and (eq system-type 'android) + (string-match-p "/\\(content\\|assets\\)[/$]" + (expand-file-name file-or-list))) + nil + (let ((property (vc-file-getprop file-or-list 'vc-backend))) + ;; Note that internally, Emacs remembers unregistered + ;; files by setting the property to `none'. + (cond ((eq property 'none) nil) + (property) + ;; vc-registered sets the vc-backend property + (t (if (vc-registered file-or-list) + (vc-file-getprop file-or-list 'vc-backend) + nil)))))) ((and file-or-list (listp file-or-list)) (vc-backend (car file-or-list))) (t commit 4c9926fed157810199695167ba8542af13b04ad3 Author: Po Lu Date: Sat Mar 30 09:59:36 2024 +0800 Rationalize java/Makefile.in * java/Makefile.in: (emacs.apk-in): Bring commentary up to date, and package classes.dex at this stage of the process. ($(APK_NAME)): Adjust to match. diff --git a/java/Makefile.in b/java/Makefile.in index 60bd2ea086b..c23b52ed44e 100644 --- a/java/Makefile.in +++ b/java/Makefile.in @@ -256,15 +256,15 @@ install_temp/assets/build_info: install_temp emacs.apk-in: install_temp install_temp/assets/directory-tree \ AndroidManifest.xml install_temp/assets/version \ - install_temp/assets/build_info -# Package everything. Specifying the assets on this command line is -# necessary for AAssetManager_getNextFileName to work on old versions -# of Android. Make sure not to generate R.java, as it's already been -# generated. + install_temp/assets/build_info classes.dex +# Package everything. Redirect the generated R.java to install_temp, as +# it must already have been generated as a prerequisite of +# classes.dex's. $(AM_V_AAPT) $(AAPT) p -I "$(ANDROID_JAR)" -F $@ \ -f -M AndroidManifest.xml $(AAPT_ASSET_ARGS) \ -A install_temp/assets \ -S $(top_srcdir)/java/res -J install_temp + $(AM_V_SILENT) $(AAPT) a $@ classes.dex $(AM_V_SILENT) pushd install_temp &> /dev/null; \ $(AAPT) add ../$@ `find lib -type f`; \ popd &> /dev/null @@ -311,10 +311,9 @@ classes.dex: $(CLASS_FILES) .PHONY: clean maintainer-clean -$(APK_NAME): classes.dex emacs.apk-in $(srcdir)/emacs.keystore +$(APK_NAME): emacs.apk-in $(srcdir)/emacs.keystore $(AM_V_GEN) $(AM_V_SILENT) cp -f emacs.apk-in $@.unaligned - $(AM_V_SILENT) $(AAPT) add $@.unaligned classes.dex $(AM_V_SILENT) $(JARSIGNER) $(SIGN_EMACS) $@.unaligned "Emacs keystore" $(AM_V_SILENT) $(ZIPALIGN) -f 4 $@.unaligned $@ # Signing must happen after alignment! commit bfbddf65245e179ef25c3b9b2699515b2d33ecca Author: Po Lu Date: Sat Mar 30 08:58:59 2024 +0800 Fix building the SFNT font driver * src/sfntfont.c (sfntfont_list, sfntfont_list_family): Update calls to Fsort for the new calling convention. diff --git a/src/sfntfont.c b/src/sfntfont.c index 3be770f650e..fb3feaeaf79 100644 --- a/src/sfntfont.c +++ b/src/sfntfont.c @@ -2029,7 +2029,7 @@ sfntfont_list (struct frame *f, Lisp_Object font_spec) caller) ordered first. */ XSETSUBR (compare_font_entities, &Scompare_font_entities.s); - matching = Fsort (matching, compare_font_entities); + matching = CALLN (Fsort, matching, compare_font_entities); return matching; } @@ -3779,7 +3779,7 @@ sfntfont_list_family (struct frame *f) families = Fcons (desc->family, families); /* Sort families in preparation for removing duplicates. */ - families = Fsort (families, Qstring_lessp); + families = CALLN (Fsort, families, Qstring_lessp); /* Remove each duplicate within families. */ commit 717e7edc2ac1e4e04019319da19c5386077dfbea Author: Stefan Monnier Date: Fri Mar 29 15:36:45 2024 -0400 * lisp/emacs-lisp/comp.el (comp--add-cond-cstrs): Consolidate 2 cases diff --git a/lisp/emacs-lisp/comp.el b/lisp/emacs-lisp/comp.el index 2544be85bb2..2ec55ed98ee 100644 --- a/lisp/emacs-lisp/comp.el +++ b/lisp/emacs-lisp/comp.el @@ -2036,37 +2036,23 @@ TARGET-BB-SYM is the symbol name of the target block." (,(pred comp--call-op-p) ,(and (pred comp--known-predicate-p) fun) ,op)) - ;; (comment ,_comment-str) - (cond-jump ,cmp-res ,(pred comp-mvar-p) . ,blocks)) - (cl-loop - with target-mvar = (comp--cond-cstrs-target-mvar op (car insns-seq) b) - for branch-target-cell on blocks - for branch-target = (car branch-target-cell) - for negated in '(t nil) - when (comp--mvar-used-p target-mvar) - do - (let ((block-target (comp--add-cond-cstrs-target-block b branch-target))) - (setf (car branch-target-cell) (comp-block-name block-target)) - (comp--emit-assume 'and target-mvar (if negated - (comp--pred-to-neg-cstr fun) - (comp--pred-to-pos-cstr fun)) - block-target nil)) - finally (cl-return-from in-the-basic-block))) - ;; Match predicate on the negated branch (unless). - (`((set ,(and (pred comp-mvar-p) cmp-res) - (,(pred comp--call-op-p) - ,(and (pred comp--known-predicate-p) fun) - ,op)) - (set ,neg-cmp-res (call eq ,cmp-res ,(pred comp-cstr-null-p))) - (cond-jump ,neg-cmp-res ,(pred comp-mvar-p) . ,blocks)) + . ,(or + ;; (comment ,_comment-str) + (and `((cond-jump ,cmp-res ,(pred comp-mvar-p) . ,blocks)) + (let negated-branch nil)) + (and `((set ,neg-cmp-res + (call eq ,cmp-res ,(pred comp-cstr-null-p))) + (cond-jump ,neg-cmp-res ,(pred comp-mvar-p) . ,blocks)) + (let negated-branch t)))) (cl-loop with target-mvar = (comp--cond-cstrs-target-mvar op (car insns-seq) b) for branch-target-cell on blocks for branch-target = (car branch-target-cell) - for negated in '(nil t) + for negated in (if negated-branch '(nil t) '(t nil)) when (comp--mvar-used-p target-mvar) do - (let ((block-target (comp--add-cond-cstrs-target-block b branch-target))) + (let ((block-target (comp--add-cond-cstrs-target-block + b branch-target))) (setf (car branch-target-cell) (comp-block-name block-target)) (comp--emit-assume 'and target-mvar (if negated (comp--pred-to-neg-cstr fun) commit dd3e13469d75851f3d7907e3373d45032382a5f5 Author: Stefan Monnier Date: Fri Mar 29 15:32:48 2024 -0400 * lisp/cedet/semantic/tag.el (semantic-tag): New type diff --git a/lisp/cedet/semantic/tag.el b/lisp/cedet/semantic/tag.el index 18a0b4caee2..a0843dd5df9 100644 --- a/lisp/cedet/semantic/tag.el +++ b/lisp/cedet/semantic/tag.el @@ -349,6 +349,9 @@ If TAG is unlinked, but has a :filename property, then that is used." ;; If an error occurs, then it most certainly is not a tag. (error nil))) +;; Used in `semantic-utest-ia.el'. +(cl-deftype semantic-tag () `(satisfies semantic-tag-p)) + (defsubst semantic-tag-of-class-p (tag class) "Return non-nil if class of TAG is CLASS." (eq (semantic-tag-class tag) class)) commit 42322257ba9abdb8bcc2aceb34a27f261df070aa Author: Eli Zaretskii Date: Fri Mar 29 18:26:38 2024 +0300 ; * lisp/shell.el (w32-application-type): Fix 'declare-function'. diff --git a/lisp/shell.el b/lisp/shell.el index 8a5218ae847..cd49d289403 100644 --- a/lisp/shell.el +++ b/lisp/shell.el @@ -606,7 +606,7 @@ Shell buffers. It implements `shell-completion-execonly' for (defvar sh-shell-file) -(declare-function w32-application-type nil ; "src/w32proc.c" +(declare-function w32-application-type "w32proc.c" (program) t) (define-derived-mode shell-mode comint-mode "Shell" commit f04bd5568708f96dfad0e8240c7f8f23c90b6813 Author: Mattias Engdegård Date: Fri Mar 29 15:25:22 2024 +0100 `value<` manual entry adjustments (bug#69709) * doc/lispref/sequences.texi (Sequence Functions): Explain lexicographical ordering. Note the dual nature of `nil`. Mention the depth limit. diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 4a4241b92c9..c9e47624878 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -461,7 +461,7 @@ This function returns non-@code{nil} if @var{a} comes before @var{b} in the standard sorting order; this means that it returns @code{nil} when @var{b} comes before @var{a}, or if they are equal or unordered. -the arguments @var{a} and @var{b} must have the same type. +The arguments @var{a} and @var{b} must have the same type. Specifically: @itemize @bullet @@ -471,7 +471,11 @@ Numbers are compared using @code{<} (@pxref{definition of <}). Strings are compared using @code{string<} (@pxref{definition of string<}) and symbols are compared by comparing their names as strings. @item -Conses, lists, vectors and records are compared lexicographically. +Conses, lists, vectors and records are compared lexicographically. This +means that the two sequences are compared element-wise from left to +right until they differ, and the result is then that of @code{value<} on +the first pair of differing elements. If one sequence runs out of +elements before the other, the shorter sequence comes before the longer. @item Markers are compared first by buffer, then by position. @item @@ -489,8 +493,22 @@ Examples: (value< "dog" "cat") @result{} nil (value< 'yip 'yip) @result{} nil (value< '(3 2) '(3 2 0)) @result{} t -(value< [3 2 1] [3 2 0]) @result{} nil +(value< [3 2 "a"] [3 2 "b"]) @result{} t @end example + +@noindent +Note that @code{nil} is treated as either a symbol or an empty list, +depending on what it is compared against: + +@example +(value< nil '(0)) @result{} t +(value< 'nib nil) @result{} t +@end example + +@noindent +There is no limit to the length of sequences (lists, vectors and so on) +that can be compared, but @code{value<} may fail with an error if used +to compare circular or deeply nested data structures. @end defun @cindex sequence functions in seq commit d2d5e514397c453bbaa6e7fc3441af2d538eb3cf Author: Mattias Engdegård Date: Fri Mar 29 15:23:56 2024 +0100 * src/fns.c (Fvaluelt): More generous depth limit (20 -> 200). This gives `value<` the same limit as `equal` which seems about right. diff --git a/src/fns.c b/src/fns.c index 8d8783713ab..db5e856d5bd 100644 --- a/src/fns.c +++ b/src/fns.c @@ -3201,7 +3201,7 @@ Buffers and processes are compared by name. Other types are considered unordered and the return value will be `nil'. */) (Lisp_Object a, Lisp_Object b) { - int maxdepth = 20; /* FIXME: arbitrary value */ + int maxdepth = 200; /* FIXME: arbitrary value */ return value_cmp (a, b, maxdepth) < 0 ? Qt : Qnil; } commit 6f7cb96543285dc8e37135abaec87d0b9a40e2e2 Author: Sam Steingold Date: Fri Mar 29 10:36:43 2024 -0400 Support `shell-resync-dirs' on msys bash (Bug#70012) * lisp/shell.el (w32-application-type): Declare. (shell-mode): Set `shell-dirstack-query' to `pwd -W` when using msys bash. diff --git a/lisp/shell.el b/lisp/shell.el index c5cfbd985ed..8a5218ae847 100644 --- a/lisp/shell.el +++ b/lisp/shell.el @@ -606,6 +606,9 @@ Shell buffers. It implements `shell-completion-execonly' for (defvar sh-shell-file) +(declare-function w32-application-type nil ; "src/w32proc.c" + (program) t) + (define-derived-mode shell-mode comint-mode "Shell" "Major mode for interacting with an inferior shell. \\ @@ -754,6 +757,11 @@ command." ((string-equal shell "ksh") "echo $PWD ~-") ;; Bypass any aliases. TODO all shells could use this. ((string-equal shell "bash") "command dirs") + ((and (string-equal shell "bash.exe") + (eq system-type 'windows-nt) + (eq (w32-application-type (executable-find "bash.exe")) + 'msys)) + "command pwd -W") ((string-equal shell "zsh") "dirs -l") (t "dirs"))) ;; Bypass a bug in certain versions of bash. commit 1f19ddec5b06720086c67d5d8b7d2184e9eef288 Author: Eli Zaretskii Date: Fri Mar 29 15:03:44 2024 +0300 ; * doc/lispref/sequences.texi (Sequence Functions): Fix markup and examples. diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index de83b96d748..4a4241b92c9 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -359,7 +359,7 @@ returns a sorted sequence of the same type. The sort is stable, which means that elements with equal sort keys maintain their relative order. It takes the following optional keyword arguments: -@table @asis +@table @code @item :key @var{keyfunc} Use @var{keyfunc}, a function that takes a single element from @var{sequence} and returns its key value, to generate the keys used in @@ -373,7 +373,7 @@ that takes two sort keys as arguments and returns non-@code{nil} if the first should come before the second. If this argument is absent or @var{predicate} is @code{nil}, then @code{value<} is used, which is applicable to many different Lisp types and generally sorts in -ascending order (@pxref{definition of value<}). +ascending order (@pxref{definition of value<}, below). For consistency, any predicate must obey the following rules: @itemize @bullet @@ -402,19 +402,24 @@ easier and faster to supply a new @code{:key} function than a different @code{:lessp} predicate. For example, consider sorting these strings: @example +@group (setq numbers '("one" "two" "three" "four" "five" "six")) (sort numbers) @result{} ("five" "four" "one" "six" "three" "two") +@end group @end example You can sort the strings by length instead by supplying a different key function: @example +@group (sort numbers :key #'length) @result{} ("one" "two" "six" "four" "five" "three") +@end group @end example +@noindent Note how strings of the same length keep their original order, thanks to the sorting stability. Now suppose you want to sort by length, but use the string contents to break ties. The easiest way is to specify a key @@ -423,19 +428,23 @@ Since @code{value<} orders compound objects (conses, lists, vectors and records) lexicographically, you could do: @example +@group (sort numbers :key (lambda (x) (cons (length x) x))) @result{} ("one" "six" "two" "five" "four" "three") +@end group @end example +@noindent because @code{(3 . "six")} is ordered before @code{(3 . "two")} and so on. -For compatibility with old versions of Emacs, the @code{sort} function -can also be called using the fixed two-argument form +For compatibility with previous versions of Emacs, the @code{sort} +function can also be called using the fixed two-argument form: @example (@code{sort} @var{sequence} @var{predicate}) @end example +@noindent where @var{predicate} is the @code{:lessp} argument. When using this form, sorting is always done in-place. @end defun @@ -452,22 +461,26 @@ This function returns non-@code{nil} if @var{a} comes before @var{b} in the standard sorting order; this means that it returns @code{nil} when @var{b} comes before @var{a}, or if they are equal or unordered. -@var{a} and @var{b} must have the same type. Specifically: +the arguments @var{a} and @var{b} must have the same type. +Specifically: @itemize @bullet @item Numbers are compared using @code{<} (@pxref{definition of <}). @item -Strings and symbols are compared using @code{string<} -(@pxref{definition of string<}). +Strings are compared using @code{string<} (@pxref{definition of +string<}) and symbols are compared by comparing their names as strings. @item Conses, lists, vectors and records are compared lexicographically. @item Markers are compared first by buffer, then by position. @item -Buffers and processes are compared by name. +Buffers and processes are compared by comparing their names as strings. +Dead buffers (whose name is @code{nil}) will compare before any live +buffer. @item -Other types are considered unordered and the return value will be @code{nil}. +Other types are considered unordered and the return value will be +@code{nil}. @end itemize Examples: commit 2f0df93d8ca0a8d4d6b040458661b8eb21fc39e9 Author: Mattias Engdegård Date: Fri Mar 29 11:53:56 2024 +0100 ; * test/lisp/vc/vc-git-tests.el: bend doc string quote diff --git a/test/lisp/vc/vc-git-tests.el b/test/lisp/vc/vc-git-tests.el index bbf0c4277dd..f15a0f52e8c 100644 --- a/test/lisp/vc/vc-git-tests.el +++ b/test/lisp/vc/vc-git-tests.el @@ -91,7 +91,7 @@ will be bound to that directory's file name. Once BODY exits, the directory will be deleted. Some dummy environment variables will be set for the duration of BODY to -allow 'git commit' to determine identities for authors and committers." +allow `git commit' to determine identities for authors and committers." (declare (indent 1)) `(ert-with-temp-directory ,name (let ((default-directory ,name) commit b20866c4b3aa1446efda252bd5c3fa54f68c5d7f Author: Mattias Engdegård Date: Sun Mar 24 18:18:41 2024 +0100 Better `sort` ignored-return-value warning * lisp/emacs-lisp/bytecomp.el (byte-compile-form) (bytecomp--actually-important-return-value-p): Special handling of `sort` that takes into account that it may return an important value depending on the :in-place keyword argument. diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el index 7af568cfe34..2b5eb34e571 100644 --- a/lisp/emacs-lisp/bytecomp.el +++ b/lisp/emacs-lisp/bytecomp.el @@ -3445,6 +3445,7 @@ lambda-expression." ((and (or sef (function-get (car form) 'important-return-value)) ;; Don't warn for arguments to `ignore'. (not (eq byte-compile--for-effect 'for-effect-no-warn)) + (bytecomp--actually-important-return-value-p form) (byte-compile-warning-enabled-p 'ignored-return-value (car form))) (byte-compile-warn-x @@ -3471,6 +3472,15 @@ lambda-expression." (if byte-compile--for-effect (byte-compile-discard))))) +(defun bytecomp--actually-important-return-value-p (form) + "Whether FORM is really a call with a return value that should not go unused. +This assumes the function has the `important-return-value' property." + (cond ((eq (car form) 'sort) + ;; For `sort', we only care about non-destructive uses. + (and (zerop (% (length form) 2)) ; new-style call + (not (plist-get (cddr form) :in-place)))) + (t t))) + (let ((important-return-value-fns '( ;; These functions are side-effect-free except for the @@ -3478,9 +3488,11 @@ lambda-expression." mapcar mapcan mapconcat assoc plist-get plist-member - ;; It's safe to ignore the value of `sort' and `nreverse' + ;; It's safe to ignore the value of `nreverse' ;; when used on arrays, but most calls pass lists. - nreverse sort + nreverse + + sort ; special handling (non-destructive calls only) match-data commit cbd862865ff0a08d1214ac33590e7af80d10a0ac Author: Mattias Engdegård Date: Fri Mar 22 15:06:27 2024 +0100 Remove `sort-on` (bug#69709) * lisp/sort.el (sort-on): * doc/lispref/sequences.texi (Sequence Functions): * etc/NEWS: Remove the `sort-on` function which is now completely superseded by the extended `sort` in features, ease of use, and performance. diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 6322f17e77b..de83b96d748 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -440,6 +440,10 @@ where @var{predicate} is the @code{:lessp} argument. When using this form, sorting is always done in-place. @end defun +@xref{Sorting}, for more functions that perform sorting. See +@code{documentation} in @ref{Accessing Documentation}, for a useful +example of @code{sort}. + @cindex comparing values @cindex standard sorting order @anchor{definition of value<} @@ -476,42 +480,6 @@ Examples: @end example @end defun -Sometimes, computation of sort keys of list or vector elements is -expensive, and therefore it is important to perform it the minimum -number of times. By contrast, computing the sort keys of elements -inside the @var{predicate} function passed to @code{sort} will generally -perform this computation each time @var{predicate} is called with some -element. If you can separate the computation of the sort key of an -element into a function of its own, you can use the following sorting -function, which guarantees that the key will be computed for each list -or vector element exactly once. - -@cindex decorate-sort-undecorate -@cindex Schwartzian transform -@defun sort-on sequence predicate accessor -This function stably sorts @var{sequence}, which can be a list, a -vector, a bool-vector, or a string. It sorts by comparing the sort -keys of the elements using @var{predicate}. The comparison function -@var{predicate} accepts two arguments, the sort keys to compare, and -should return non-@code{nil} if the element corresponding to the first -key should sort before the element corresponding to the second key. The -function computes a sort key of each element by calling the -@var{accessor} function on that element; it does so exactly once for -each element of @var{sequence}. The @var{accessor} function is called -with a single argument, an element of @var{sequence}. - -This function implements what is known as @dfn{decorate-sort-undecorate} -paradigm, or the Schwartzian transform. It basically trades CPU for -memory, creating a temporary list with the computed sort keys, then -mapping @code{car} over the result of sorting that temporary list. -Unlike with @code{sort}, the return value is always a new list; the -original @var{sequence} is left intact. -@end defun - -@xref{Sorting}, for more functions that perform sorting. See -@code{documentation} in @ref{Accessing Documentation}, for a useful -example of @code{sort}. - @cindex sequence functions in seq @cindex seq library @cindex sequences, generalized diff --git a/etc/NEWS b/etc/NEWS index 4018df1fecb..6cefe11a2cc 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1795,11 +1795,6 @@ sorts by the return value of 'age', then by 'size', then by 'cost'. The old signature, '(sort SEQ PREDICATE)', can still be used and sorts its input in-place as before. -** New function 'sort-on'. -This function implements the Schwartzian transform, and is appropriate -for sorting lists when the computation of the sort key of a list -element can be expensive. - ** New API for 'derived-mode-p' and control of the graph of major modes. *** 'derived-mode-p' now takes the list of modes as a single argument. diff --git a/lisp/sort.el b/lisp/sort.el index 4f0d759ef8a..2ee76b6e1e3 100644 --- a/lisp/sort.el +++ b/lisp/sort.el @@ -478,27 +478,6 @@ sRegexp specifying key within record: \nr") ;; if there was no such register (error (throw 'key nil)))))))))) -;;;###autoload -(defun sort-on (sequence predicate accessor) - "Sort SEQUENCE by calling PREDICATE on sort keys produced by ACCESSOR. -SEQUENCE should be the input sequence to sort. -Elements of SEQUENCE are sorted by keys which are obtained by -calling ACCESSOR on each element. ACCESSOR should be a function of -one argument, an element of SEQUENCE, and should return the key -value to be compared by PREDICATE for sorting the element. -PREDICATE is the function for comparing keys; it is called with two -arguments, the keys to compare, and should return non-nil if the -first key should sort before the second key. -The return value is always a new list. -This function has the performance advantage of evaluating -ACCESSOR only once for each element in the input SEQUENCE, and is -therefore appropriate when computing the key by ACCESSOR is an -expensive operation. This is known as the \"decorate-sort-undecorate\" -paradigm, or the Schwartzian transform." - (mapcar #'car - (sort (mapcar #'(lambda (x) (cons x (funcall accessor x))) sequence) - #'(lambda (x y) (funcall predicate (cdr x) (cdr y)))))) - (defvar sort-columns-subprocess t) commit 92d659ce6cd2e79231f1011202abb39606d6f06b Author: Mattias Engdegård Date: Fri Mar 22 15:08:50 2024 +0100 Use new-style `sort` signature in Lisp manual examples * doc/lispref/help.texi (Accessing Documentation): * doc/lispref/strings.texi (Text Comparison): Use the new sort calling convention (bug#69709). diff --git a/doc/lispref/help.texi b/doc/lispref/help.texi index a76bac011b7..4236fa75bf0 100644 --- a/doc/lispref/help.texi +++ b/doc/lispref/help.texi @@ -231,7 +231,7 @@ in the *Help* buffer." (help-setup-xref (list 'describe-symbols pattern) (called-interactively-p 'interactive)) (with-help-window (help-buffer) - (mapcar describe-func (sort sym-list 'string<))))) + (mapcar describe-func (sort sym-list))))) @end group @end smallexample diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 6a9dd589237..7f640255a7a 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -692,7 +692,8 @@ for sorting (@pxref{Sequence Functions}): @example @group -(sort (list "11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp) +(sort '("11" "12" "1 1" "1 2" "1.1" "1.2") + :lessp #'string-collate-lessp) @result{} ("11" "1 1" "1.1" "12" "1 2" "1.2") @end group @end example @@ -709,8 +710,8 @@ systems. The @var{locale} value of @code{"POSIX"} or @code{"C"} lets @example @group -(sort (list "11" "12" "1 1" "1 2" "1.1" "1.2") - (lambda (s1 s2) (string-collate-lessp s1 s2 "POSIX"))) +(sort '("11" "12" "1 1" "1 2" "1.1" "1.2") + :lessp (lambda (s1 s2) (string-collate-lessp s1 s2 "POSIX"))) @result{} ("1 1" "1 2" "1.1" "1.2" "11" "12") @end group @end example commit 45941a62c799f9685fae296079304ae0898920cc Author: Mattias Engdegård Date: Fri Mar 22 11:54:09 2024 +0100 Faster non-destructive list sorting Postpone the creation of a new list to after sorting which turns out to be a lot faster (1.1x - 1.5x speedup). * src/fns.c (sort_list, sort_vector, Fsort): Create the new list when moving the data out from the temporary array. diff --git a/src/fns.c b/src/fns.c index bf7c0920750..8d8783713ab 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2347,18 +2347,17 @@ See also the function `nreverse', which is used more often. */) } -/* Stably sort LIST ordered by PREDICATE using the TIMSORT - algorithm. This converts the list to a vector, sorts the vector, - and returns the result converted back to a list. The input list - is destructively reused to hold the sorted result. */ - +/* Stably sort LIST ordered by PREDICATE and KEYFUNC, optionally reversed. + This converts the list to a vector, sorts the vector, and returns the + result converted back to a list. If INPLACE, the input list is + reused to hold the sorted result; otherwise a new list is returned. */ static Lisp_Object sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc, - bool reverse) + bool reverse, bool inplace) { ptrdiff_t length = list_length (list); if (length < 2) - return list; + return inplace ? list : list1 (XCAR (list)); else { Lisp_Object *result; @@ -2372,31 +2371,40 @@ sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc, } tim_sort (predicate, keyfunc, result, length, reverse); - ptrdiff_t i = 0; - tail = list; - while (CONSP (tail)) + if (inplace) { - XSETCAR (tail, result[i]); - tail = XCDR (tail); - i++; + /* Copy sorted vector contents back onto the original list. */ + ptrdiff_t i = 0; + tail = list; + while (CONSP (tail)) + { + XSETCAR (tail, result[i]); + tail = XCDR (tail); + i++; + } + } + else + { + /* Create a new list for the sorted vector contents. */ + list = Qnil; + for (ptrdiff_t i = length - 1; i >= 0; i--) + list = Fcons (result[i], list); } SAFE_FREE (); return list; } } -/* Stably sort VECTOR ordered by PREDICATE using the TIMSORT - algorithm. */ - -static void +/* Stably sort VECTOR in-place ordered by PREDICATE and KEYFUNC, + optionally reversed. */ +static Lisp_Object sort_vector (Lisp_Object vector, Lisp_Object predicate, Lisp_Object keyfunc, bool reverse) { ptrdiff_t length = ASIZE (vector); - if (length < 2) - return; - - tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length, reverse); + if (length >= 2) + tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length, reverse); + return vector; } DEFUN ("sort", Fsort, Ssort, 1, MANY, 0, @@ -2455,18 +2463,15 @@ usage: (sort SEQ &key KEY LESSP REVERSE IN-PLACE) */) signal_error ("Invalid keyword argument", args[i]); } - /* FIXME: for lists it may be slightly faster to make the copy after - sorting? Measure. */ - if (!inplace) - seq = Fcopy_sequence (seq); - if (CONSP (seq)) - seq = sort_list (seq, lessp, key, reverse); + return sort_list (seq, lessp, key, reverse, inplace); + else if (NILP (seq)) + return seq; else if (VECTORP (seq)) - sort_vector (seq, lessp, key, reverse); - else if (!NILP (seq)) + return sort_vector (inplace ? seq : Fcopy_sequence (seq), + lessp, key, reverse); + else wrong_type_argument (Qlist_or_vector_p, seq); - return seq; } Lisp_Object commit deae311281522864ebabaf56adafbe37032cc8a9 Author: Mattias Engdegård Date: Thu Mar 21 19:35:15 2024 +0100 Speed up `sort` by special-casing the `value<` ordering This gives a 1.5x-2x speed-up when using the default :lessp value, by eliminating the Ffuncall overhead. * src/sort.c (order_pred_lisp, order_pred_valuelt): New. (merge_state, inorder, binarysort, count_run, gallop_left, gallop_right) (merge_init, merge_lo, merge_hi, tim_sort): * src/fns.c (Fsort): When using value<, call it directly. diff --git a/src/fns.c b/src/fns.c index 7eacf99cbba..bf7c0920750 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2455,11 +2455,6 @@ usage: (sort SEQ &key KEY LESSP REVERSE IN-PLACE) */) signal_error ("Invalid keyword argument", args[i]); } - if (NILP (lessp)) - /* FIXME: normalise it as Qnil instead, and special-case it in tim_sort? - That would remove the funcall overhead for the common case. */ - lessp = Qvaluelt; - /* FIXME: for lists it may be slightly faster to make the copy after sorting? Measure. */ if (!inplace) diff --git a/src/sort.c b/src/sort.c index a0f127c35b3..527d5550342 100644 --- a/src/sort.c +++ b/src/sort.c @@ -152,7 +152,7 @@ struct reloc }; -typedef struct +typedef struct merge_state { Lisp_Object *basekeys; Lisp_Object *allocated_keys; /* heap-alloc'ed key array or NULL */ @@ -187,20 +187,32 @@ typedef struct struct reloc reloc; - /* PREDICATE is the lisp comparison predicate for the sort. */ + /* The C ordering (less-than) predicate. */ + bool (*pred_fun) (struct merge_state *ms, Lisp_Object a, Lisp_Object b); + /* The Lisp ordering predicate; Qnil means value<. */ Lisp_Object predicate; } merge_state; -/* Return true iff (PREDICATE A B) is non-nil. */ +static bool +order_pred_lisp (merge_state *ms, Lisp_Object a, Lisp_Object b) +{ + return !NILP (call2 (ms->predicate, a, b)); +} -static inline bool -inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b) +static bool +order_pred_valuelt (merge_state *ms, Lisp_Object a, Lisp_Object b) { - return !NILP (call2 (predicate, a, b)); + return !NILP (Fvaluelt (a, b)); } +/* Return true iff A < B according to the order predicate. */ +static inline bool +inorder (merge_state *ms, Lisp_Object a, Lisp_Object b) +{ + return ms->pred_fun (ms, a, b); +} /* Sort the list starting at LO and ending at HI using a stable binary insertion sort algorithm. On entry the sublist [LO, START) (with @@ -212,8 +224,6 @@ static void binarysort (merge_state *ms, sortslice lo, const Lisp_Object *hi, Lisp_Object *start) { - Lisp_Object pred = ms->predicate; - eassume (lo.keys <= start && start <= hi); if (lo.keys == start) ++start; @@ -226,7 +236,7 @@ binarysort (merge_state *ms, sortslice lo, const Lisp_Object *hi, eassume (l < r); do { Lisp_Object *p = l + ((r - l) >> 1); - if (inorder (pred, pivot, *p)) + if (inorder (ms, pivot, *p)) r = p; else l = p + 1; @@ -263,8 +273,6 @@ static ptrdiff_t count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, bool *descending) { - Lisp_Object pred = ms->predicate; - eassume (lo < hi); *descending = 0; ++lo; @@ -273,12 +281,12 @@ count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, return n; n = 2; - if (inorder (pred, lo[0], lo[-1])) + if (inorder (ms, lo[0], lo[-1])) { *descending = 1; for (lo = lo + 1; lo < hi; ++lo, ++n) { - if (!inorder (pred, lo[0], lo[-1])) + if (!inorder (ms, lo[0], lo[-1])) break; } } @@ -286,7 +294,7 @@ count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, { for (lo = lo + 1; lo < hi; ++lo, ++n) { - if (inorder (pred, lo[0], lo[-1])) + if (inorder (ms, lo[0], lo[-1])) break; } } @@ -319,21 +327,19 @@ static ptrdiff_t gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a, const ptrdiff_t n, const ptrdiff_t hint) { - Lisp_Object pred = ms->predicate; - eassume (a && n > 0 && hint >= 0 && hint < n); a += hint; ptrdiff_t lastofs = 0; ptrdiff_t ofs = 1; - if (inorder (pred, *a, key)) + if (inorder (ms, *a, key)) { /* When a[hint] < key, gallop right until a[hint + lastofs] < key <= a[hint + ofs]. */ const ptrdiff_t maxofs = n - hint; /* This is one after the end of a. */ while (ofs < maxofs) { - if (inorder (pred, a[ofs], key)) + if (inorder (ms, a[ofs], key)) { lastofs = ofs; eassume (ofs <= (PTRDIFF_MAX - 1) / 2); @@ -355,7 +361,7 @@ gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a, const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ while (ofs < maxofs) { - if (inorder (pred, a[-ofs], key)) + if (inorder (ms, a[-ofs], key)) break; /* Here key <= a[hint - ofs]. */ lastofs = ofs; @@ -380,7 +386,7 @@ gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a, { ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); - if (inorder (pred, a[m], key)) + if (inorder (ms, a[m], key)) lastofs = m + 1; /* Here a[m] < key. */ else ofs = m; /* Here key <= a[m]. */ @@ -403,21 +409,19 @@ static ptrdiff_t gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, const ptrdiff_t n, const ptrdiff_t hint) { - Lisp_Object pred = ms->predicate; - eassume (a && n > 0 && hint >= 0 && hint < n); a += hint; ptrdiff_t lastofs = 0; ptrdiff_t ofs = 1; - if (inorder (pred, key, *a)) + if (inorder (ms, key, *a)) { /* When key < a[hint], gallop left until a[hint - ofs] <= key < a[hint - lastofs]. */ const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ while (ofs < maxofs) { - if (inorder (pred, key, a[-ofs])) + if (inorder (ms, key, a[-ofs])) { lastofs = ofs; eassume (ofs <= (PTRDIFF_MAX - 1) / 2); @@ -440,7 +444,7 @@ gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, const ptrdiff_t maxofs = n - hint; /* Here &a[n-1] is highest. */ while (ofs < maxofs) { - if (inorder (pred, key, a[ofs])) + if (inorder (ms, key, a[ofs])) break; /* Here a[hint + ofs] <= key. */ lastofs = ofs; @@ -464,7 +468,7 @@ gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, { ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); - if (inorder (pred, key, a[m])) + if (inorder (ms, key, a[m])) ofs = m; /* Here key < a[m]. */ else lastofs = m + 1; /* Here a[m] <= key. */ @@ -509,6 +513,7 @@ merge_init (merge_state *ms, const ptrdiff_t list_size, ms->listlen = list_size; ms->basekeys = lo->keys; ms->allocated_keys = allocated_keys; + ms->pred_fun = NILP (predicate) ? order_pred_valuelt : order_pred_lisp; ms->predicate = predicate; ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; ms->count = make_invalid_specpdl_ref (); @@ -637,8 +642,6 @@ static void merge_lo (merge_state *ms, sortslice ssa, ptrdiff_t na, sortslice ssb, ptrdiff_t nb) { - Lisp_Object pred = ms->predicate; - eassume (ms && ssa.keys && ssb.keys && na > 0 && nb > 0); eassume (ssa.keys + na == ssb.keys); needmem (ms, na); @@ -665,7 +668,7 @@ merge_lo (merge_state *ms, sortslice ssa, ptrdiff_t na, for (;;) { eassume (na > 1 && nb > 0); - if (inorder (pred, ssb.keys[0], ssa.keys[0])) + if (inorder (ms, ssb.keys[0], ssa.keys[0])) { sortslice_copy_incr (&dest, &ssb); ++bcount; @@ -762,8 +765,6 @@ static void merge_hi (merge_state *ms, sortslice ssa, ptrdiff_t na, sortslice ssb, ptrdiff_t nb) { - Lisp_Object pred = ms->predicate; - eassume (ms && ssa.keys && ssb.keys && na > 0 && nb > 0); eassume (ssa.keys + na == ssb.keys); needmem (ms, nb); @@ -793,7 +794,7 @@ merge_hi (merge_state *ms, sortslice ssa, ptrdiff_t na, for (;;) { eassume (na > 0 && nb > 1); - if (inorder (pred, ssb.keys[0], ssa.keys[0])) + if (inorder (ms, ssb.keys[0], ssa.keys[0])) { sortslice_copy_decr (&dest, &ssa); ++acount; @@ -1078,19 +1079,19 @@ void tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, Lisp_Object *seq, const ptrdiff_t length, bool reverse) { - /* FIXME: optimise for the predicate being value<; at the very - least we'd go without the Lisp funcall overhead. */ - predicate = resolve_fun (predicate); + /* FIXME: hoist this to the caller? */ + if (EQ (predicate, Qvaluelt)) + predicate = Qnil; + if (!NILP (predicate)) + predicate = resolve_fun (predicate); + if (EQ (keyfunc, Qidentity)) + keyfunc = Qnil; sortslice lo; Lisp_Object *keys; Lisp_Object *allocated_keys = NULL; merge_state ms; - /* FIXME: hoist this to the caller? */ - if (EQ (keyfunc, Qidentity)) - keyfunc = Qnil; - if (reverse) reverse_slice (seq, seq + length); /* preserve stability */ commit ae5f2c02bd2fc269e2cc32c8039d95fbf4225e69 Author: Mattias Engdegård Date: Tue Mar 19 13:03:47 2024 +0100 New `sort` keyword arguments (bug#69709) Add the :key, :lessp, :reverse and :in-place keyword arguments. The old calling style remains available and is unchanged. * src/fns.c (sort_list, sort_vector, Fsort): * src/sort.c (tim_sort): Add keyword arguments with associated new features. All callers of Fsort adapted. * test/src/fns-tests.el (fns-tests--shuffle-vector, fns-tests-sort-kw): New test. * doc/lispref/sequences.texi (Sequence Functions): Update manual. * etc/NEWS: Announce. diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 5bdf71fe02e..6322f17e77b 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -350,94 +350,99 @@ encouraged to treat strings as immutable even when they are mutable. @end defun -@defun sort sequence predicate +@defun sort sequence &rest keyword-args @cindex stable sort @cindex sorting lists @cindex sorting vectors -This function sorts @var{sequence} stably. Note that this function doesn't work -for all sequences; it may be used only for lists and vectors. If @var{sequence} -is a list, it is modified destructively. This functions returns the sorted -@var{sequence} and compares elements using @var{predicate}. A stable sort is -one in which elements with equal sort keys maintain their relative order before -and after the sort. Stability is important when successive sorts are used to -order elements according to different criteria. +This function sorts @var{sequence}, which must be a list or vector, and +returns a sorted sequence of the same type. +The sort is stable, which means that elements with equal sort keys maintain +their relative order. It takes the following optional keyword arguments: -The argument @var{predicate} must be a function that accepts two -arguments. It is called with two elements of @var{sequence}. To get an -increasing order sort, the @var{predicate} should return non-@code{nil} if the -first element is ``less'' than the second, or @code{nil} if not. +@table @asis +@item :key @var{keyfunc} +Use @var{keyfunc}, a function that takes a single element from +@var{sequence} and returns its key value, to generate the keys used in +comparison. If this argument is absent or if @var{keyfunc} is +@code{nil} then @code{identity} is assumed; that is, the elements +themselves are used as sorting keys. + +@item :lessp @var{predicate} +Use @var{predicate} to order the keys. @var{predicate} is a function +that takes two sort keys as arguments and returns non-@code{nil} if the +first should come before the second. If this argument is absent or +@var{predicate} is @code{nil}, then @code{value<} is used, which +is applicable to many different Lisp types and generally sorts in +ascending order (@pxref{definition of value<}). + +For consistency, any predicate must obey the following rules: +@itemize @bullet +@item +It must be @dfn{antisymmetric}: it cannot both order @var{a} before +@var{b} and @var{b} before @var{a}. +@item +It must be @dfn{transitive}: if it orders @var{a} before @var{b} and +@var{b} before @var{c}, then it must also order @var{a} before @var{c}. +@end itemize -The comparison function @var{predicate} must give reliable results for -any given pair of arguments, at least within a single call to -@code{sort}. It must be @dfn{antisymmetric}; that is, if @var{a} is -less than @var{b}, @var{b} must not be less than @var{a}. It must be -@dfn{transitive}---that is, if @var{a} is less than @var{b}, and @var{b} -is less than @var{c}, then @var{a} must be less than @var{c}. If you -use a comparison function which does not meet these requirements, the -result of @code{sort} is unpredictable. +@item :reverse @var{flag} +If @var{flag} is non-@code{nil}, the sorting order is reversed. With +the default @code{:lessp} predicate this means sorting in descending order. -The destructive aspect of @code{sort} for lists is that it reuses the -cons cells forming @var{sequence} by changing their contents, possibly -rearranging them in a different order. This means that the value of -the input list is undefined after sorting; only the list returned by -@code{sort} has a well-defined value. Example: +@item :in-place @var{flag} +If @var{flag} is non-@code{nil}, then @var{sequence} is sorted in-place +(destructively) and returned. If @code{nil}, or if this argument is not +given, a sorted copy of the input is returned and @var{sequence} itself +remains unmodified. In-place sorting is slightly faster, but the +original sequence is lost. +@end table + +If the default behaviour is not suitable for your needs, it is usually +easier and faster to supply a new @code{:key} function than a different +@code{:lessp} predicate. For example, consider sorting these strings: @example -@group -(setq nums (list 2 1 4 3 0)) -(sort nums #'<) - @result{} (0 1 2 3 4) - ; nums is unpredictable at this point -@end group +(setq numbers '("one" "two" "three" "four" "five" "six")) +(sort numbers) + @result{} ("five" "four" "one" "six" "three" "two") @end example -Most often we store the result back into the variable that held the -original list: +You can sort the strings by length instead by supplying a different key +function: @example -(setq nums (sort nums #'<)) +(sort numbers :key #'length) + @result{} ("one" "two" "six" "four" "five" "three") @end example -If you wish to make a sorted copy without destroying the original, -copy it first and then sort: +Note how strings of the same length keep their original order, thanks to +the sorting stability. Now suppose you want to sort by length, but use +the string contents to break ties. The easiest way is to specify a key +function that transforms an element to a value that is sorted this way. +Since @code{value<} orders compound objects (conses, lists, +vectors and records) lexicographically, you could do: @example -@group -(setq nums (list 2 1 4 3 0)) -(sort (copy-sequence nums) #'<) - @result{} (0 1 2 3 4) -@end group -@group -nums - @result{} (2 1 4 3 0) -@end group +(sort numbers :key (lambda (x) (cons (length x) x))) + @result{} ("one" "six" "two" "five" "four" "three") @end example -For the better understanding of what stable sort is, consider the following -vector example. After sorting, all items whose @code{car} is 8 are grouped -at the beginning of @code{vector}, but their relative order is preserved. -All items whose @code{car} is 9 are grouped at the end of @code{vector}, -but their relative order is also preserved: +because @code{(3 . "six")} is ordered before @code{(3 . "two")} and so on. + +For compatibility with old versions of Emacs, the @code{sort} function +can also be called using the fixed two-argument form @example -@group -(setq - vector - (vector '(8 . "xxx") '(9 . "aaa") '(8 . "bbb") '(9 . "zzz") - '(9 . "ppp") '(8 . "ttt") '(8 . "eee") '(9 . "fff"))) - @result{} [(8 . "xxx") (9 . "aaa") (8 . "bbb") (9 . "zzz") - (9 . "ppp") (8 . "ttt") (8 . "eee") (9 . "fff")] -@end group -@group -(sort vector (lambda (x y) (< (car x) (car y)))) - @result{} [(8 . "xxx") (8 . "bbb") (8 . "ttt") (8 . "eee") - (9 . "aaa") (9 . "zzz") (9 . "ppp") (9 . "fff")] -@end group +(@code{sort} @var{sequence} @var{predicate}) @end example + +where @var{predicate} is the @code{:lessp} argument. When using this +form, sorting is always done in-place. @end defun @cindex comparing values @cindex standard sorting order +@anchor{definition of value<} @defun value< a b This function returns non-@code{nil} if @var{a} comes before @var{b} in the standard sorting order; this means that it returns @code{nil} when diff --git a/etc/NEWS b/etc/NEWS index 73ffff9f2d3..4018df1fecb 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1770,6 +1770,31 @@ lexicographically. It is intended as a convenient ordering predicate for sorting, and is likely to be faster than hand-written Lisp functions. ++++ +** New 'sort' arguments and features. +The 'sort' function can now be called using the signature + + (sort SEQ &rest KEYWORD-ARGUMENTS) + +where arguments after the first are keyword/value pairs, all optional: +':key' specifies a function that produces the sorting key from an element, +':lessp' specifies the ordering predicate, defaulting to 'value<', +':reverse' is used to reverse the sorting order, +':in-place is used for in-place sorting, as the default is now to +sort a copy of the input. + +The new signature is less error-prone and reduces the need to write +ordering predicates by hand. We recommend that you use the ':key' +argument instead of ':lessp' unless a suitable ordering predicate is +already available. This can also be used for multi-key sorting: + + (sort seq :key (lambda (x) (list (age x) (size x) (cost x)))) + +sorts by the return value of 'age', then by 'size', then by 'cost'. + +The old signature, '(sort SEQ PREDICATE)', can still be used and sorts +its input in-place as before. + ** New function 'sort-on'. This function implements the Schwartzian transform, and is appropriate for sorting lists when the computation of the sort key of a list diff --git a/src/dired.c b/src/dired.c index 9a372201ae0..bfbacf70917 100644 --- a/src/dired.c +++ b/src/dired.c @@ -351,7 +351,7 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full, specpdl_ptr = specpdl_ref_to_ptr (count); if (NILP (nosort)) - list = Fsort (Fnreverse (list), + list = CALLN (Fsort, Fnreverse (list), attrs ? Qfile_attributes_lessp : Qstring_lessp); (void) directory_volatile; diff --git a/src/fns.c b/src/fns.c index a3ef99f67a8..7eacf99cbba 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2353,7 +2353,8 @@ See also the function `nreverse', which is used more often. */) is destructively reused to hold the sorted result. */ static Lisp_Object -sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) +sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc, + bool reverse) { ptrdiff_t length = list_length (list); if (length < 2) @@ -2369,7 +2370,7 @@ sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) result[i] = Fcar (tail); tail = XCDR (tail); } - tim_sort (predicate, keyfunc, result, length); + tim_sort (predicate, keyfunc, result, length, reverse); ptrdiff_t i = 0; tail = list; @@ -2388,27 +2389,86 @@ sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) algorithm. */ static void -sort_vector (Lisp_Object vector, Lisp_Object predicate, Lisp_Object keyfunc) +sort_vector (Lisp_Object vector, Lisp_Object predicate, Lisp_Object keyfunc, + bool reverse) { ptrdiff_t length = ASIZE (vector); if (length < 2) return; - tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length); + tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length, reverse); } -DEFUN ("sort", Fsort, Ssort, 2, 2, 0, - doc: /* Sort SEQ, stably, comparing elements using PREDICATE. -Returns the sorted sequence. SEQ should be a list or vector. SEQ is -modified by side effects. PREDICATE is called with two elements of -SEQ, and should return non-nil if the first element should sort before -the second. */) - (Lisp_Object seq, Lisp_Object predicate) +DEFUN ("sort", Fsort, Ssort, 1, MANY, 0, + doc: /* Sort SEQ, stably, and return the sorted sequence. +SEQ should be a list or vector. +Optional arguments are specified as keyword/argument pairs. The following +arguments are defined: + +:key FUNC -- FUNC is a function that takes a single element from SEQ and + returns the key value to be used in comparison. If absent or nil, + `identity' is used. + +:lessp FUNC -- FUNC is a function that takes two arguments and returns + non-nil if the first element should come before the second. + If absent or nil, `value<' is used. + +:reverse BOOL -- if BOOL is non-nil, the sorting order implied by FUNC is + reversed. This does not affect stability: equal elements still retain + their order in the input sequence. + +:in-place BOOL -- if BOOL is non-nil, SEQ is sorted in-place and returned. + Otherwise, a sorted copy of SEQ is returned and SEQ remains unmodified; + this is the default. + +For compatibility, the calling convention (sort SEQ LESSP) can also be used; +in this case, sorting is always done in-place. + +usage: (sort SEQ &key KEY LESSP REVERSE IN-PLACE) */) + (ptrdiff_t nargs, Lisp_Object *args) { + Lisp_Object seq = args[0]; + Lisp_Object key = Qnil; + Lisp_Object lessp = Qnil; + bool inplace = false; + bool reverse = false; + if (nargs == 2) + { + /* old-style invocation without keywords */ + lessp = args[1]; + inplace = true; + } + else if ((nargs & 1) == 0) + error ("Invalid argument list"); + else + for (ptrdiff_t i = 1; i < nargs - 1; i += 2) + { + if (EQ (args[i], QCkey)) + key = args[i + 1]; + else if (EQ (args[i], QClessp)) + lessp = args[i + 1]; + else if (EQ (args[i], QCin_place)) + inplace = !NILP (args[i + 1]); + else if (EQ (args[i], QCreverse)) + reverse = !NILP (args[i + 1]); + else + signal_error ("Invalid keyword argument", args[i]); + } + + if (NILP (lessp)) + /* FIXME: normalise it as Qnil instead, and special-case it in tim_sort? + That would remove the funcall overhead for the common case. */ + lessp = Qvaluelt; + + /* FIXME: for lists it may be slightly faster to make the copy after + sorting? Measure. */ + if (!inplace) + seq = Fcopy_sequence (seq); + if (CONSP (seq)) - seq = sort_list (seq, predicate, Qnil); + seq = sort_list (seq, lessp, key, reverse); else if (VECTORP (seq)) - sort_vector (seq, predicate, Qnil); + sort_vector (seq, lessp, key, reverse); else if (!NILP (seq)) wrong_type_argument (Qlist_or_vector_p, seq); return seq; @@ -6860,4 +6920,10 @@ For best results this should end in a space. */); DEFSYM (Qfrom__tty_menu_p, "from--tty-menu-p"); DEFSYM (Qyes_or_no_p, "yes-or-no-p"); DEFSYM (Qy_or_n_p, "y-or-n-p"); + + DEFSYM (QCkey, ":key"); + DEFSYM (QClessp, ":lessp"); + DEFSYM (QCin_place, ":in-place"); + DEFSYM (QCreverse, ":reverse"); + DEFSYM (Qvaluelt, "value<"); } diff --git a/src/lisp.h b/src/lisp.h index 14c0b8e4d1c..6226ab33244 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4299,7 +4299,8 @@ extern void syms_of_fns (void); extern void mark_fns (void); /* Defined in sort.c */ -extern void tim_sort (Lisp_Object, Lisp_Object, Lisp_Object *, const ptrdiff_t); +extern void tim_sort (Lisp_Object, Lisp_Object, Lisp_Object *, const ptrdiff_t, + bool); /* Defined in floatfns.c. */ verify (FLT_RADIX == 2 || FLT_RADIX == 16); diff --git a/src/pdumper.c b/src/pdumper.c index c7ebb38dea5..ac8bf6f31f4 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -3368,7 +3368,7 @@ dump_sort_copied_objects (struct dump_context *ctx) file and the copy into Emacs in-order, where prefetch will be most effective. */ ctx->copied_queue = - Fsort (Fnreverse (ctx->copied_queue), + CALLN (Fsort, Fnreverse (ctx->copied_queue), Qdump_emacs_portable__sort_predicate_copied); } @@ -3935,7 +3935,7 @@ drain_reloc_list (struct dump_context *ctx, { struct dump_flags old_flags = ctx->flags; ctx->flags.pack_objects = true; - Lisp_Object relocs = Fsort (Fnreverse (*reloc_list), + Lisp_Object relocs = CALLN (Fsort, Fnreverse (*reloc_list), Qdump_emacs_portable__sort_predicate); *reloc_list = Qnil; dump_align_output (ctx, max (alignof (struct dump_reloc), @@ -4057,7 +4057,7 @@ static void dump_do_fixups (struct dump_context *ctx) { dump_off saved_offset = ctx->offset; - Lisp_Object fixups = Fsort (Fnreverse (ctx->fixups), + Lisp_Object fixups = CALLN (Fsort, Fnreverse (ctx->fixups), Qdump_emacs_portable__sort_predicate); Lisp_Object prev_fixup = Qnil; ctx->fixups = Qnil; diff --git a/src/sort.c b/src/sort.c index d91993c8c65..a0f127c35b3 100644 --- a/src/sort.c +++ b/src/sort.c @@ -1072,11 +1072,11 @@ resolve_fun (Lisp_Object fun) } /* Sort the array SEQ with LENGTH elements in the order determined by - PREDICATE. */ - + PREDICATE (where Qnil means value<) and KEYFUNC (where Qnil means identity), + optionally reversed. */ void tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, - Lisp_Object *seq, const ptrdiff_t length) + Lisp_Object *seq, const ptrdiff_t length, bool reverse) { /* FIXME: optimise for the predicate being value<; at the very least we'd go without the Lisp funcall overhead. */ @@ -1091,9 +1091,8 @@ tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, if (EQ (keyfunc, Qidentity)) keyfunc = Qnil; - /* FIXME: consider a built-in reverse sorting flag: we would reverse - the input in-place here and reverse it back just before - returning. */ + if (reverse) + reverse_slice (seq, seq + length); /* preserve stability */ if (NILP (keyfunc)) { @@ -1159,6 +1158,9 @@ tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, eassume (ms.pending[0].len == length); lo = ms.pending[0].base; + if (reverse) + reverse_slice (seq, seq + length); + if (ms.a.keys != ms.temparray || allocated_keys != NULL) unbind_to (ms.count, Qnil); } diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 844000cdc76..1b13785a9fc 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -375,6 +375,49 @@ (should (equal (should-error (sort "cba" #'<) :type 'wrong-type-argument) '(wrong-type-argument list-or-vector-p "cba")))) +(defun fns-tests--shuffle-vector (vect) + "Shuffle VECT in place." + (let ((n (length vect))) + (dotimes (i (1- n)) + (let* ((j (+ i (random (- n i)))) + (vi (aref vect i))) + (aset vect i (aref vect j)) + (aset vect j vi))))) + +(ert-deftest fns-tests-sort-kw () + ;; Test the `sort' keyword calling convention by comparing with + ;; the results from using the old (positional) style tested above. + (random "my seed") + (dolist (size '(0 1 2 3 10 100 1000)) + ;; Use a vector with both positive and negative numbers (asymmetric). + (let ((numbers (vconcat + (number-sequence (- (/ size 3)) (- size 1 (/ size 3)))))) + (fns-tests--shuffle-vector numbers) + ;; Test both list and vector input. + (dolist (input (list (append numbers nil) numbers)) + (dolist (in-place '(nil t)) + (dolist (reverse '(nil t)) + (dolist (key '(nil abs)) + (dolist (lessp '(nil >)) + (let* ((seq (copy-sequence input)) + (res (sort seq :key key :lessp lessp + :in-place in-place :reverse reverse)) + (pred (or lessp #'value<)) + (exp-in (copy-sequence input)) + (exp-out + (sort (if reverse (reverse exp-in) exp-in) + (if key + (lambda (a b) + (funcall pred + (funcall key a) (funcall key b))) + pred))) + (expected (if reverse (reverse exp-out) exp-out))) + (should (equal res expected)) + (if in-place + (should (eq res seq)) + (should-not (and (> size 0) (eq res seq))) + (should (equal seq input)))))))))))) + (defvar w32-collate-ignore-punctuation) (ert-deftest fns-tests-collate-sort () commit a52f1121a3589af8f89828e04d66f1215c361bcf Author: Mattias Engdegård Date: Mon Mar 18 19:56:20 2024 +0100 Add back timsort key function handling (bug#69709) The original timsort code did provide for a key (accessor) function along with the necessary storage management, but we dropped it because our `sort` function didn't need it. Now it's been put back since it seems that it will come in handy after all. * src/fns.c (sort_list, sort_vector, Fsort): Pass Qnil as key function to tim_sort. * src/sort.c (reverse_slice, sortslice_copy) (sortslice_copy_incr, sortslice_copy_decr, sortslice_memcpy) (sortslice_memmove, sortslice_advance): New functions. (sortslice): New type. (struct stretch, struct reloc, merge_state) (binarysort, merge_init, merge_markmem, cleanup_mem) (merge_register_cleanup, merge_getmem, merge_lo, merge_hi, merge_at) (found_new_run, reverse_sortslice, resolve_fun, tim_sort): Merge back previously discarded parts from the upstreams timsort code that dealt with key functions, and adapt them to fit in. diff --git a/src/fns.c b/src/fns.c index 7faf25b9088..a3ef99f67a8 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2353,7 +2353,7 @@ See also the function `nreverse', which is used more often. */) is destructively reused to hold the sorted result. */ static Lisp_Object -sort_list (Lisp_Object list, Lisp_Object predicate) +sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) { ptrdiff_t length = list_length (list); if (length < 2) @@ -2369,7 +2369,7 @@ sort_list (Lisp_Object list, Lisp_Object predicate) result[i] = Fcar (tail); tail = XCDR (tail); } - tim_sort (predicate, result, length); + tim_sort (predicate, keyfunc, result, length); ptrdiff_t i = 0; tail = list; @@ -2388,13 +2388,13 @@ sort_list (Lisp_Object list, Lisp_Object predicate) algorithm. */ static void -sort_vector (Lisp_Object vector, Lisp_Object predicate) +sort_vector (Lisp_Object vector, Lisp_Object predicate, Lisp_Object keyfunc) { ptrdiff_t length = ASIZE (vector); if (length < 2) return; - tim_sort (predicate, XVECTOR (vector)->contents, length); + tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length); } DEFUN ("sort", Fsort, Ssort, 2, 2, 0, @@ -2406,9 +2406,9 @@ the second. */) (Lisp_Object seq, Lisp_Object predicate) { if (CONSP (seq)) - seq = sort_list (seq, predicate); + seq = sort_list (seq, predicate, Qnil); else if (VECTORP (seq)) - sort_vector (seq, predicate); + sort_vector (seq, predicate, Qnil); else if (!NILP (seq)) wrong_type_argument (Qlist_or_vector_p, seq); return seq; diff --git a/src/lisp.h b/src/lisp.h index 5583a7e2e8e..14c0b8e4d1c 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4299,7 +4299,7 @@ extern void syms_of_fns (void); extern void mark_fns (void); /* Defined in sort.c */ -extern void tim_sort (Lisp_Object, Lisp_Object *, const ptrdiff_t); +extern void tim_sort (Lisp_Object, Lisp_Object, Lisp_Object *, const ptrdiff_t); /* Defined in floatfns.c. */ verify (FLT_RADIX == 2 || FLT_RADIX == 16); diff --git a/src/sort.c b/src/sort.c index 2f98bfa648c..d91993c8c65 100644 --- a/src/sort.c +++ b/src/sort.c @@ -34,6 +34,90 @@ along with GNU Emacs. If not, see . */ #include "lisp.h" +/* Reverse a slice of a vector in place, from lo up to (exclusive) hi. */ +static void +reverse_slice(Lisp_Object *lo, Lisp_Object *hi) +{ + eassert (lo && hi); + + --hi; + while (lo < hi) { + Lisp_Object t = *lo; + *lo = *hi; + *hi = t; + ++lo; + --hi; + } +} + +/* A sortslice contains a pointer to an array of keys and a pointer to + an array of corresponding values. In other words, keys[i] + corresponds with values[i]. If values == NULL, then the keys are + also the values. + + Several convenience routines are provided here, so that keys and + values are always moved in sync. */ + +typedef struct { + Lisp_Object *keys; + Lisp_Object *values; +} sortslice; + +/* FIXME: Instead of values=NULL, can we set values=keys, so that they + are both moved in lockstep and we avoid a lot of branches? + We may do some useless work but it might be cheaper overall. */ + +static inline void +sortslice_copy (sortslice *s1, ptrdiff_t i, sortslice *s2, ptrdiff_t j) +{ + s1->keys[i] = s2->keys[j]; + if (s1->values != NULL) + s1->values[i] = s2->values[j]; +} + +static inline void +sortslice_copy_incr (sortslice *dst, sortslice *src) +{ + *dst->keys++ = *src->keys++; + if (dst->values != NULL) + *dst->values++ = *src->values++; +} + +static inline void +sortslice_copy_decr (sortslice *dst, sortslice *src) +{ + *dst->keys-- = *src->keys--; + if (dst->values != NULL) + *dst->values-- = *src->values--; +} + + +static inline void +sortslice_memcpy (sortslice *s1, ptrdiff_t i, sortslice *s2, ptrdiff_t j, + ptrdiff_t n) +{ + memcpy (&s1->keys[i], &s2->keys[j], sizeof s1->keys[0] * n); + if (s1->values != NULL) + memcpy (&s1->values[i], &s2->values[j], sizeof s1->values[0] * n); +} + +static inline void +sortslice_memmove (sortslice *s1, ptrdiff_t i, sortslice *s2, ptrdiff_t j, + ptrdiff_t n) +{ + memmove (&s1->keys[i], &s2->keys[j], sizeof s1->keys[0] * n); + if (s1->values != NULL) + memmove (&s1->values[i], &s2->values[j], sizeof s1->values[0] * n); +} + +static inline void +sortslice_advance (sortslice *slice, ptrdiff_t n) +{ + slice->keys += n; + if (slice->values != NULL) + slice->values += n; +} + /* MAX_MERGE_PENDING is the maximum number of entries in merge_state's pending-stretch stack. For a list with n elements, this needs at most floor(log2(n)) + 1 entries even if we didn't force runs to a @@ -54,15 +138,15 @@ along with GNU Emacs. If not, see . */ struct stretch { - Lisp_Object *base; + sortslice base; ptrdiff_t len; int power; }; struct reloc { - Lisp_Object **src; - Lisp_Object **dst; + sortslice *src; + sortslice *dst; ptrdiff_t *size; int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise. */ }; @@ -70,7 +154,8 @@ struct reloc typedef struct { - Lisp_Object *listbase; + Lisp_Object *basekeys; + Lisp_Object *allocated_keys; /* heap-alloc'ed key array or NULL */ ptrdiff_t listlen; /* PENDING is a stack of N pending stretches yet to be merged. @@ -91,7 +176,7 @@ typedef struct with merges. 'A' initially points to TEMPARRAY, and subsequently to newly allocated memory if needed. */ - Lisp_Object *a; + sortslice a; ptrdiff_t alloced; specpdl_ref count; Lisp_Object temparray[MERGESTATE_TEMP_SIZE]; @@ -124,17 +209,17 @@ inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b) permutation of the input (nothing is lost or duplicated). */ static void -binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, +binarysort (merge_state *ms, sortslice lo, const Lisp_Object *hi, Lisp_Object *start) { Lisp_Object pred = ms->predicate; - eassume (lo <= start && start <= hi); - if (lo == start) + eassume (lo.keys <= start && start <= hi); + if (lo.keys == start) ++start; for (; start < hi; ++start) { - Lisp_Object *l = lo; + Lisp_Object *l = lo.keys; Lisp_Object *r = start; Lisp_Object pivot = *r; @@ -150,6 +235,17 @@ binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, for (Lisp_Object *p = start; p > l; --p) p[0] = p[-1]; *l = pivot; + + if (lo.values != NULL) + { + ptrdiff_t offset = lo.values - lo.keys; + Lisp_Object *p = start + offset; + pivot = *p; + l += offset; + for (Lisp_Object *p = start + offset; p > l; --p) + p[0] = p[-1]; + *l = pivot; + } } } @@ -378,21 +474,46 @@ gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, } +static void merge_register_cleanup (merge_state *ms); + static void -merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo, - const Lisp_Object predicate) +merge_init (merge_state *ms, const ptrdiff_t list_size, + Lisp_Object *allocated_keys, sortslice *lo, Lisp_Object predicate) { eassume (ms != NULL); - ms->a = ms->temparray; - ms->alloced = MERGESTATE_TEMP_SIZE; + if (lo->values != NULL) + { + /* The temporary space for merging will need at most half the list + size rounded up. Use the minimum possible space so we can use the + rest of temparray for other things. In particular, if there is + enough extra space, if will be used to store the keys. */ + ms->alloced = (list_size + 1) / 2; + + /* ms->alloced describes how many keys will be stored at + ms->temparray, but we also need to store the values. Hence, + ms->alloced is capped at half of MERGESTATE_TEMP_SIZE. */ + if (MERGESTATE_TEMP_SIZE / 2 < ms->alloced) + ms->alloced = MERGESTATE_TEMP_SIZE / 2; + ms->a.values = &ms->temparray[ms->alloced]; + } + else + { + ms->alloced = MERGESTATE_TEMP_SIZE; + ms->a.values = NULL; + } + ms->a.keys = ms->temparray; ms->n = 0; ms->min_gallop = GALLOP_WIN_MIN; ms->listlen = list_size; - ms->listbase = lo; + ms->basekeys = lo->keys; + ms->allocated_keys = allocated_keys; ms->predicate = predicate; ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + ms->count = make_invalid_specpdl_ref (); + if (allocated_keys != NULL) + merge_register_cleanup (ms); } @@ -408,8 +529,10 @@ merge_markmem (void *arg) if (ms->reloc.size != NULL && *ms->reloc.size > 0) { - eassume (ms->reloc.src != NULL); - mark_objects (*ms->reloc.src, *ms->reloc.size); + Lisp_Object *src = (ms->reloc.src->values + ? ms->reloc.src->values : ms->reloc.src->keys); + eassume (src != NULL); + mark_objects (src, *ms->reloc.size); } } @@ -432,16 +555,37 @@ cleanup_mem (void *arg) if (ms->reloc.order != 0 && *ms->reloc.size > 0) { - eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL); + Lisp_Object *src = (ms->reloc.src->values + ? ms->reloc.src->values : ms->reloc.src->keys); + Lisp_Object *dst = (ms->reloc.dst->values + ? ms->reloc.dst->values : ms->reloc.dst->keys); + eassume (src != NULL && dst != NULL); ptrdiff_t n = *ms->reloc.size; ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1; - memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size); + memcpy (dst - shift, src, n * word_size); } /* Free any remaining temp storage. */ - xfree (ms->a); + if (ms->a.keys != ms->temparray) + { + xfree (ms->a.keys); + ms->a.keys = NULL; + } + + if (ms->allocated_keys != NULL) + { + xfree (ms->allocated_keys); + ms->allocated_keys = NULL; + } } +static void +merge_register_cleanup (merge_state *ms) +{ + specpdl_ref count = SPECPDL_INDEX (); + record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem); + ms->count = count; +} /* Allocate enough temp memory for NEED array slots. Any previously allocated memory is first freed, and a cleanup routine is @@ -453,13 +597,12 @@ merge_getmem (merge_state *ms, const ptrdiff_t need) { eassume (ms != NULL); - if (ms->a == ms->temparray) + if (ms->a.keys == ms->temparray) { /* We only get here if alloc is needed and this is the first time, so we set up the unwind protection. */ - specpdl_ref count = SPECPDL_INDEX (); - record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem); - ms->count = count; + if (!specpdl_ref_valid_p (ms->count)) + merge_register_cleanup (ms); } else { @@ -467,10 +610,13 @@ merge_getmem (merge_state *ms, const ptrdiff_t need) what's in the block we don't use realloc which would waste cycles copying the old data. We just free and alloc again. */ - xfree (ms->a); + xfree (ms->a.keys); } - ms->a = xmalloc (need * word_size); + ptrdiff_t bytes = (need * word_size) << (ms->a.values != NULL ? 1 : 0); + ms->a.keys = xmalloc (bytes); ms->alloced = need; + if (ms->a.values != NULL) + ms->a.values = &ms->a.keys[need]; } @@ -488,21 +634,21 @@ needmem (merge_state *ms, ptrdiff_t na) NB. */ static void -merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, - ptrdiff_t nb) +merge_lo (merge_state *ms, sortslice ssa, ptrdiff_t na, + sortslice ssb, ptrdiff_t nb) { Lisp_Object pred = ms->predicate; - eassume (ms && ssa && ssb && na > 0 && nb > 0); - eassume (ssa + na == ssb); + eassume (ms && ssa.keys && ssb.keys && na > 0 && nb > 0); + eassume (ssa.keys + na == ssb.keys); needmem (ms, na); - memcpy (ms->a, ssa, na * word_size); - Lisp_Object *dest = ssa; + sortslice_memcpy (&ms->a, 0, &ssa, 0, na); + sortslice dest = ssa; ssa = ms->a; ms->reloc = (struct reloc){&ssa, &dest, &na, -1}; - *dest++ = *ssb++; + sortslice_copy_incr (&dest, &ssb); --nb; if (nb == 0) goto Succeed; @@ -519,9 +665,9 @@ merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, for (;;) { eassume (na > 1 && nb > 0); - if (inorder (pred, *ssb, *ssa)) + if (inorder (pred, ssb.keys[0], ssa.keys[0])) { - *dest++ = *ssb++ ; + sortslice_copy_incr (&dest, &ssb); ++bcount; acount = 0; --nb; @@ -532,7 +678,7 @@ merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, } else { - *dest++ = *ssa++; + sortslice_copy_incr (&dest, &ssa); ++acount; bcount = 0; --na; @@ -552,13 +698,13 @@ merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, eassume (na > 1 && nb > 0); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0); + ptrdiff_t k = gallop_right (ms, ssb.keys[0], ssa.keys, na, 0); acount = k; if (k) { - memcpy (dest, ssa, k * word_size); - dest += k; - ssa += k; + sortslice_memcpy (&dest, 0, &ssa, 0, k); + sortslice_advance (&dest, k); + sortslice_advance (&ssa, k); na -= k; if (na == 1) goto CopyB; @@ -567,23 +713,23 @@ merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, if (na == 0) goto Succeed; } - *dest++ = *ssb++ ; + sortslice_copy_incr (&dest, &ssb); --nb; if (nb == 0) goto Succeed; - k = gallop_left (ms, ssa[0], ssb, nb, 0); + k = gallop_left (ms, ssa.keys[0], ssb.keys, nb, 0); bcount = k; if (k) { - memmove (dest, ssb, k * word_size); - dest += k; - ssb += k; + sortslice_memmove (&dest, 0, &ssb, 0, k); + sortslice_advance (&dest, k); + sortslice_advance (&ssb, k); nb -= k; if (nb == 0) goto Succeed; } - *dest++ = *ssa++; + sortslice_copy_incr (&dest, &ssa); --na; if (na == 1) goto CopyB; @@ -595,15 +741,15 @@ merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; if (na) - memcpy (dest, ssa, na * word_size); + sortslice_memcpy(&dest, 0, &ssa, 0, na); return; CopyB: eassume (na == 1 && nb > 0); ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; /* The last element of ssa belongs at the end of the merge. */ - memmove (dest, ssb, nb * word_size); - dest[nb] = ssa[0]; + sortslice_memmove (&dest, 0, &ssb, 0, nb); + sortslice_copy (&dest, nb, &ssa, 0); } @@ -613,25 +759,27 @@ merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, NB. */ static void -merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, - Lisp_Object *ssb, ptrdiff_t nb) +merge_hi (merge_state *ms, sortslice ssa, ptrdiff_t na, + sortslice ssb, ptrdiff_t nb) { Lisp_Object pred = ms->predicate; - eassume (ms && ssa && ssb && na > 0 && nb > 0); - eassume (ssa + na == ssb); + eassume (ms && ssa.keys && ssb.keys && na > 0 && nb > 0); + eassume (ssa.keys + na == ssb.keys); needmem (ms, nb); - Lisp_Object *dest = ssb; - dest += nb - 1; - memcpy(ms->a, ssb, nb * word_size); - Lisp_Object *basea = ssa; - Lisp_Object *baseb = ms->a; - ssb = ms->a + nb - 1; - ssa += na - 1; + sortslice dest = ssb; + sortslice_advance (&dest, nb-1); + sortslice_memcpy (&ms->a, 0, &ssb, 0, nb); + sortslice basea = ssa; + sortslice baseb = ms->a; + ssb.keys = ms->a.keys + nb - 1; + if (ssb.values != NULL) + ssb.values = ms->a.values + nb - 1; + sortslice_advance (&ssa, na - 1); ms->reloc = (struct reloc){&baseb, &dest, &nb, 1}; - *dest-- = *ssa--; + sortslice_copy_decr (&dest, &ssa); --na; if (na == 0) goto Succeed; @@ -645,9 +793,9 @@ merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, for (;;) { eassume (na > 0 && nb > 1); - if (inorder (pred, *ssb, *ssa)) + if (inorder (pred, ssb.keys[0], ssa.keys[0])) { - *dest-- = *ssa--; + sortslice_copy_decr (&dest, &ssa); ++acount; bcount = 0; --na; @@ -658,7 +806,7 @@ merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, } else { - *dest-- = *ssb--; + sortslice_copy_decr (&dest, &ssb); ++bcount; acount = 0; --nb; @@ -677,31 +825,31 @@ merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, eassume (na > 0 && nb > 1); min_gallop -= min_gallop > 1; ms->min_gallop = min_gallop; - ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1); + ptrdiff_t k = gallop_right (ms, ssb.keys[0], basea.keys, na, na - 1); k = na - k; acount = k; if (k) { - dest += -k; - ssa += -k; - memmove(dest + 1, ssa + 1, k * word_size); + sortslice_advance (&dest, -k); + sortslice_advance (&ssa, -k); + sortslice_memmove (&dest, 1, &ssa, 1, k); na -= k; if (na == 0) goto Succeed; } - *dest-- = *ssb--; + sortslice_copy_decr(&dest, &ssb); --nb; if (nb == 1) goto CopyA; - k = gallop_left (ms, ssa[0], baseb, nb, nb - 1); + k = gallop_left (ms, ssa.keys[0], baseb.keys, nb, nb - 1); k = nb - k; bcount = k; if (k) { - dest += -k; - ssb += -k; - memcpy(dest + 1, ssb + 1, k * word_size); + sortslice_advance (&dest, -k); + sortslice_advance (&ssb, -k); + sortslice_memcpy (&dest, 1, &ssb, 1, k); nb -= k; if (nb == 1) goto CopyA; @@ -710,7 +858,7 @@ merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, if (nb == 0) goto Succeed; } - *dest-- = *ssa--; + sortslice_copy_decr (&dest, &ssa); --na; if (na == 0) goto Succeed; @@ -721,16 +869,16 @@ merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Succeed: ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; if (nb) - memcpy (dest - nb + 1, baseb, nb * word_size); + sortslice_memcpy (&dest, -(nb-1), &baseb, 0, nb); return; CopyA: eassume (nb == 1 && na > 0); ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; /* The first element of ssb belongs at the front of the merge. */ - memmove (dest + 1 - na, ssa + 1 - na, na * word_size); - dest += -na; - ssa += -na; - dest[0] = ssb[0]; + sortslice_memmove (&dest, 1-na, &ssa, 1-na, na); + sortslice_advance (&dest, -na); + sortslice_advance (&ssa, -na); + sortslice_copy (&dest, 0, &ssb, 0); } @@ -744,12 +892,12 @@ merge_at (merge_state *ms, const ptrdiff_t i) eassume (i >= 0); eassume (i == ms->n - 2 || i == ms->n - 3); - Lisp_Object *ssa = ms->pending[i].base; + sortslice ssa = ms->pending[i].base; ptrdiff_t na = ms->pending[i].len; - Lisp_Object *ssb = ms->pending[i + 1].base; + sortslice ssb = ms->pending[i + 1].base; ptrdiff_t nb = ms->pending[i + 1].len; eassume (na > 0 && nb > 0); - eassume (ssa + na == ssb); + eassume (ssa.keys + na == ssb.keys); /* Record the length of the combined runs. The current run i+1 goes away after the merge. If i is the 3rd-last run now, slide the @@ -761,16 +909,16 @@ merge_at (merge_state *ms, const ptrdiff_t i) /* Where does b start in a? Elements in a before that can be ignored (they are already in place). */ - ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0); + ptrdiff_t k = gallop_right (ms, *ssb.keys, ssa.keys, na, 0); eassume (k >= 0); - ssa += k; + sortslice_advance (&ssa, k); na -= k; if (na == 0) return; /* Where does a end in b? Elements in b after that can be ignored (they are already in place). */ - nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1); + nb = gallop_left (ms, ssa.keys[na - 1], ssb.keys, nb, nb - 1); if (nb == 0) return; eassume (nb > 0); @@ -841,7 +989,7 @@ found_new_run (merge_state *ms, const ptrdiff_t n2) { eassume (ms->n > 0); struct stretch *p = ms->pending; - ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase; + ptrdiff_t s1 = p[ms->n - 1].base.keys - ms->basekeys; ptrdiff_t n1 = p[ms->n - 1].len; int power = powerloop (s1, n1, n2, ms->listlen); while (ms->n > 1 && p[ms->n - 2].power > power) @@ -898,39 +1046,81 @@ merge_compute_minrun (ptrdiff_t n) static void -reverse_vector (Lisp_Object *s, const ptrdiff_t n) +reverse_sortslice (sortslice *s, const ptrdiff_t n) { - for (ptrdiff_t i = 0; i < n >> 1; i++) + reverse_slice(s->keys, &s->keys[n]); + if (s->values != NULL) + reverse_slice(s->values, &s->values[n]); +} + +static Lisp_Object +resolve_fun (Lisp_Object fun) +{ + if (SYMBOLP (fun)) { - Lisp_Object tem = s[i]; - s[i] = s[n - i - 1]; - s[n - i - 1] = tem; + /* Attempt to resolve the function as far as possible ahead of time, + to avoid having to do it for each call. */ + Lisp_Object f = XSYMBOL (fun)->u.s.function; + if (SYMBOLP (f)) + /* Function was an alias; use slow-path resolution. */ + f = indirect_function (f); + /* Don't resolve to an autoload spec; that would be very slow. */ + if (!NILP (f) && !(CONSP (f) && EQ (XCAR (f), Qautoload))) + fun = f; } + return fun; } /* Sort the array SEQ with LENGTH elements in the order determined by PREDICATE. */ void -tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) +tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, + Lisp_Object *seq, const ptrdiff_t length) { - if (SYMBOLP (predicate)) + /* FIXME: optimise for the predicate being value<; at the very + least we'd go without the Lisp funcall overhead. */ + predicate = resolve_fun (predicate); + + sortslice lo; + Lisp_Object *keys; + Lisp_Object *allocated_keys = NULL; + merge_state ms; + + /* FIXME: hoist this to the caller? */ + if (EQ (keyfunc, Qidentity)) + keyfunc = Qnil; + + /* FIXME: consider a built-in reverse sorting flag: we would reverse + the input in-place here and reverse it back just before + returning. */ + + if (NILP (keyfunc)) { - /* Attempt to resolve the function as far as possible ahead of time, - to avoid having to do it for each call. */ - Lisp_Object fun = XSYMBOL (predicate)->u.s.function; - if (SYMBOLP (fun)) - /* Function was an alias; use slow-path resolution. */ - fun = indirect_function (fun); - /* Don't resolve to an autoload spec; that would be very slow. */ - if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload))) - predicate = fun; + keys = NULL; + lo.keys = seq; + lo.values = NULL; } + else + { + keyfunc = resolve_fun (keyfunc); + if (length < MERGESTATE_TEMP_SIZE / 2) + keys = &ms.temparray[length + 1]; + else + keys = allocated_keys = xmalloc (length * word_size); - merge_state ms; - Lisp_Object *lo = seq; + for (ptrdiff_t i = 0; i < length; i++) + keys[i] = call1 (keyfunc, seq[i]); + + lo.keys = keys; + lo.values = seq; + } + + /* FIXME: This is where we would check the keys for interesting + properties for more optimised comparison (such as all being fixnums + etc). */ - merge_init (&ms, length, lo, predicate); + merge_init (&ms, length, allocated_keys, &lo, predicate); /* March over the array once, left to right, finding natural runs, and extending short natural runs to minrun elements. */ @@ -940,18 +1130,19 @@ tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) bool descending; /* Identify the next run. */ - ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending); + ptrdiff_t n = count_run (&ms, lo.keys, lo.keys + nremaining, &descending); if (descending) - reverse_vector (lo, n); + reverse_sortslice (&lo, n); /* If the run is short, extend it to min(minrun, nremaining). */ if (n < minrun) { const ptrdiff_t force = min (nremaining, minrun); - binarysort (&ms, lo, lo + force, lo + n); + binarysort (&ms, lo, lo.keys + force, lo.keys + n); n = force; } - eassume (ms.n == 0 || ms.pending[ms.n - 1].base + - ms.pending[ms.n - 1].len == lo); + eassume (ms.n == 0 + || (ms.pending[ms.n - 1].base.keys + ms.pending[ms.n - 1].len + == lo.keys)); found_new_run (&ms, n); /* Push the new run on to the stack. */ eassume (ms.n < MAX_MERGE_PENDING); @@ -959,7 +1150,7 @@ tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) ms.pending[ms.n].len = n; ++ms.n; /* Advance to find the next run. */ - lo += n; + sortslice_advance(&lo, n); nremaining -= n; } while (nremaining); @@ -968,6 +1159,6 @@ tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) eassume (ms.pending[0].len == length); lo = ms.pending[0].base; - if (ms.a != ms.temparray) + if (ms.a.keys != ms.temparray || allocated_keys != NULL) unbind_to (ms.count, Qnil); } commit 1232ab31c656b8564984a758957466f90ac10501 Author: Mattias Engdegård Date: Sun Mar 10 13:18:22 2024 +0100 Add `value<` (bug#69709) It's a general-purpose polymorphic ordering function, like `<` but for any two values of the same type. * src/data.c (syms_of_data): Add the `type-mismatch` error. (bits_word_to_host_endian): Move... * src/lisp.h (bits_word_to_host_endian): ...here, and declare inline. * src/fns.c (Fstring_lessp): Extract the bulk of this function to... (string_cmp): ...this 3-way comparison function, for use elsewhere. (bool_vector_cmp, value_cmp, Fvaluelt): New. * lisp/emacs-lisp/byte-opt.el (side-effect-free-fns, pure-fns): Add `value<`, which is pure and side-effect-free. * test/src/fns-tests.el (fns-value<-ordered, fns-value<-unordered) (fns-value<-type-mismatch, fns-value<-symbol-with-pos) (fns-value<-circle, ert-deftest fns-value<-bool-vector): New tests. * doc/lispref/sequences.texi (Sequence Functions): * doc/lispref/numbers.texi (Comparison of Numbers): * doc/lispref/strings.texi (Text Comparison): Document the new value< function. * etc/NEWS: Announce. diff --git a/doc/lispref/numbers.texi b/doc/lispref/numbers.texi index 99b456043b9..2c093ccd6bd 100644 --- a/doc/lispref/numbers.texi +++ b/doc/lispref/numbers.texi @@ -476,6 +476,7 @@ This function tests whether its arguments are numerically equal, and returns @code{t} if they are not, and @code{nil} if they are. @end defun +@anchor{definition of <} @defun < number-or-marker &rest number-or-markers This function tests whether each argument is strictly less than the following argument. It returns @code{t} if so, @code{nil} otherwise. diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 74719d4779f..5bdf71fe02e 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -436,6 +436,41 @@ but their relative order is also preserved: @end example @end defun +@cindex comparing values +@cindex standard sorting order +@defun value< a b +This function returns non-@code{nil} if @var{a} comes before @var{b} in +the standard sorting order; this means that it returns @code{nil} when +@var{b} comes before @var{a}, or if they are equal or unordered. + +@var{a} and @var{b} must have the same type. Specifically: + +@itemize @bullet +@item +Numbers are compared using @code{<} (@pxref{definition of <}). +@item +Strings and symbols are compared using @code{string<} +(@pxref{definition of string<}). +@item +Conses, lists, vectors and records are compared lexicographically. +@item +Markers are compared first by buffer, then by position. +@item +Buffers and processes are compared by name. +@item +Other types are considered unordered and the return value will be @code{nil}. +@end itemize + +Examples: +@example +(value< -4 3.5) @result{} t +(value< "dog" "cat") @result{} nil +(value< 'yip 'yip) @result{} nil +(value< '(3 2) '(3 2 0)) @result{} t +(value< [3 2 1] [3 2 0]) @result{} nil +@end example +@end defun + Sometimes, computation of sort keys of list or vector elements is expensive, and therefore it is important to perform it the minimum number of times. By contrast, computing the sort keys of elements diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index a2285098aad..6a9dd589237 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -612,6 +612,7 @@ that collation implements. @end defun @cindex lexical comparison of strings +@anchor{definition of string<} @defun string< string1 string2 @c (findex string< causes problems for permuted index!!) This function compares two strings a character at a time. It diff --git a/etc/NEWS b/etc/NEWS index 696d744e342..73ffff9f2d3 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1760,6 +1760,16 @@ precedence over the variable when present. Mostly used internally to do a kind of topological sort of inheritance hierarchies. ++++ +** New polymorphic comparison function 'value<'. +This function returns non-nil if the first argument is less than the +second. It works for any two values of the same type with reasonable +ordering for numbers, strings, symbols, bool-vectors, markers, buffers +and processes. Conses, lists, vectors and records are ordered +lexicographically. +It is intended as a convenient ordering predicate for sorting, and is +likely to be faster than hand-written Lisp functions. + ** New function 'sort-on'. This function implements the Schwartzian transform, and is appropriate for sorting lists when the computation of the sort key of a list diff --git a/lisp/emacs-lisp/byte-opt.el b/lisp/emacs-lisp/byte-opt.el index 54997205edb..ea163723a3e 100644 --- a/lisp/emacs-lisp/byte-opt.el +++ b/lisp/emacs-lisp/byte-opt.el @@ -1772,7 +1772,7 @@ See Info node `(elisp) Integer Basics'." string-version-lessp substring substring-no-properties sxhash-eq sxhash-eql sxhash-equal sxhash-equal-including-properties - take vconcat + take value< vconcat ;; frame.c frame-ancestor-p frame-bottom-divider-width frame-char-height frame-char-width frame-child-frame-border-width frame-focus @@ -1973,7 +1973,7 @@ See Info node `(elisp) Integer Basics'." hash-table-p identity length length< length= length> member memq memql nth nthcdr proper-list-p rassoc rassq safe-length string-bytes string-distance string-equal string-lessp - string-search string-version-lessp take + string-search string-version-lessp take value< ;; search.c regexp-quote ;; syntax.c diff --git a/src/data.c b/src/data.c index 69b990bed76..a86f86c52f5 100644 --- a/src/data.c +++ b/src/data.c @@ -3835,30 +3835,6 @@ count_trailing_zero_bits (bits_word val) } } -static bits_word -bits_word_to_host_endian (bits_word val) -{ -#ifndef WORDS_BIGENDIAN - return val; -#else - if (BITS_WORD_MAX >> 31 == 1) - return bswap_32 (val); - if (BITS_WORD_MAX >> 31 >> 31 >> 1 == 1) - return bswap_64 (val); - { - int i; - bits_word r = 0; - for (i = 0; i < sizeof val; i++) - { - r = ((r << 1 << (CHAR_BIT - 1)) - | (val & ((1u << 1 << (CHAR_BIT - 1)) - 1))); - val = val >> 1 >> (CHAR_BIT - 1); - } - return r; - } -#endif -} - DEFUN ("bool-vector-exclusive-or", Fbool_vector_exclusive_or, Sbool_vector_exclusive_or, 2, 3, 0, doc: /* Return A ^ B, bitwise exclusive or. @@ -4072,6 +4048,7 @@ syms_of_data (void) DEFSYM (Qminibuffer_quit, "minibuffer-quit"); DEFSYM (Qwrong_length_argument, "wrong-length-argument"); DEFSYM (Qwrong_type_argument, "wrong-type-argument"); + DEFSYM (Qtype_mismatch, "type-mismatch") DEFSYM (Qargs_out_of_range, "args-out-of-range"); DEFSYM (Qvoid_function, "void-function"); DEFSYM (Qcyclic_function_indirection, "cyclic-function-indirection"); @@ -4163,6 +4140,7 @@ syms_of_data (void) PUT_ERROR (Quser_error, error_tail, ""); PUT_ERROR (Qwrong_length_argument, error_tail, "Wrong length argument"); PUT_ERROR (Qwrong_type_argument, error_tail, "Wrong type argument"); + PUT_ERROR (Qtype_mismatch, error_tail, "Types do not match"); PUT_ERROR (Qargs_out_of_range, error_tail, "Args out of range"); PUT_ERROR (Qvoid_function, error_tail, "Symbol's function definition is void"); diff --git a/src/fns.c b/src/fns.c index 0a64e515402..7faf25b9088 100644 --- a/src/fns.c +++ b/src/fns.c @@ -27,6 +27,7 @@ along with GNU Emacs. If not, see . */ #include #include #include +#include #include "lisp.h" #include "bignum.h" @@ -466,21 +467,10 @@ load_unaligned_size_t (const void *p) return x; } -DEFUN ("string-lessp", Fstring_lessp, Sstring_lessp, 2, 2, 0, - doc: /* Return non-nil if STRING1 is less than STRING2 in lexicographic order. -Case is significant. -Symbols are also allowed; their print names are used instead. */) - (Lisp_Object string1, Lisp_Object string2) +/* Return -1/0/1 to indicate the relation between string1 and string2. */ +static int +string_cmp (Lisp_Object string1, Lisp_Object string2) { - if (SYMBOLP (string1)) - string1 = SYMBOL_NAME (string1); - else - CHECK_STRING (string1); - if (SYMBOLP (string2)) - string2 = SYMBOL_NAME (string2); - else - CHECK_STRING (string2); - ptrdiff_t n = min (SCHARS (string1), SCHARS (string2)); if ((!STRING_MULTIBYTE (string1) || SCHARS (string1) == SBYTES (string1)) @@ -489,7 +479,9 @@ Symbols are also allowed; their print names are used instead. */) /* Each argument is either unibyte or all-ASCII multibyte: we can compare bytewise. */ int d = memcmp (SSDATA (string1), SSDATA (string2), n); - return d < 0 || (d == 0 && n < SCHARS (string2)) ? Qt : Qnil; + if (d) + return d; + return n < SCHARS (string2) ? -1 : n > SCHARS (string2); } else if (STRING_MULTIBYTE (string1) && STRING_MULTIBYTE (string2)) { @@ -523,7 +515,7 @@ Symbols are also allowed; their print names are used instead. */) if (b >= nb) /* One string is a prefix of the other. */ - return b < nb2 ? Qt : Qnil; + return b < nb2 ? -1 : b > nb2; /* Now back up to the start of the differing characters: it's the last byte not having the bit pattern 10xxxxxx. */ @@ -535,7 +527,7 @@ Symbols are also allowed; their print names are used instead. */) ptrdiff_t i1_byte = b, i2_byte = b; int c1 = fetch_string_char_advance_no_check (string1, &i1, &i1_byte); int c2 = fetch_string_char_advance_no_check (string2, &i2, &i2_byte); - return c1 < c2 ? Qt : Qnil; + return c1 < c2 ? -1 : c1 > c2; } else if (STRING_MULTIBYTE (string1)) { @@ -546,9 +538,9 @@ Symbols are also allowed; their print names are used instead. */) int c1 = fetch_string_char_advance_no_check (string1, &i1, &i1_byte); int c2 = SREF (string2, i2++); if (c1 != c2) - return c1 < c2 ? Qt : Qnil; + return c1 < c2 ? -1 : 1; } - return i1 < SCHARS (string2) ? Qt : Qnil; + return i1 < SCHARS (string2) ? -1 : i1 > SCHARS (string2); } else { @@ -559,12 +551,30 @@ Symbols are also allowed; their print names are used instead. */) int c1 = SREF (string1, i1++); int c2 = fetch_string_char_advance_no_check (string2, &i2, &i2_byte); if (c1 != c2) - return c1 < c2 ? Qt : Qnil; + return c1 < c2 ? -1 : 1; } - return i1 < SCHARS (string2) ? Qt : Qnil; + return i1 < SCHARS (string2) ? -1 : i1 > SCHARS (string2); } } +DEFUN ("string-lessp", Fstring_lessp, Sstring_lessp, 2, 2, 0, + doc: /* Return non-nil if STRING1 is less than STRING2 in lexicographic order. +Case is significant. +Symbols are also allowed; their print names are used instead. */) + (Lisp_Object string1, Lisp_Object string2) +{ + if (SYMBOLP (string1)) + string1 = SYMBOL_NAME (string1); + else + CHECK_STRING (string1); + if (SYMBOLP (string2)) + string2 = SYMBOL_NAME (string2); + else + CHECK_STRING (string2); + + return string_cmp (string1, string2) < 0 ? Qt : Qnil; +} + DEFUN ("string-version-lessp", Fstring_version_lessp, Sstring_version_lessp, 2, 2, 0, doc: /* Return non-nil if S1 is less than S2, as version strings. @@ -2908,6 +2918,233 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind, return false; } + +/* Return -1/0/1 for the lexicographic relation between bool-vectors. */ +static int +bool_vector_cmp (Lisp_Object a, Lisp_Object b) +{ + ptrdiff_t na = bool_vector_size (a); + ptrdiff_t nb = bool_vector_size (b); + /* Skip equal words. */ + ptrdiff_t words_min = min (na, nb) / BITS_PER_BITS_WORD; + bits_word *ad = bool_vector_data (a); + bits_word *bd = bool_vector_data (b); + ptrdiff_t i = 0; + while (i < words_min && ad[i] == bd[i]) + i++; + na -= i * BITS_PER_BITS_WORD; + nb -= i * BITS_PER_BITS_WORD; + eassume (na >= 0 && nb >= 0); + if (nb == 0) + return na != 0; + if (na == 0) + return -1; + + bits_word aw = bits_word_to_host_endian (ad[i]); + bits_word bw = bits_word_to_host_endian (bd[i]); + bits_word xw = aw ^ bw; + if (xw == 0) + return na < nb ? -1 : na > nb; + + bits_word d = xw & -xw; /* Isolate first difference. */ + eassume (d != 0); + return (d & aw) ? 1 : -1; +} + +/* Return -1, 0 or 1 to indicate whether ab in the sense of value<. + In particular 0 does not mean equality in the sense of Fequal, only + that the arguments cannot be ordered yet they can be compared (same + type). */ +static int +value_cmp (Lisp_Object a, Lisp_Object b, int maxdepth) +{ + if (maxdepth < 0) + error ("Maximum depth exceeded in comparison"); + + tail_recurse: + /* Shortcut for a common case. */ + if (BASE_EQ (a, b)) + return 0; + + switch (XTYPE (a)) + { + case_Lisp_Int: + { + EMACS_INT ia = XFIXNUM (a); + if (FIXNUMP (b)) + return ia < XFIXNUM (b) ? -1 : 1; /* we know that a≠b */ + if (FLOATP (b)) + return ia < XFLOAT_DATA (b) ? -1 : ia > XFLOAT_DATA (b); + if (BIGNUMP (b)) + return -mpz_sgn (*xbignum_val (b)); + } + goto type_mismatch; + + case Lisp_Symbol: + if (BARE_SYMBOL_P (b)) + return string_cmp (XBARE_SYMBOL (a)->u.s.name, + XBARE_SYMBOL (b)->u.s.name); + if (CONSP (b) && NILP (a)) + return -1; + if (SYMBOLP (b)) + /* Slow-path branch when B is a symbol-with-pos. */ + return string_cmp (XBARE_SYMBOL (a)->u.s.name, XSYMBOL (b)->u.s.name); + goto type_mismatch; + + case Lisp_String: + if (STRINGP (b)) + return string_cmp (a, b); + goto type_mismatch; + + case Lisp_Cons: + /* FIXME: Optimise for difference in the first element? */ + FOR_EACH_TAIL (b) + { + int cmp = value_cmp (XCAR (a), XCAR (b), maxdepth - 1); + if (cmp != 0) + return cmp; + a = XCDR (a); + if (!CONSP (a)) + { + b = XCDR (b); + goto tail_recurse; + } + } + if (NILP (b)) + return 1; + else + goto type_mismatch; + goto tail_recurse; + + case Lisp_Vectorlike: + if (VECTORLIKEP (b)) + { + enum pvec_type ta = PSEUDOVECTOR_TYPE (XVECTOR (a)); + enum pvec_type tb = PSEUDOVECTOR_TYPE (XVECTOR (b)); + if (ta == tb) + switch (ta) + { + case PVEC_NORMAL_VECTOR: + case PVEC_RECORD: + { + ptrdiff_t len_a = ASIZE (a); + ptrdiff_t len_b = ASIZE (b); + if (ta == PVEC_RECORD) + { + len_a &= PSEUDOVECTOR_SIZE_MASK; + len_b &= PSEUDOVECTOR_SIZE_MASK; + } + ptrdiff_t len_min = min (len_a, len_b); + for (ptrdiff_t i = 0; i < len_min; i++) + { + int cmp = value_cmp (AREF (a, i), AREF (b, i), + maxdepth - 1); + if (cmp != 0) + return cmp; + } + return len_a < len_b ? -1 : len_a > len_b; + } + + case PVEC_BOOL_VECTOR: + return bool_vector_cmp (a, b); + + case PVEC_MARKER: + { + Lisp_Object buf_a = Fmarker_buffer (a); + Lisp_Object buf_b = Fmarker_buffer (b); + if (NILP (buf_a)) + return NILP (buf_b) ? 0 : -1; + if (NILP (buf_b)) + return 1; + int cmp = value_cmp (buf_a, buf_b, maxdepth - 1); + if (cmp != 0) + return cmp; + ptrdiff_t pa = XMARKER (a)->charpos; + ptrdiff_t pb = XMARKER (b)->charpos; + return pa < pb ? -1 : pa > pb; + } + + case PVEC_PROCESS: + a = Fprocess_name (a); + b = Fprocess_name (b); + goto tail_recurse; + + case PVEC_BUFFER: + { + /* Killed buffers lack names and sort before those alive. */ + Lisp_Object na = Fbuffer_name (a); + Lisp_Object nb = Fbuffer_name (b); + if (NILP (na)) + return NILP (nb) ? 0 : -1; + if (NILP (nb)) + return 1; + a = na; + b = nb; + goto tail_recurse; + } + + case PVEC_BIGNUM: + return mpz_cmp (*xbignum_val (a), *xbignum_val (b)); + + case PVEC_SYMBOL_WITH_POS: + /* Compare by name, enabled or not. */ + a = XSYMBOL_WITH_POS_SYM (a); + b = XSYMBOL_WITH_POS_SYM (b); + goto tail_recurse; + + default: + /* Treat other types as unordered. */ + return 0; + } + } + else if (BIGNUMP (a)) + return -value_cmp (b, a, maxdepth); + else if (SYMBOL_WITH_POS_P (a) && symbols_with_pos_enabled) + { + a = XSYMBOL_WITH_POS_SYM (a); + goto tail_recurse; + } + + goto type_mismatch; + + case Lisp_Float: + { + double fa = XFLOAT_DATA (a); + if (FLOATP (b)) + return fa < XFLOAT_DATA (b) ? -1 : fa > XFLOAT_DATA (b); + if (FIXNUMP (b)) + return fa < XFIXNUM (b) ? -1 : fa > XFIXNUM (b); + if (BIGNUMP (b)) + { + if (isnan (fa)) + return 0; + return -mpz_cmp_d (*xbignum_val (b), fa); + } + } + goto type_mismatch; + + default: + eassume (0); + } + type_mismatch: + xsignal2 (Qtype_mismatch, a, b); +} + +DEFUN ("value<", Fvaluelt, Svaluelt, 2, 2, 0, + doc: /* Return non-nil if A precedes B in standard value order. +A and B must have the same basic type. +Numbers are compared with `<'. +Strings and symbols are compared with `string-lessp'. +Lists, vectors, bool-vectors and records are compared lexicographically. +Markers are compared lexicographically by buffer and position. +Buffers and processes are compared by name. +Other types are considered unordered and the return value will be `nil'. */) + (Lisp_Object a, Lisp_Object b) +{ + int maxdepth = 20; /* FIXME: arbitrary value */ + return value_cmp (a, b, maxdepth) < 0 ? Qt : Qnil; +} + DEFUN ("fillarray", Ffillarray, Sfillarray, 2, 2, 0, @@ -6589,6 +6826,7 @@ For best results this should end in a space. */); defsubr (&Seql); defsubr (&Sequal); defsubr (&Sequal_including_properties); + defsubr (&Svaluelt); defsubr (&Sfillarray); defsubr (&Sclear_string); defsubr (&Snconc); diff --git a/src/lisp.h b/src/lisp.h index f86758c88fb..5583a7e2e8e 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -1882,6 +1882,30 @@ bool_vector_bytes (EMACS_INT size) return (size + BOOL_VECTOR_BITS_PER_CHAR - 1) / BOOL_VECTOR_BITS_PER_CHAR; } +INLINE bits_word +bits_word_to_host_endian (bits_word val) +{ +#ifndef WORDS_BIGENDIAN + return val; +#else + if (BITS_WORD_MAX >> 31 == 1) + return bswap_32 (val); + if (BITS_WORD_MAX >> 31 >> 31 >> 1 == 1) + return bswap_64 (val); + { + int i; + bits_word r = 0; + for (i = 0; i < sizeof val; i++) + { + r = ((r << 1 << (CHAR_BIT - 1)) + | (val & ((1u << 1 << (CHAR_BIT - 1)) - 1))); + val = val >> 1 >> (CHAR_BIT - 1); + } + return r; + } +#endif +} + INLINE bool BOOL_VECTOR_P (Lisp_Object a) { diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 7437c07f156..844000cdc76 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -1513,4 +1513,222 @@ (should-error (copy-alist "abc") :type 'wrong-type-argument)) +(ert-deftest fns-value<-ordered () + ;; values (X . Y) where X nil: `b' is now always a proper prefix of `a'. + (should-not (value< a b)) + (should (value< b a))) + (t + ;; nil -> t: `a' is now less than `b'. + (should (value< a b)) + (should-not (value< b a)))) + ;; Undo the flip. + (aset b i val))))))))))) + ;;; fns-tests.el ends here