Skip to content

rustdoc search: prefer stable items in search results #141658

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/librustdoc/formats/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,7 @@ fn add_item_to_search_index(tcx: TyCtxt<'_>, cache: &mut Cache, item: &clean::It
search_type,
aliases,
deprecation,
is_unstable: item.stability(tcx).map(|x| x.is_unstable()).unwrap_or(false),
};
cache.search_index.push(index_item);
}
Expand Down
1 change: 1 addition & 0 deletions src/librustdoc/html/render/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ pub(crate) struct IndexItem {
pub(crate) search_type: Option<IndexItemFunctionType>,
pub(crate) aliases: Box<[Symbol]>,
pub(crate) deprecation: Option<Deprecation>,
pub(crate) is_unstable: bool,
}

/// A type used for the search index.
Expand Down
6 changes: 6 additions & 0 deletions src/librustdoc/html/render/search_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ pub(crate) fn build_index(
),
aliases: item.attrs.get_doc_aliases(),
deprecation: item.deprecation(tcx),
is_unstable: item.stability(tcx).map(|x| x.is_unstable()).unwrap_or(false),
});
}
}
Expand Down Expand Up @@ -642,6 +643,7 @@ pub(crate) fn build_index(
let mut parents_backref_queue = VecDeque::new();
let mut functions = String::with_capacity(self.items.len());
let mut deprecated = Vec::with_capacity(self.items.len());
let mut unstable = Vec::with_capacity(self.items.len());

let mut type_backref_queue = VecDeque::new();

Expand Down Expand Up @@ -698,6 +700,9 @@ pub(crate) fn build_index(
// bitmasks always use 1-indexing for items, with 0 as the crate itself
deprecated.push(u32::try_from(index + 1).unwrap());
}
if item.is_unstable {
unstable.push(u32::try_from(index + 1).unwrap());
}
}

for (index, path) in &revert_extra_paths {
Expand Down Expand Up @@ -736,6 +741,7 @@ pub(crate) fn build_index(
crate_data.serialize_field("r", &re_exports)?;
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?;
crate_data.serialize_field("u", &bitmap_to_string(&unstable))?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add this field only if unstable is not empty?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could, although i'm pretty sure it would make most doc bundles bigger, as the logic for handling the undef field would be more than 6 bytes, even when minified (i believe this is why we don't omit other fields either, even tho many crates have no reexports or aliases)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I'll keep that in a corner of my memory until I have some idea on how to make it better.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can get part of the way there by noticing js truthiness rules are just sane enough for this case, the only falsy string value is the empty string, so we can do | "" to cleanse the undef. we're still using a few chars for the field access though, so i think in order to make this profitable we have to put this in a loop and apply it to multiple fields. unfortunately our fields are not homogeneous, so we'll need a different default value for different fields, meaning we'd be barely breaking even in the average case.

honestly if we want to make the search index smaller, we're much better off chasing an improvement that scales, like delta compressing the row ids, instead of trying to save a single digit number of bytes.

crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?;
crate_data.serialize_field("P", &param_names)?;
if has_aliases {
Expand Down
5 changes: 4 additions & 1 deletion src/librustdoc/html/static/js/rustdoc.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ declare namespace rustdoc {

/**
* A single parsed "atom" in a search query. For example,
*
*
* std::fmt::Formatter, Write -> Result<()>
* ┏━━━━━━━━━━━━━━━━━━ ┌──── ┏━━━━━┅┅┅┅┄┄┄┄┄┄┄┄┄┄┄┄┄┄┐
* ┃ │ ┗ QueryElement { ┊
Expand Down Expand Up @@ -442,6 +442,8 @@ declare namespace rustdoc {
* of `p`) but is used for modules items like free functions.
*
* `c` is an array of item indices that are deprecated.
*
* `u` is an array of item indices that are unstable.
*/
type RawSearchIndexCrate = {
doc: string,
Expand All @@ -456,6 +458,7 @@ declare namespace rustdoc {
p: Array<[number, string] | [number, string, number] | [number, string, number, number] | [number, string, number, number, string]>,
b: Array<[number, String]>,
c: string,
u: string,
r: Array<[number, number]>,
P: Array<[number, string]>,
};
Expand Down
21 changes: 20 additions & 1 deletion src/librustdoc/html/static/js/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,11 @@ class DocSearch {
* @type {Map<String, RoaringBitmap>}
*/
this.searchIndexEmptyDesc = new Map();
/**
* @type {Map<String, RoaringBitmap>}
*/
this.searchIndexUnstable = new Map();

/**
* @type {Uint32Array}
*/
Expand Down Expand Up @@ -2048,9 +2053,10 @@ class DocSearch {
};
const descShardList = [descShard];

// Deprecated items and items with no description
// Deprecated and unstable items and items with no description
this.searchIndexDeprecated.set(crate, new RoaringBitmap(crateCorpus.c));
this.searchIndexEmptyDesc.set(crate, new RoaringBitmap(crateCorpus.e));
this.searchIndexUnstable.set(crate, new RoaringBitmap(crateCorpus.u));
let descIndex = 0;

/**
Expand Down Expand Up @@ -3284,6 +3290,19 @@ class DocSearch {
return a - b;
}

// sort unstable items later
a = Number(
// @ts-expect-error
this.searchIndexUnstable.get(aaa.item.crate).contains(aaa.item.bitIndex),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of retrieving this information every time, why not storing this information in the related item when we build the search index?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, you're right, we should keep linear scans out of inner loops if possible.

if we have a binary search impl, or if we ever pull one in in the future, we could actually use that to speed up the unpacking time for the search index (since we can pre-sort them when generating the index), though it's probably not worth embedding a binary search impl for something that only speeds up the standard libraries.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right, I copied this from the implementation of deprecated items sorting, so if we want to change one, we should probably change all of them (especially because those ones will be applicable to all crates).

Also, we have RoaringBitmap, so it's not a linear scan, actually.

I'm not sure what the memory impact of having 3 more boolean fields on each row is (especially when one of those is going to be unused in every non-sysroot crate), so I would kinda wanna do some degree of perf testing to see if this is worth it.

One thing we could do actually would be to add this data during transformResults, that way we're not adding a field to everything in the search index, only to the results that are currently being show, so we would only ever do 1 bitmap lookup per item per search, which may be better than doing it per comparsion, except for the fact that we're not doing it once per comparison, we're only doing it to compare when the items have the same edit distance, which is actually fairly likely to be less than the number of results.

TL;DR it's actually not obvious what the most performant solution would be, so I'd rather stick with what the code is already doing and then maybe make a followup PR focused on performance.

Copy link
Member

@GuillaumeGomez GuillaumeGomez Jun 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding this data when we build the search index seems like the way to go.

As for performance checking, @notriddle wrote a tool for it (which you can find here if I'm not wrong).

There are multiple ways of limiting the impact on performance. Another idea I had would be to put the new comparison in a function which would be created during search index creation: if we don't have any stability information, then the function is empty.

Anyway, it'll likely impact performance of all crates using rustdoc (both std/core/alloc and the others without stability info) so I'd really prefer to ensure what the impact is before approving it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another idea I had would be to put the new comparison in a function which would be created during search index creation: if we don't have any stability information, then the function is empty.

Is a dynamic function call faster than a searching an empty bitmap?
I suppose JITs have the advantage that they can inline closures dynamically, but I'm not sure how significant it would be.

I'm trying to run the perf tool, but i'm having a bit of trouble making a full custom toolchain, probably due to bootstrap settings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got it "working" and I think it's a bit out of date:

Testing T ... TypeError: searchModule.execQuery is not a function
    at Object.doSearch (/home/binarycat/src/rs/rustdoc-js-profile/src/tester.js:229:37)
    at main (/home/binarycat/src/rs/rustdoc-js-profile/src/tester.js:316:49)
    at Object.<anonymous> (/home/binarycat/src/rs/rustdoc-js-profile/src/tester.js:327:1)
    at Module._compile (node:internal/modules/cjs/loader:1734:14)
    at Object..js (node:internal/modules/cjs/loader:1899:10)
    at Module.load (node:internal/modules/cjs/loader:1469:32)
    at Module._load (node:internal/modules/cjs/loader:1286:12)
    at TracingChannel.traceSync (node:diagnostics_channel:322:14)
    at wrapModuleLoad (node:internal/modules/cjs/loader:235:24)
    at Module.executeUserEntryPoint [as runMain] (node:internal/modules/run_main:152:5)

);
b = Number(
// @ts-expect-error
this.searchIndexUnstable.get(bbb.item.crate).contains(bbb.item.bitIndex),
);
if (a !== b) {
return a - b;
}

// sort by crate (current crate comes first)
a = Number(aaa.item.crate !== preferredCrate);
b = Number(bbb.item.crate !== preferredCrate);
Expand Down
2 changes: 1 addition & 1 deletion tests/rustdoc-js-std/core-transmute.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ const EXPECTED = [
{
'query': 'generic:T -> generic:U',
'others': [
{ 'path': 'core::mem', 'name': 'transmute' },
{ 'path': 'core::intrinsics::simd', 'name': 'simd_as' },
{ 'path': 'core::intrinsics::simd', 'name': 'simd_cast' },
{ 'path': 'core::mem', 'name': 'transmute' },
],
},
];
2 changes: 1 addition & 1 deletion tests/rustdoc-js-std/transmute-fail.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ const EXPECTED = [
// should-fail tag and the search query below:
'query': 'generic:T -> generic:T',
'others': [
{ 'path': 'std::mem', 'name': 'transmute' },
{ 'path': 'std::intrinsics::simd', 'name': 'simd_as' },
{ 'path': 'std::intrinsics::simd', 'name': 'simd_cast' },
{ 'path': 'std::mem', 'name': 'transmute' },
],
},
];
2 changes: 1 addition & 1 deletion tests/rustdoc-js-std/transmute.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ const EXPECTED = [
// should-fail tag and the search query below:
'query': 'generic:T -> generic:U',
'others': [
{ 'path': 'std::mem', 'name': 'transmute' },
{ 'path': 'std::intrinsics::simd', 'name': 'simd_as' },
{ 'path': 'std::intrinsics::simd', 'name': 'simd_cast' },
{ 'path': 'std::mem', 'name': 'transmute' },
],
},
];
9 changes: 9 additions & 0 deletions tests/rustdoc-js/sort-stability.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const EXPECTED = [
{
'query': 'foo',
'others': [
{"path": "sort_stability::old", "name": "foo"},
{"path": "sort_stability::new", "name": "foo"},
],
},
];
16 changes: 16 additions & 0 deletions tests/rustdoc-js/sort-stability.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#![feature(staged_api)]
#![stable(feature = "foo_lib", since = "1.0.0")]

#[stable(feature = "old_foo", since = "1.0.1")]
pub mod old {
/// Old, stable foo
#[stable(feature = "old_foo", since = "1.0.1")]
pub fn foo() {}
}

#[unstable(feature = "new_foo", issue = "none")]
pub mod new {
/// New, unstable foo
#[unstable(feature = "new_foo", issue = "none")]
pub fn foo() {}
}
Loading