Skip to content

Commit 856ccf9

Browse files
committed
URL-encode canonical URLs when they can include UTF8 characters
1 parent ce8b117 commit 856ccf9

File tree

5 files changed

+74
-6
lines changed

5 files changed

+74
-6
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ tower-service = "0.3.2"
9999
tower-http = { version = "0.3.4", features = ["trace"] }
100100
mime = "0.3.16"
101101
httpdate = "1.0.2"
102+
percent-encoding = "2.2.0"
102103

103104
# NOTE: if you change this, also double-check that the comment in `queue_builder::remove_tempdirs` is still accurate.
104105
tempfile = "3.1.0"

src/web/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ use iron::{
115115
Chain, Handler, Iron, IronError, IronResult, Listening, Request, Response, Url,
116116
};
117117
use page::TemplateData;
118+
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
118119
use postgres::Client;
119120
use router::{NoRoute, TrailingSlash};
120121
use semver::{Version, VersionReq};
@@ -126,6 +127,15 @@ use tower::ServiceBuilder;
126127
use tower_http::trace::TraceLayer;
127128
use url::form_urlencoded;
128129

130+
// percent-encode sets taken from https://github.com/servo/rust-url/blob/master/url/src/parser.rs
131+
// and https://github.com/tokio-rs/axum/blob/main/axum-extra/src/lib.rs
132+
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
133+
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
134+
135+
pub(crate) fn encode_url_path(path: &str) -> String {
136+
utf8_percent_encode(path, PATH).to_string()
137+
}
138+
129139
/// Duration of static files for staticfile and DatabaseFileHandler (in seconds)
130140
const STATIC_FILE_CACHE_DURATION: u64 = 60 * 60 * 24 * 30 * 12; // 12 months
131141

src/web/rustdoc.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::{
1010
cache::CachePolicy,
1111
crate_details::CrateDetails,
1212
csp::Csp,
13+
encode_url_path,
1314
error::{AxumNope, AxumResult, Nope},
1415
file::File,
1516
match_version, match_version_axum,
@@ -571,7 +572,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult<Response> {
571572
let canonical_url = format!(
572573
"https://docs.rs/{}/latest/{}",
573574
name,
574-
inner_path.replace("index.html", "")
575+
encode_url_path(&inner_path.replace("index.html", ""))
575576
);
576577

577578
metrics
@@ -876,7 +877,11 @@ impl Handler for LegacySharedResourceHandler {
876877

877878
#[cfg(test)]
878879
mod test {
879-
use crate::{test::*, web::cache::CachePolicy, Config};
880+
use crate::{
881+
test::*,
882+
web::{cache::CachePolicy, encode_url_path},
883+
Config,
884+
};
880885
use anyhow::Context;
881886
use kuchiki::traits::TendrilSink;
882887
use reqwest::{blocking::ClientBuilder, redirect, StatusCode};
@@ -2347,11 +2352,13 @@ mod test {
23472352
.rustdoc_file("dummy_dash/index.html")
23482353
.create()?;
23492354

2355+
let utf8_filename = "序列化工具简单测试结果.html";
23502356
env.fake_release()
23512357
.name("dummy-docs")
23522358
.version("0.1.0")
23532359
.documentation_url(Some("https://docs.rs/foo".to_string()))
23542360
.rustdoc_file("dummy_docs/index.html")
2361+
.rustdoc_file(&format!("dummy_docs/{utf8_filename}"))
23552362
.create()?;
23562363

23572364
env.fake_release()
@@ -2384,6 +2391,20 @@ mod test {
23842391
.unwrap()
23852392
.contains("<https://docs.rs/dummy-docs/latest/dummy_docs/>; rel=\"canonical\""),);
23862393

2394+
assert_eq!(
2395+
web.get(&format!("/dummy-docs/0.1.0/dummy_docs/{utf8_filename}"))
2396+
.send()?
2397+
.headers()
2398+
.get("link")
2399+
.unwrap()
2400+
.to_str()
2401+
.unwrap(),
2402+
format!(
2403+
"<https://docs.rs/dummy-docs/latest/dummy_docs/{}>; rel=\"canonical\"",
2404+
encode_url_path(utf8_filename)
2405+
)
2406+
);
2407+
23872408
assert!(web
23882409
.get("/dummy-nodocs/0.1.0/dummy_nodocs/")
23892410
.send()?
@@ -2407,6 +2428,7 @@ mod test {
24072428
.unwrap()
24082429
.contains("<https://docs.rs/dummy-nodocs/latest/dummy_nodocs/struct.Foo.html>; rel=\"canonical\""),
24092430
);
2431+
24102432
Ok(())
24112433
})
24122434
}

src/web/source.rs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use crate::{
44
impl_axum_webpage,
55
utils::{get_correct_docsrs_style_file, spawn_blocking},
66
web::{
7-
cache::CachePolicy, error::AxumNope, file::File as DbFile, headers::CanonicalUrl,
8-
MatchSemver, MetaData,
7+
cache::CachePolicy, encode_url_path, error::AxumNope, file::File as DbFile,
8+
headers::CanonicalUrl, MatchSemver, MetaData,
99
},
1010
Storage,
1111
};
@@ -253,7 +253,11 @@ pub(crate) async fn source_browser_handler(
253253
})
254254
.await?;
255255

256-
let canonical_url = format!("https://docs.rs/crate/{}/latest/source/{}", name, path);
256+
let canonical_url = format!(
257+
"https://docs.rs/crate/{}/latest/source/{}",
258+
name,
259+
encode_url_path(&path),
260+
);
257261

258262
let (file, file_content) = if let Some(blob) = blob {
259263
let is_text = blob.mime.starts_with("text") || blob.mime == "application/json";
@@ -321,7 +325,7 @@ pub(crate) async fn source_browser_handler(
321325
#[cfg(test)]
322326
mod tests {
323327
use crate::test::*;
324-
use crate::web::cache::CachePolicy;
328+
use crate::web::{cache::CachePolicy, encode_url_path};
325329
use kuchiki::traits::TendrilSink;
326330
use reqwest::StatusCode;
327331
use test_case::test_case;
@@ -338,6 +342,36 @@ mod tests {
338342
.collect()
339343
}
340344

345+
#[test_case(true)]
346+
#[test_case(false)]
347+
fn fetch_source_file_utf8_path(archive_storage: bool) {
348+
wrapper(|env| {
349+
let filename = "序列化工具简单测试结果.pdf";
350+
351+
env.fake_release()
352+
.archive_storage(archive_storage)
353+
.name("fake")
354+
.version("0.1.0")
355+
.source_file(filename, b"some_random_content")
356+
.create()?;
357+
358+
let web = env.frontend();
359+
let response = web
360+
.get(&format!("/crate/fake/0.1.0/source/{filename}"))
361+
.send()?;
362+
assert!(response.status().is_success());
363+
assert_eq!(
364+
response.headers().get("link").unwrap(),
365+
&format!(
366+
"<https://docs.rs/crate/fake/latest/source/{}>; rel=\"canonical\"",
367+
encode_url_path(filename),
368+
)
369+
);
370+
assert!(response.text()?.contains("some_random_content"));
371+
Ok(())
372+
});
373+
}
374+
341375
#[test_case(true)]
342376
#[test_case(false)]
343377
fn fetch_source_file_content(archive_storage: bool) {

0 commit comments

Comments
 (0)