From eb0258d91885c0d298f98a2dd1020f1890204866 Mon Sep 17 00:00:00 2001 From: Jared Reyes Date: Sat, 7 Feb 2026 11:05:04 +1100 Subject: [PATCH 1/3] Add comprehensive fuzz targets for all workspace crates Add 7 fuzz targets covering the entire rust-url workspace: - fuzz_url_parse_roundtrip: URL parse/serialize roundtrip invariant checking - fuzz_url_differential: relative URL resolution and make_relative roundtrip - fuzz_url_setters: URL mutation via setters with validity invariant checks - fuzz_idna: IDNA domain_to_ascii/domain_to_unicode roundtrip + Punycode - fuzz_data_url: data: URL processing and base64 decoding - fuzz_form_urlencoded: form-urlencoded parse/serialize roundtrip - fuzz_percent_encoding: percent encode/decode roundtrip across ASCII sets Also includes: - Seed corpus with representative URL samples - Fuzzing dictionary for URL/IDNA/data-url tokens - CIFuzz workflow to fuzz all pull requests automatically --- .github/workflows/cifuzz.yml | 32 ++++++++ fuzz/Cargo.toml | 57 +++++++++++++ fuzz/corpus/seed/idna_01 | 1 + fuzz/corpus/seed/idna_02 | 1 + fuzz/corpus/seed/url_01 | 1 + fuzz/corpus/seed/url_02 | 1 + fuzz/corpus/seed/url_03 | 1 + fuzz/corpus/seed/url_04 | 1 + fuzz/corpus/seed/url_05 | 1 + fuzz/corpus/seed/url_06 | 1 + fuzz/corpus/seed/url_07 | 1 + fuzz/corpus/seed/url_08 | 1 + fuzz/corpus/seed/url_09 | 1 + fuzz/corpus/seed/url_10 | 1 + fuzz/fuzz.dict | 81 ++++++++++++++++++ fuzz/fuzz_targets/fuzz_data_url.rs | 48 +++++++++++ fuzz/fuzz_targets/fuzz_form_urlencoded.rs | 35 ++++++++ fuzz/fuzz_targets/fuzz_idna.rs | 64 +++++++++++++++ fuzz/fuzz_targets/fuzz_percent_encoding.rs | 82 +++++++++++++++++++ fuzz/fuzz_targets/fuzz_url_differential.rs | 55 +++++++++++++ fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs | 44 ++++++++++ fuzz/fuzz_targets/fuzz_url_setters.rs | 78 ++++++++++++++++++ 22 files changed, 588 insertions(+) create mode 100644 .github/workflows/cifuzz.yml create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/corpus/seed/idna_01 create 
mode 100644 fuzz/corpus/seed/idna_02 create mode 100644 fuzz/corpus/seed/url_01 create mode 100644 fuzz/corpus/seed/url_02 create mode 100644 fuzz/corpus/seed/url_03 create mode 100644 fuzz/corpus/seed/url_04 create mode 100644 fuzz/corpus/seed/url_05 create mode 100644 fuzz/corpus/seed/url_06 create mode 100644 fuzz/corpus/seed/url_07 create mode 100644 fuzz/corpus/seed/url_08 create mode 100644 fuzz/corpus/seed/url_09 create mode 100644 fuzz/corpus/seed/url_10 create mode 100644 fuzz/fuzz.dict create mode 100644 fuzz/fuzz_targets/fuzz_data_url.rs create mode 100644 fuzz/fuzz_targets/fuzz_form_urlencoded.rs create mode 100644 fuzz/fuzz_targets/fuzz_idna.rs create mode 100644 fuzz/fuzz_targets/fuzz_percent_encoding.rs create mode 100644 fuzz/fuzz_targets/fuzz_url_differential.rs create mode 100644 fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs create mode 100644 fuzz/fuzz_targets/fuzz_url_setters.rs diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml new file mode 100644 index 000000000..e9623e7cf --- /dev/null +++ b/.github/workflows/cifuzz.yml @@ -0,0 +1,32 @@ +name: CIFuzz +on: + pull_request: + branches: + - main + +permissions: {} + +jobs: + Fuzzing: + runs-on: ubuntu-latest + permissions: + security-events: write + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: "rust-url" + language: rust + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: "rust-url" + language: rust + fuzz-seconds: 600 + - name: Upload Crash + uses: actions/upload-artifact@v4 + if: failure() && steps.build.outcome == 'success' + with: + name: artifacts + path: ./out/artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 000000000..382e30ace --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "rust-url-fuzz" +version = "0.0.1" +authors = ["Automatically generated"] 
+publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +url = { path = "../url" } +idna = { path = "../idna", features = ["std"] } +percent-encoding = { path = "../percent_encoding", features = ["alloc"] } +form_urlencoded = { path = "../form_urlencoded", features = ["alloc"] } +data-url = { path = "../data-url", features = ["std"] } + +# --- Fuzz targets --- + +[[bin]] +name = "fuzz_url_parse_roundtrip" +path = "fuzz_targets/fuzz_url_parse_roundtrip.rs" +doc = false + +[[bin]] +name = "fuzz_url_differential" +path = "fuzz_targets/fuzz_url_differential.rs" +doc = false + +[[bin]] +name = "fuzz_url_setters" +path = "fuzz_targets/fuzz_url_setters.rs" +doc = false + +[[bin]] +name = "fuzz_idna" +path = "fuzz_targets/fuzz_idna.rs" +doc = false + +[[bin]] +name = "fuzz_data_url" +path = "fuzz_targets/fuzz_data_url.rs" +doc = false + +[[bin]] +name = "fuzz_form_urlencoded" +path = "fuzz_targets/fuzz_form_urlencoded.rs" +doc = false + +[[bin]] +name = "fuzz_percent_encoding" +path = "fuzz_targets/fuzz_percent_encoding.rs" +doc = false + +[workspace] +members = ["."] diff --git a/fuzz/corpus/seed/idna_01 b/fuzz/corpus/seed/idna_01 new file mode 100644 index 000000000..06c159d73 --- /dev/null +++ b/fuzz/corpus/seed/idna_01 @@ -0,0 +1 @@ +münchen.de \ No newline at end of file diff --git a/fuzz/corpus/seed/idna_02 b/fuzz/corpus/seed/idna_02 new file mode 100644 index 000000000..99b3b7437 --- /dev/null +++ b/fuzz/corpus/seed/idna_02 @@ -0,0 +1 @@ +xn--mnchen-3ya.de \ No newline at end of file diff --git a/fuzz/corpus/seed/url_01 b/fuzz/corpus/seed/url_01 new file mode 100644 index 000000000..bf54804e9 --- /dev/null +++ b/fuzz/corpus/seed/url_01 @@ -0,0 +1 @@ +https://example.com/path?query=value#fragment \ No newline at end of file diff --git a/fuzz/corpus/seed/url_02 b/fuzz/corpus/seed/url_02 new file mode 100644 index 000000000..dfd944647 --- /dev/null +++ b/fuzz/corpus/seed/url_02 @@ -0,0 +1 @@ 
+http://user:password@host.example.com:8080/path/to/resource?key=val&key2=val2#frag \ No newline at end of file diff --git a/fuzz/corpus/seed/url_03 b/fuzz/corpus/seed/url_03 new file mode 100644 index 000000000..e36d2e67a --- /dev/null +++ b/fuzz/corpus/seed/url_03 @@ -0,0 +1 @@ +ftp://ftp.example.com/pub/files/readme.txt \ No newline at end of file diff --git a/fuzz/corpus/seed/url_04 b/fuzz/corpus/seed/url_04 new file mode 100644 index 000000000..2609dbc7d --- /dev/null +++ b/fuzz/corpus/seed/url_04 @@ -0,0 +1 @@ +file:///tmp/local/file.txt \ No newline at end of file diff --git a/fuzz/corpus/seed/url_05 b/fuzz/corpus/seed/url_05 new file mode 100644 index 000000000..504138580 --- /dev/null +++ b/fuzz/corpus/seed/url_05 @@ -0,0 +1 @@ +https://[::1]:443/ipv6 \ No newline at end of file diff --git a/fuzz/corpus/seed/url_06 b/fuzz/corpus/seed/url_06 new file mode 100644 index 000000000..1927b5602 --- /dev/null +++ b/fuzz/corpus/seed/url_06 @@ -0,0 +1 @@ +https://xn--nxasmq6b.example.com/idn \ No newline at end of file diff --git a/fuzz/corpus/seed/url_07 b/fuzz/corpus/seed/url_07 new file mode 100644 index 000000000..a763ffe79 --- /dev/null +++ b/fuzz/corpus/seed/url_07 @@ -0,0 +1 @@ +data:text/plain;base64,SGVsbG8gV29ybGQh \ No newline at end of file diff --git a/fuzz/corpus/seed/url_08 b/fuzz/corpus/seed/url_08 new file mode 100644 index 000000000..59ca05f80 --- /dev/null +++ b/fuzz/corpus/seed/url_08 @@ -0,0 +1 @@ +data:text/html,%3Ch1%3EHello%3C%2Fh1%3E \ No newline at end of file diff --git a/fuzz/corpus/seed/url_09 b/fuzz/corpus/seed/url_09 new file mode 100644 index 000000000..c0644cc06 --- /dev/null +++ b/fuzz/corpus/seed/url_09 @@ -0,0 +1 @@ +https://example.com/path%20with%20spaces?q=%E4%B8%AD%E6%96%87 \ No newline at end of file diff --git a/fuzz/corpus/seed/url_10 b/fuzz/corpus/seed/url_10 new file mode 100644 index 000000000..ac7a50b37 --- /dev/null +++ b/fuzz/corpus/seed/url_10 @@ -0,0 +1 @@ 
+https://example.com/?foo=bar&baz=qux&empty=&key+with+plus=value+with+plus \ No newline at end of file diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict new file mode 100644 index 000000000..ee9b77d6a --- /dev/null +++ b/fuzz/fuzz.dict @@ -0,0 +1,81 @@ +# URL schemes +"http://" +"https://" +"ftp://" +"file://" +"data:" +"blob:" +"ws://" +"wss://" +"custom://" + +# URL delimiters +"://" +":/" +"//" +"/" +"?" +"#" +"@" +":" +";" + +# Common URL components +"example.com" +"localhost" +"127.0.0.1" +"[::1]" +"[2001:db8::1]" +"0.0.0.0" + +# Percent encoding +"%00" +"%20" +"%25" +"%2F" +"%3A" +"%3F" +"%40" +"%23" +"%26" +"%3D" +"%C3%A9" +"%E4%B8%AD" + +# Form URL encoded +"&" +"=" +"+" +"key=value" +"a=b&c=d" + +# IDNA / Punycode +"xn--" +"xn--nxasmq6b" +".com" +".de" +".org" + +# Data URL +"data:," +"data:text/plain," +"data:text/plain;base64," +"data:text/html," +"data:application/octet-stream;base64," +";base64" +";charset=utf-8" +";charset=US-ASCII" + +# Base64 +"SGVsbG8=" +"AAAA" +"////+" + +# Special characters +"\t" +"\n" +"\r" +" " +"\\" +".." +"." 
diff --git a/fuzz/fuzz_targets/fuzz_data_url.rs b/fuzz/fuzz_targets/fuzz_data_url.rs new file mode 100644 index 000000000..774737afe --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_data_url.rs @@ -0,0 +1,48 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use std::str; + +fuzz_target!(|data: &[u8]| { + let Ok(utf8) = str::from_utf8(data) else { + return; + }; + + let Ok(data_url) = data_url::DataUrl::process(utf8) else { + return; + }; + + // Access MIME type (should not panic) + let mime = data_url.mime_type(); + let _ = mime.type_.len(); + let _ = mime.subtype.len(); + for (name, value) in &mime.parameters { + let _ = name.len(); + let _ = value.len(); + } + + // Decode body (should not panic) + match data_url.decode_to_vec() { + Ok((body, fragment)) => { + // Body must be valid bytes + let _ = body.len(); + if let Some(frag) = fragment { + // Fragment percent-encoding should produce valid UTF-8 + let _ = frag.to_percent_encoded(); + } + } + Err(_) => { + // Base64 decode errors are expected for malformed input + } + } + + // Test streaming decode + let mut chunks = Vec::new(); + let _ = data_url.decode(|bytes| { + chunks.push(bytes.to_vec()); + Ok::<(), std::convert::Infallible>(()) + }); + + // Test forgiving_base64 directly + let _ = data_url::forgiving_base64::decode_to_vec(data); +}); diff --git a/fuzz/fuzz_targets/fuzz_form_urlencoded.rs b/fuzz/fuzz_targets/fuzz_form_urlencoded.rs new file mode 100644 index 000000000..673e4998f --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_form_urlencoded.rs @@ -0,0 +1,35 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Parse the input as form-urlencoded data + let pairs: Vec<(String, String)> = form_urlencoded::parse(data) + .into_owned() + .collect(); + + // Roundtrip invariant: serialize and re-parse should produce the same pairs + let mut serializer = form_urlencoded::Serializer::new(String::new()); + for (name, value) in &pairs { + serializer.append_pair(name, value); + } + let 
serialized = serializer.finish(); + + let reparsed: Vec<(String, String)> = form_urlencoded::parse(serialized.as_bytes()) + .into_owned() + .collect(); + + // The key insight: form_urlencoded uses lossy UTF-8 decoding, + // so we need to compare the parsed pairs (not raw bytes). + // After one roundtrip through parse->serialize->parse, the result should be stable. + assert_eq!( + pairs, reparsed, + "form_urlencoded roundtrip mismatch: serialized={:?}", + serialized + ); + + // Test byte_serialize roundtrip + let byte_serialized: String = form_urlencoded::byte_serialize(data).collect(); + // byte_serialize output should be valid UTF-8 (it produces &str slices) + let _ = byte_serialized.len(); +}); diff --git a/fuzz/fuzz_targets/fuzz_idna.rs b/fuzz/fuzz_targets/fuzz_idna.rs new file mode 100644 index 000000000..3e36b9328 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_idna.rs @@ -0,0 +1,64 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use std::str; + +fuzz_target!(|data: &[u8]| { + // Test domain_to_ascii_cow (primary entry point, takes &[u8]) + let _ = idna::domain_to_ascii_cow(data, idna::AsciiDenyList::URL); + let _ = idna::domain_to_ascii_cow(data, idna::AsciiDenyList::EMPTY); + let _ = idna::domain_to_ascii_cow(data, idna::AsciiDenyList::STD3); + + let Ok(utf8) = str::from_utf8(data) else { + return; + }; + + // Test domain_to_ascii (takes &str) + let ascii_result = idna::domain_to_ascii(utf8); + let strict_result = idna::domain_to_ascii_strict(utf8); + + // Roundtrip invariant: if we can convert to ASCII, converting to Unicode + // and back to ASCII should produce the same result + if let Ok(ref ascii) = ascii_result { + let (unicode, unicode_result) = idna::domain_to_unicode(ascii); + if unicode_result.is_ok() { + if let Ok(back_to_ascii) = idna::domain_to_ascii(&unicode) { + assert_eq!( + ascii.to_lowercase(), + back_to_ascii.to_lowercase(), + "IDNA roundtrip mismatch: input={:?}, ascii={:?}, unicode={:?}, back={:?}", + utf8, + ascii, + unicode, + 
back_to_ascii + ); + } + } + } + + // Consistency: strict mode should be a subset of non-strict + if strict_result.is_ok() { + assert!( + ascii_result.is_ok(), + "strict succeeded but non-strict failed for {:?}", + utf8 + ); + } + + // Test domain_to_unicode + let (unicode_str, _result) = idna::domain_to_unicode(utf8); + + // The Unicode result should itself be valid UTF-8 (it's a String) + let _ = unicode_str.len(); + + // Test Punycode encode/decode roundtrip + if let Some(encoded) = idna::punycode::encode_str(utf8) { + if let Some(decoded) = idna::punycode::decode_to_string(&encoded) { + assert_eq!( + utf8, decoded, + "Punycode roundtrip mismatch: input={:?}, encoded={:?}, decoded={:?}", + utf8, encoded, decoded + ); + } + } +}); diff --git a/fuzz/fuzz_targets/fuzz_percent_encoding.rs b/fuzz/fuzz_targets/fuzz_percent_encoding.rs new file mode 100644 index 000000000..a4345526f --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_percent_encoding.rs @@ -0,0 +1,82 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use percent_encoding::{ + percent_decode, percent_decode_str, percent_encode, utf8_percent_encode, AsciiSet, CONTROLS, + NON_ALPHANUMERIC, +}; +use std::borrow::Cow; +use std::str; + +/// https://url.spec.whatwg.org/#fragment-percent-encode-set +const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); + +/// https://url.spec.whatwg.org/#path-percent-encode-set +const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}'); + +/// https://url.spec.whatwg.org/#userinfo-percent-encode-set +const USERINFO: &AsciiSet = &PATH + .add(b'/') + .add(b':') + .add(b';') + .add(b'=') + .add(b'@') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'^') + .add(b'|'); + +fuzz_target!(|data: &[u8]| { + if data.is_empty() { + return; + } + + let ascii_sets: [&AsciiSet; 4] = [&CONTROLS, NON_ALPHANUMERIC, FRAGMENT, USERINFO]; + let set_idx = data[0] as usize % ascii_sets.len(); + let ascii_set = ascii_sets[set_idx]; + let input = &data[1..]; + 
+ // Test percent_encode -> percent_decode roundtrip + let encoded: Cow<str> = percent_encode(input, ascii_set).into(); + + // Encoded output must be valid UTF-8 (it's a Cow<str>) + let _ = encoded.len(); + + // Decode the encoded result + let decoded: Cow<[u8]> = percent_decode(encoded.as_bytes()).into(); + assert_eq!( + &*decoded, input, + "percent_encode/decode roundtrip mismatch with set index {}", + set_idx + ); + + // Test UTF-8 path: if input is valid UTF-8, utf8_percent_encode should work too + if let Ok(utf8_input) = str::from_utf8(input) { + let utf8_encoded = utf8_percent_encode(utf8_input, ascii_set).to_string(); + + // Decode should recover original + let utf8_decoded = percent_decode_str(&utf8_encoded) + .decode_utf8() + .expect("decoding percent-encoded UTF-8 must produce valid UTF-8"); + assert_eq!( + utf8_input, &*utf8_decoded, + "utf8_percent_encode roundtrip mismatch" + ); + } + + // Test percent_decode directly on raw input + let direct_decoded: Cow<[u8]> = percent_decode(input).into(); + // Re-encoding the decoded bytes and decoding again should be stable + let re_encoded: Cow<str> = percent_encode(&direct_decoded, ascii_set).into(); + let re_decoded: Cow<[u8]> = percent_decode(re_encoded.as_bytes()).into(); + assert_eq!( + &*direct_decoded, &*re_decoded, + "double roundtrip mismatch" + ); + + // Test percent_decode_str if input is valid UTF-8 + if let Ok(utf8_input) = str::from_utf8(input) { + let _ = percent_decode_str(utf8_input).decode_utf8_lossy(); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_url_differential.rs b/fuzz/fuzz_targets/fuzz_url_differential.rs new file mode 100644 index 000000000..fc86581cb --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_url_differential.rs @@ -0,0 +1,55 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use std::str; +use url::Url; + +fuzz_target!(|data: &[u8]| { + if data.len() < 2 { + return; + } + + let Ok(utf8) = str::from_utf8(data) else { + return; + }; + + // Split input into a base URL part and a relative part + let 
split = (data[0] as usize) % utf8.len().max(1); + let (base_str, relative_str) = utf8.split_at(split); + + // Try parsing base as absolute URL + let Ok(base) = Url::parse(base_str) else { + return; + }; + + // Test relative URL resolution + if let Ok(resolved) = base.join(relative_str) { + // The resolved URL must be valid + let serialized = resolved.as_str(); + let reparsed = + Url::parse(serialized).expect("re-parsing a resolved URL must succeed"); + assert_eq!(resolved.as_str(), reparsed.as_str()); + + // make_relative + join should roundtrip for non-opaque paths + if !base.cannot_be_a_base() && !resolved.cannot_be_a_base() { + if let Some(relative) = resolved.make_relative(&base) { + // Re-resolving the relative URL from base should give the same result + if let Ok(re_resolved) = base.join(&relative) { + // Scheme and host should match + assert_eq!(re_resolved.scheme(), resolved.scheme()); + assert_eq!(re_resolved.host_str(), resolved.host_str()); + } + } + } + } + + // Test parse_with_params + if utf8.len() < 500 { + let params = [("key", "value"), ("a", "b")]; + if let Ok(with_params) = Url::parse_with_params(utf8, &params) { + let query = with_params.query().unwrap_or(""); + assert!(query.contains("key=value")); + assert!(query.contains("a=b")); + } + } +}); diff --git a/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs b/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs new file mode 100644 index 000000000..8df8ef62a --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs @@ -0,0 +1,44 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use std::str; +use url::Url; + +fuzz_target!(|data: &[u8]| { + let Ok(utf8) = str::from_utf8(data) else { + return; + }; + + // Parse the input as a URL + let Ok(parsed) = Url::parse(utf8) else { + return; + }; + + // Roundtrip invariant: serializing and re-parsing must produce the same URL + let serialized = parsed.as_str(); + let reparsed = Url::parse(serialized).expect("re-parsing a serialized URL must succeed"); + 
assert_eq!( + parsed.as_str(), + reparsed.as_str(), + "roundtrip mismatch for input: {:?}", + utf8 + ); + + // Component invariant: individual components must be consistent + assert_eq!(parsed.scheme(), reparsed.scheme()); + assert_eq!(parsed.username(), reparsed.username()); + assert_eq!(parsed.password(), reparsed.password()); + assert_eq!(parsed.host_str(), reparsed.host_str()); + assert_eq!(parsed.port(), reparsed.port()); + assert_eq!(parsed.path(), reparsed.path()); + assert_eq!(parsed.query(), reparsed.query()); + assert_eq!(parsed.fragment(), reparsed.fragment()); + + // Join invariant: joining an absolute URL with itself yields the same URL + if let Ok(joined) = parsed.join(serialized) { + assert_eq!(joined.as_str(), serialized); + } + + // Origin consistency + let _ = parsed.origin(); +}); diff --git a/fuzz/fuzz_targets/fuzz_url_setters.rs b/fuzz/fuzz_targets/fuzz_url_setters.rs new file mode 100644 index 000000000..7402d04d2 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_url_setters.rs @@ -0,0 +1,78 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use std::str; +use url::Url; + +fuzz_target!(|data: &[u8]| { + if data.len() < 3 { + return; + } + + let Ok(utf8) = str::from_utf8(&data[2..]) else { + return; + }; + + // Use first byte to select a base URL, second byte to select which setter to test + let base_urls = [ + "https://example.com/path?query#fragment", + "http://user:pass@host:8080/a/b/c", + "ftp://files.example.com/pub", + "file:///tmp/test", + "custom://example", + ]; + + let base_idx = data[0] as usize % base_urls.len(); + let setter_idx = data[1] % 10; + + let mut url = Url::parse(base_urls[base_idx]).unwrap(); + let original = url.as_str().to_string(); + + match setter_idx { + 0 => { + let _ = url.set_scheme(utf8); + } + 1 => { + let _ = url.set_host(Some(utf8)); + } + 2 => { + let _ = url.set_host(None); + } + 3 => { + let _ = url.set_username(utf8); + } + 4 => { + let _ = url.set_password(Some(utf8)); + } + 5 => { + url.set_path(utf8); + } + 
6 => { + url.set_query(Some(utf8)); + } + 7 => { + url.set_fragment(Some(utf8)); + } + 8 => { + if let Ok(port) = utf8.parse::<u16>() { + let _ = url.set_port(Some(port)); + } + } + 9 => { + if let Ok(mut segs) = url.path_segments_mut() { + segs.push(utf8); + } + } + _ => {} + } + + // After mutation, the URL must still be valid (roundtrip) + let modified = url.as_str().to_string(); + let reparsed = Url::parse(&modified).unwrap_or_else(|e| { + panic!( + "URL became invalid after mutation: {:?}\noriginal: {}\nmodified: {}\nerror: {}", + setter_idx, original, modified, e + ); + }); + assert_eq!(url.as_str(), reparsed.as_str()); +}); From 929d07a79a62c29d94f641087b2bab0854a6eb62 Mon Sep 17 00:00:00 2001 From: Jared Reyes Date: Sat, 7 Feb 2026 11:25:03 +1100 Subject: [PATCH 2/3] Fix false positives in fuzz targets found during local fuzzing - fuzz_percent_encoding: use NON_ALPHANUMERIC for roundtrip assertions since it encodes '%', preventing spurious decode mismatches - fuzz_url_differential: use char_indices() to split UTF-8 input on valid character boundaries, preventing panics on multi-byte chars - fuzz.dict: replace C-style escapes (\t, \n, \r, \\) with \xHH hex escapes required by libfuzzer dictionary format --- fuzz/fuzz.dict | 8 +++--- fuzz/fuzz_targets/fuzz_percent_encoding.rs | 32 +++++++++++----------- fuzz/fuzz_targets/fuzz_url_differential.rs | 7 ++++- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict index ee9b77d6a..023cd90ea 100644 --- a/fuzz/fuzz.dict +++ b/fuzz/fuzz.dict @@ -72,10 +72,10 @@ "////+" # Special characters -"\t" -"\n" -"\r" +"\x09" +"\x0a" +"\x0d" " " -"\\" +"\x5c" ".." "." 
diff --git a/fuzz/fuzz_targets/fuzz_percent_encoding.rs b/fuzz/fuzz_targets/fuzz_percent_encoding.rs index a4345526f..7b178a661 100644 --- a/fuzz/fuzz_targets/fuzz_percent_encoding.rs +++ b/fuzz/fuzz_targets/fuzz_percent_encoding.rs @@ -32,30 +32,30 @@ fuzz_target!(|data: &[u8]| { return; } + // Use NON_ALPHANUMERIC for roundtrip tests since it includes '%', + // ensuring encode→decode is a true roundtrip. Sets that don't encode '%' + // will cause percent_decode to interpret literal %XX in the input. let ascii_sets: [&AsciiSet; 4] = [&CONTROLS, NON_ALPHANUMERIC, FRAGMENT, USERINFO]; let set_idx = data[0] as usize % ascii_sets.len(); let ascii_set = ascii_sets[set_idx]; let input = &data[1..]; - // Test percent_encode -> percent_decode roundtrip - let encoded: Cow<str> = percent_encode(input, ascii_set).into(); - - // Encoded output must be valid UTF-8 (it's a Cow<str>) - let _ = encoded.len(); - - // Decode the encoded result - let decoded: Cow<[u8]> = percent_decode(encoded.as_bytes()).into(); + // Test percent_encode -> percent_decode roundtrip with NON_ALPHANUMERIC + // (which encodes '%', guaranteeing a clean roundtrip) + let safe_encoded: Cow<str> = percent_encode(input, NON_ALPHANUMERIC).into(); + let safe_decoded: Cow<[u8]> = percent_decode(safe_encoded.as_bytes()).into(); assert_eq!( - &*decoded, input, - "percent_encode/decode roundtrip mismatch with set index {}", - set_idx + &*safe_decoded, input, + "percent_encode/decode roundtrip mismatch with NON_ALPHANUMERIC" ); + // Test that encoding with the selected set produces valid output + let encoded: Cow<str> = percent_encode(input, ascii_set).into(); + let _ = encoded.len(); + // Test UTF-8 path: if input is valid UTF-8, utf8_percent_encode should work too if let Ok(utf8_input) = str::from_utf8(input) { - let utf8_encoded = utf8_percent_encode(utf8_input, ascii_set).to_string(); - - // Decode should recover original + let utf8_encoded = utf8_percent_encode(utf8_input, NON_ALPHANUMERIC).to_string(); let utf8_decoded = 
percent_decode_str(&utf8_encoded) .decode_utf8() .expect("decoding percent-encoded UTF-8 must produce valid UTF-8"); @@ -67,8 +67,8 @@ fuzz_target!(|data: &[u8]| { // Test percent_decode directly on raw input let direct_decoded: Cow<[u8]> = percent_decode(input).into(); - // Re-encoding the decoded bytes and decoding again should be stable - let re_encoded: Cow<str> = percent_encode(&direct_decoded, ascii_set).into(); + // Re-encoding with NON_ALPHANUMERIC and decoding again should be stable + let re_encoded: Cow<str> = percent_encode(&direct_decoded, NON_ALPHANUMERIC).into(); let re_decoded: Cow<[u8]> = percent_decode(re_encoded.as_bytes()).into(); assert_eq!( &*direct_decoded, &*re_decoded, diff --git a/fuzz/fuzz_targets/fuzz_url_differential.rs b/fuzz/fuzz_targets/fuzz_url_differential.rs index fc86581cb..ee97207cd 100644 --- a/fuzz/fuzz_targets/fuzz_url_differential.rs +++ b/fuzz/fuzz_targets/fuzz_url_differential.rs @@ -13,8 +13,13 @@ fuzz_target!(|data: &[u8]| { return; }; - // Split input into a base URL part and a relative part + // Split input into a base URL part and a relative part. + // Ensure we split on a char boundary. let split = (data[0] as usize) % utf8.len().max(1); + let split = match utf8.char_indices().find(|&(i, _)| i >= split) { + Some((i, _)) => i, + None => utf8.len(), + }; let (base_str, relative_str) = utf8.split_at(split); // Try parsing base as absolute URL From 3c7adca303fc624288b62fd5947290e0376e0721 Mon Sep 17 00:00:00 2001 From: Jared Reyes Date: Sat, 7 Feb 2026 14:15:44 +1100 Subject: [PATCH 3/3] Fix file:// URL roundtrip bugs (#1101, #1102) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes two bugs found through fuzzing that caused file:// URLs to fail roundtrip tests (parse → serialize → parse). Bug #1101: File URLs with hosts and paths starting with multiple slashes were losing their host component during roundtrip. 
The path normalization logic was too aggressive in stripping leading slashes, which changed how the URL was interpreted on re-parsing. Fix: Preserve path structure when a host component is present, only normalizing leading slashes for hostless file:// URLs. Bug #1102: Calling set_host("localhost") on file:// URLs didn't apply the same normalization as the parser, which converts "localhost" to an empty host per WHATWG spec. Fix: Normalize "localhost" to empty host in set_host() for file:// URLs, matching parser behavior. Both fixes improve WHATWG URL spec compliance and resolve 4 previously failing Web Platform Tests: - file://spider/// - file://monkey/ with pathname set to \\\\ - file:///unicorn with pathname set to //\\/ - file:///unicorn with pathname set to //monkey/..// --- url/src/lib.rs | 12 +++++++++ url/src/parser.rs | 27 +++++++++++++++++---- url/tests/expected_failures.txt | 4 --- url/tests/roundtrip_bugs.rs | 43 +++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 url/tests/roundtrip_bugs.rs diff --git a/url/src/lib.rs b/url/src/lib.rs index f1558682b..0f76ff14a 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -2025,6 +2025,18 @@ impl Url { let scheme_type = SchemeType::from(self.scheme()); + // Normalize "localhost" to None for file:// URLs per WHATWG spec + // This matches the behavior of the URL parser + let host = if let Some(h) = host { + if scheme_type.is_file() && h.eq_ignore_ascii_case("localhost") { + None + } else { + Some(h) + } + } else { + None + }; + if let Some(host) = host { if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() { return Err(ParseError::EmptyHost); diff --git a/url/src/parser.rs b/url/src/parser.rs index dbdf9b906..295845101 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -1369,13 +1369,30 @@ impl Parser<'_> { } } if scheme_type.is_file() { - // while url’s path’s size is greater than 1 - // and url’s path[0] is the empty string, - // 
validation error, remove the first item from url’s path. + // while url's path's size is greater than 1 + // and url's path[0] is the empty string, + // validation error, remove the first item from url's path. //FIXME: log violation let path = self.serialization.split_off(path_start); - self.serialization.push('/'); - self.serialization.push_str(path.trim_start_matches('/')); + // When there's no host, normalize by removing all leading slashes + // and adding back a single one. When there's a host, preserve + // the path structure for correct roundtripping, but still ensure + // it starts with a single slash. + if path.starts_with('/') { + // Path already has slashes - preserve structure when host exists + if *has_host { + // Keep the path as-is for roundtrip correctness + self.serialization.push_str(&path); + } else { + // No host - normalize to remove redundant leading slashes + self.serialization.push('/'); + self.serialization.push_str(path.trim_start_matches('/')); + } + } else { + // Path doesn't start with slash - add one + self.serialization.push('/'); + self.serialization.push_str(&path); + } } input diff --git a/url/tests/expected_failures.txt b/url/tests/expected_failures.txt index 8d4407c45..ac9ee1e79 100644 --- a/url/tests/expected_failures.txt +++ b/url/tests/expected_failures.txt @@ -3,7 +3,6 @@ - @@ -38,9 +37,6 @@ set hostname to <> set pathname to <> set href to - set pathname to <\\\\> - set pathname to - set pathname to set pathname to set pathname to set pathname to diff --git a/url/tests/roundtrip_bugs.rs b/url/tests/roundtrip_bugs.rs new file mode 100644 index 000000000..30e58019f --- /dev/null +++ b/url/tests/roundtrip_bugs.rs @@ -0,0 +1,43 @@ +// Reproduction tests for bugs #1101 and #1102 +use url::Url; + +#[test] +fn test_bug_1101_file_url_roundtrip_with_host() { + // Bug #1101: file:// URL parse roundtrip mismatch + // When parsing file URLs with both host and path components, + // the path normalization was stripping semantic leading 
slashes, + // causing roundtrip failures + let input = "file://.cRe!+aacRddddddddddddddtpe=//t:/a|et/!.."; + let url1 = Url::parse(input).unwrap(); + let serialized = url1.to_string(); + let url2 = Url::parse(&serialized).unwrap(); + + assert_eq!(url1.host_str(), url2.host_str(), "Host should match after roundtrip"); + assert_eq!(url1.path(), url2.path(), "Path should match after roundtrip"); + assert_eq!(url1, url2, "Full URL should roundtrip correctly"); +} + +#[test] +fn test_bug_1102_set_host_localhost_roundtrip() { + // Bug #1102: set_host("localhost") on file:// URLs doesn't normalize + // The parser normalizes "localhost" to empty host per WHATWG spec, + // but set_host() was not applying the same normalization + let mut url = Url::parse("file:///path").unwrap(); + url.set_host(Some("localhost")).unwrap(); + let serialized = url.to_string(); + let reparsed = Url::parse(&serialized).unwrap(); + + assert_eq!(url.host_str(), reparsed.host_str(), "Host should match after set_host roundtrip"); + assert_eq!(url, reparsed, "URL should roundtrip correctly after set_host(localhost)"); +} + +#[test] +fn test_file_url_localhost_normalization() { + // Additional test: verify that "localhost" is normalized to empty host + // for file:// URLs per WHATWG spec + let url1 = Url::parse("file://localhost/path").unwrap(); + let url2 = Url::parse("file:///path").unwrap(); + + assert_eq!(url1.host_str(), url2.host_str(), "localhost should normalize to empty host"); + assert_eq!(url1, url2, "file://localhost/path should equal file:///path"); +}