From eb0258d91885c0d298f98a2dd1020f1890204866 Mon Sep 17 00:00:00 2001
From: Jared Reyes <jared.reyes@kimberlite.dev>
Date: Sat, 7 Feb 2026 11:05:04 +1100
Subject: [PATCH 1/3] Add comprehensive fuzz targets for all workspace crates

Add 7 fuzz targets covering the entire rust-url workspace:

- fuzz_url_parse_roundtrip: URL parse/serialize roundtrip invariant checking
- fuzz_url_differential: relative URL resolution and make_relative roundtrip
- fuzz_url_setters: URL mutation via setters with validity invariant checks
- fuzz_idna: IDNA domain_to_ascii/domain_to_unicode roundtrip + Punycode
- fuzz_data_url: data: URL processing and base64 decoding
- fuzz_form_urlencoded: form-urlencoded parse/serialize roundtrip
- fuzz_percent_encoding: percent encode/decode roundtrip across ASCII sets

Also includes:
- Seed corpus with representative URL samples
- Fuzzing dictionary for URL/IDNA/data-url tokens
- CIFuzz workflow to automatically fuzz pull requests targeting main
---
 .github/workflows/cifuzz.yml                  | 32 ++++++++
 fuzz/Cargo.toml                               | 57 +++++++++++++
 fuzz/corpus/seed/idna_01                      |  1 +
 fuzz/corpus/seed/idna_02                      |  1 +
 fuzz/corpus/seed/url_01                       |  1 +
 fuzz/corpus/seed/url_02                       |  1 +
 fuzz/corpus/seed/url_03                       |  1 +
 fuzz/corpus/seed/url_04                       |  1 +
 fuzz/corpus/seed/url_05                       |  1 +
 fuzz/corpus/seed/url_06                       |  1 +
 fuzz/corpus/seed/url_07                       |  1 +
 fuzz/corpus/seed/url_08                       |  1 +
 fuzz/corpus/seed/url_09                       |  1 +
 fuzz/corpus/seed/url_10                       |  1 +
 fuzz/fuzz.dict                                | 81 ++++++++++++++++++
 fuzz/fuzz_targets/fuzz_data_url.rs            | 48 +++++++++++
 fuzz/fuzz_targets/fuzz_form_urlencoded.rs     | 35 ++++++++
 fuzz/fuzz_targets/fuzz_idna.rs                | 64 +++++++++++++++
 fuzz/fuzz_targets/fuzz_percent_encoding.rs    | 82 +++++++++++++++++++
 fuzz/fuzz_targets/fuzz_url_differential.rs    | 55 +++++++++++++
 fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs | 44 ++++++++++
 fuzz/fuzz_targets/fuzz_url_setters.rs         | 78 ++++++++++++++++++
 22 files changed, 588 insertions(+)
 create mode 100644 .github/workflows/cifuzz.yml
 create mode 100644 fuzz/Cargo.toml
 create mode 100644 fuzz/corpus/seed/idna_01
 create mode 100644 fuzz/corpus/seed/idna_02
 create mode 100644 fuzz/corpus/seed/url_01
 create mode 100644 fuzz/corpus/seed/url_02
 create mode 100644 fuzz/corpus/seed/url_03
 create mode 100644 fuzz/corpus/seed/url_04
 create mode 100644 fuzz/corpus/seed/url_05
 create mode 100644 fuzz/corpus/seed/url_06
 create mode 100644 fuzz/corpus/seed/url_07
 create mode 100644 fuzz/corpus/seed/url_08
 create mode 100644 fuzz/corpus/seed/url_09
 create mode 100644 fuzz/corpus/seed/url_10
 create mode 100644 fuzz/fuzz.dict
 create mode 100644 fuzz/fuzz_targets/fuzz_data_url.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_form_urlencoded.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_idna.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_percent_encoding.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_url_differential.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_url_setters.rs

diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
new file mode 100644
index 000000000..e9623e7cf
--- /dev/null
+++ b/.github/workflows/cifuzz.yml
@@ -0,0 +1,32 @@
+name: CIFuzz
+on:
+  pull_request:
+    branches:
+      - main
+
+permissions: {}
+
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    permissions:
+      security-events: write
+    steps:
+      - name: Build Fuzzers
+        id: build
+        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+        with:
+          oss-fuzz-project-name: "rust-url"
+          language: rust
+      - name: Run Fuzzers
+        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+        with:
+          oss-fuzz-project-name: "rust-url"
+          language: rust
+          fuzz-seconds: 600
+      - name: Upload Crash
+        uses: actions/upload-artifact@v4
+        if: failure() && steps.build.outcome == 'success'
+        with:
+          name: artifacts
+          path: ./out/artifacts
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 000000000..382e30ace
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,57 @@
+[package]
+name = "rust-url-fuzz"
+version = "0.0.1"
+authors = ["Automatically generated"]
+publish = false
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+url = { path = "../url" }
+idna = { path = "../idna", features = ["std"] }
+percent-encoding = { path = "../percent_encoding", features = ["alloc"] }
+form_urlencoded = { path = "../form_urlencoded", features = ["alloc"] }
+data-url = { path = "../data-url", features = ["std"] }
+
+# --- Fuzz targets ---
+
+[[bin]]
+name = "fuzz_url_parse_roundtrip"
+path = "fuzz_targets/fuzz_url_parse_roundtrip.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_url_differential"
+path = "fuzz_targets/fuzz_url_differential.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_url_setters"
+path = "fuzz_targets/fuzz_url_setters.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_idna"
+path = "fuzz_targets/fuzz_idna.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_data_url"
+path = "fuzz_targets/fuzz_data_url.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_form_urlencoded"
+path = "fuzz_targets/fuzz_form_urlencoded.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_percent_encoding"
+path = "fuzz_targets/fuzz_percent_encoding.rs"
+doc = false
+
+[workspace]
+members = ["."]
diff --git a/fuzz/corpus/seed/idna_01 b/fuzz/corpus/seed/idna_01
new file mode 100644
index 000000000..06c159d73
--- /dev/null
+++ b/fuzz/corpus/seed/idna_01
@@ -0,0 +1 @@
+münchen.de
\ No newline at end of file
diff --git a/fuzz/corpus/seed/idna_02 b/fuzz/corpus/seed/idna_02
new file mode 100644
index 000000000..99b3b7437
--- /dev/null
+++ b/fuzz/corpus/seed/idna_02
@@ -0,0 +1 @@
+xn--mnchen-3ya.de
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_01 b/fuzz/corpus/seed/url_01
new file mode 100644
index 000000000..bf54804e9
--- /dev/null
+++ b/fuzz/corpus/seed/url_01
@@ -0,0 +1 @@
+https://example.com/path?query=value#fragment
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_02 b/fuzz/corpus/seed/url_02
new file mode 100644
index 000000000..dfd944647
--- /dev/null
+++ b/fuzz/corpus/seed/url_02
@@ -0,0 +1 @@
+http://user:password@host.example.com:8080/path/to/resource?key=val&key2=val2#frag
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_03 b/fuzz/corpus/seed/url_03
new file mode 100644
index 000000000..e36d2e67a
--- /dev/null
+++ b/fuzz/corpus/seed/url_03
@@ -0,0 +1 @@
+ftp://ftp.example.com/pub/files/readme.txt
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_04 b/fuzz/corpus/seed/url_04
new file mode 100644
index 000000000..2609dbc7d
--- /dev/null
+++ b/fuzz/corpus/seed/url_04
@@ -0,0 +1 @@
+file:///tmp/local/file.txt
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_05 b/fuzz/corpus/seed/url_05
new file mode 100644
index 000000000..504138580
--- /dev/null
+++ b/fuzz/corpus/seed/url_05
@@ -0,0 +1 @@
+https://[::1]:443/ipv6
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_06 b/fuzz/corpus/seed/url_06
new file mode 100644
index 000000000..1927b5602
--- /dev/null
+++ b/fuzz/corpus/seed/url_06
@@ -0,0 +1 @@
+https://xn--nxasmq6b.example.com/idn
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_07 b/fuzz/corpus/seed/url_07
new file mode 100644
index 000000000..a763ffe79
--- /dev/null
+++ b/fuzz/corpus/seed/url_07
@@ -0,0 +1 @@
+data:text/plain;base64,SGVsbG8gV29ybGQh
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_08 b/fuzz/corpus/seed/url_08
new file mode 100644
index 000000000..59ca05f80
--- /dev/null
+++ b/fuzz/corpus/seed/url_08
@@ -0,0 +1 @@
+data:text/html,%3Ch1%3EHello%3C%2Fh1%3E
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_09 b/fuzz/corpus/seed/url_09
new file mode 100644
index 000000000..c0644cc06
--- /dev/null
+++ b/fuzz/corpus/seed/url_09
@@ -0,0 +1 @@
+https://example.com/path%20with%20spaces?q=%E4%B8%AD%E6%96%87
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_10 b/fuzz/corpus/seed/url_10
new file mode 100644
index 000000000..ac7a50b37
--- /dev/null
+++ b/fuzz/corpus/seed/url_10
@@ -0,0 +1 @@
+https://example.com/?foo=bar&baz=qux&empty=&key+with+plus=value+with+plus
\ No newline at end of file
diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict
new file mode 100644
index 000000000..ee9b77d6a
--- /dev/null
+++ b/fuzz/fuzz.dict
@@ -0,0 +1,81 @@
+# URL schemes
+"http://"
+"https://"
+"ftp://"
+"file://"
+"data:"
+"blob:"
+"ws://"
+"wss://"
+"custom://"
+
+# URL delimiters
+"://"
+":/"
+"//"
+"/"
+"?"
+"#"
+"@"
+":"
+";"
+
+# Common URL components
+"example.com"
+"localhost"
+"127.0.0.1"
+"[::1]"
+"[2001:db8::1]"
+"0.0.0.0"
+
+# Percent encoding
+"%00"
+"%20"
+"%25"
+"%2F"
+"%3A"
+"%3F"
+"%40"
+"%23"
+"%26"
+"%3D"
+"%C3%A9"
+"%E4%B8%AD"
+
+# Form URL encoded
+"&"
+"="
+"+"
+"key=value"
+"a=b&c=d"
+
+# IDNA / Punycode
+"xn--"
+"xn--nxasmq6b"
+".com"
+".de"
+".org"
+
+# Data URL
+"data:,"
+"data:text/plain,"
+"data:text/plain;base64,"
+"data:text/html,"
+"data:application/octet-stream;base64,"
+";base64"
+";charset=utf-8"
+";charset=US-ASCII"
+
+# Base64
+"SGVsbG8="
+"AAAA"
+"////+"
+
+# Special characters
+"\t"
+"\n"
+"\r"
+" "
+"\\"
+".."
+"."
diff --git a/fuzz/fuzz_targets/fuzz_data_url.rs b/fuzz/fuzz_targets/fuzz_data_url.rs
new file mode 100644
index 000000000..774737afe
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_data_url.rs
@@ -0,0 +1,48 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+
+fuzz_target!(|data: &[u8]| {
+    // Feed the raw bytes to forgiving_base64 first: doing this after the early
+    // returns below would restrict it to inputs that are valid data: URLs.
+    let _ = data_url::forgiving_base64::decode_to_vec(data);
+
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    let Ok(data_url) = data_url::DataUrl::process(utf8) else {
+        return;
+    };
+
+    // Access MIME type (should not panic)
+    let mime = data_url.mime_type();
+    let _ = mime.type_.len();
+    let _ = mime.subtype.len();
+    for (name, value) in &mime.parameters {
+        let _ = name.len();
+        let _ = value.len();
+    }
+
+    // Decode body (should not panic)
+    match data_url.decode_to_vec() {
+        Ok((body, fragment)) => {
+            let _ = body.len();
+            if let Some(frag) = fragment {
+                // Fragment percent-encoding should produce valid UTF-8
+                let _ = frag.to_percent_encoded();
+            }
+        }
+        Err(_) => {
+            // Base64 decode errors are expected for malformed input
+        }
+    }
+
+    // Test streaming decode
+    let mut chunks = Vec::new();
+    let _ = data_url.decode(|bytes| {
+        chunks.push(bytes.to_vec());
+        Ok::<(), std::convert::Infallible>(())
+    });
+});
diff --git a/fuzz/fuzz_targets/fuzz_form_urlencoded.rs b/fuzz/fuzz_targets/fuzz_form_urlencoded.rs
new file mode 100644
index 000000000..673e4998f
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_form_urlencoded.rs
@@ -0,0 +1,35 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    // Parse the input as form-urlencoded data
+    let pairs: Vec<(String, String)> = form_urlencoded::parse(data)
+        .into_owned()
+        .collect();
+
+    // Roundtrip invariant: serialize and re-parse should produce the same pairs
+    let mut serializer = form_urlencoded::Serializer::new(String::new());
+    for (name, value) in &pairs {
+        serializer.append_pair(name, value);
+    }
+    let serialized = serializer.finish();
+
+    let reparsed: Vec<(String, String)> = form_urlencoded::parse(serialized.as_bytes())
+        .into_owned()
+        .collect();
+
+    // The key insight: form_urlencoded uses lossy UTF-8 decoding,
+    // so we need to compare the parsed pairs (not raw bytes).
+    // After one roundtrip through parse->serialize->parse, the result should be stable.
+    assert_eq!(
+        pairs, reparsed,
+        "form_urlencoded roundtrip mismatch: serialized={:?}",
+        serialized
+    );
+
+    // byte_serialize must emit only ASCII alphanumerics, "*-._", '+' and %XX escapes
+    let byte_serialized: String = form_urlencoded::byte_serialize(data).collect();
+    let allowed = |b: u8| b.is_ascii_alphanumeric() || b"*-._+%".contains(&b);
+    assert!(byte_serialized.bytes().all(allowed), "{byte_serialized:?}");
+});
diff --git a/fuzz/fuzz_targets/fuzz_idna.rs b/fuzz/fuzz_targets/fuzz_idna.rs
new file mode 100644
index 000000000..3e36b9328
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_idna.rs
@@ -0,0 +1,64 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+
+fuzz_target!(|data: &[u8]| {
+    // domain_to_ascii_cow takes raw bytes, so run it under every deny list
+    // before the input is required to be valid UTF-8
+    use idna::AsciiDenyList as Deny;
+    for deny in [Deny::URL, Deny::EMPTY, Deny::STD3] {
+        let _ = idna::domain_to_ascii_cow(data, deny);
+    }
+
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    // The &str entry points, in lenient and strict flavours
+    let lenient = idna::domain_to_ascii(utf8);
+    let strict = idna::domain_to_ascii_strict(utf8);
+
+    // Roundtrip invariant: an ASCII form that converts cleanly to Unicode
+    // must map back to the same ASCII form (compared case-insensitively)
+    if let Ok(ascii) = &lenient {
+        let (unicode, status) = idna::domain_to_unicode(ascii);
+        if status.is_ok() {
+            if let Ok(back_to_ascii) = idna::domain_to_ascii(&unicode) {
+                assert_eq!(
+                    ascii.to_lowercase(),
+                    back_to_ascii.to_lowercase(),
+                    "IDNA roundtrip mismatch: input={:?}, ascii={:?}, unicode={:?}, back={:?}",
+                    utf8,
+                    ascii,
+                    unicode,
+                    back_to_ascii
+                );
+            }
+        }
+    }
+
+    // Consistency: whatever strict mode accepts, lenient mode must accept too
+    assert!(
+        strict.is_err() || lenient.is_ok(),
+        "strict succeeded but non-strict failed for {:?}",
+        utf8
+    );
+
+    // domain_to_unicode on the raw text: the status is ignored and the
+    // returned String is merely touched, so this only checks for panics
+    let (unicode_str, _status) = idna::domain_to_unicode(utf8);
+    let _ = unicode_str.len();
+
+    // Punycode: whenever encoding and decoding both succeed, decoding the
+    // encoded form must give back the original text
+    if let Some(encoded) = idna::punycode::encode_str(utf8) {
+        if let Some(decoded) = idna::punycode::decode_to_string(&encoded) {
+            assert_eq!(
+                utf8, decoded,
+                "Punycode roundtrip mismatch: input={:?}, encoded={:?}, decoded={:?}",
+                utf8, encoded, decoded
+            );
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_percent_encoding.rs b/fuzz/fuzz_targets/fuzz_percent_encoding.rs
new file mode 100644
index 000000000..a4345526f
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_percent_encoding.rs
@@ -0,0 +1,82 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use percent_encoding::{
+    percent_decode, percent_decode_str, percent_encode, utf8_percent_encode, AsciiSet, CONTROLS,
+    NON_ALPHANUMERIC,
+};
+use std::borrow::Cow;
+use std::str;
+
+/// https://url.spec.whatwg.org/#fragment-percent-encode-set
+const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
+
+/// https://url.spec.whatwg.org/#path-percent-encode-set
+const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
+
+/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
+const USERINFO: &AsciiSet = &PATH
+    .add(b'/')
+    .add(b':')
+    .add(b';')
+    .add(b'=')
+    .add(b'@')
+    .add(b'[')
+    .add(b'\\')
+    .add(b']')
+    .add(b'^')
+    .add(b'|');
+
+fuzz_target!(|data: &[u8]| {
+    if data.is_empty() {
+        return;
+    }
+
+    let ascii_sets: [&AsciiSet; 4] = [&CONTROLS, NON_ALPHANUMERIC, FRAGMENT, USERINFO];
+    let set_idx = data[0] as usize % ascii_sets.len();
+    let ascii_set = ascii_sets[set_idx];
+    let input = &data[1..];
+
+    // Test percent_encode -> percent_decode roundtrip
+    let encoded: Cow<str> = percent_encode(input, ascii_set).into();
+
+    // Encoded output must be valid UTF-8 (it's a Cow<str>)
+    let _ = encoded.len();
+
+    // Decode the encoded result
+    let decoded: Cow<[u8]> = percent_decode(encoded.as_bytes()).into();
+    assert_eq!(
+        &*decoded, input,
+        "percent_encode/decode roundtrip mismatch with set index {}",
+        set_idx
+    );
+
+    // Test UTF-8 path: if input is valid UTF-8, utf8_percent_encode should work too
+    if let Ok(utf8_input) = str::from_utf8(input) {
+        let utf8_encoded = utf8_percent_encode(utf8_input, ascii_set).to_string();
+
+        // Decode should recover original
+        let utf8_decoded = percent_decode_str(&utf8_encoded)
+            .decode_utf8()
+            .expect("decoding percent-encoded UTF-8 must produce valid UTF-8");
+        assert_eq!(
+            utf8_input, &*utf8_decoded,
+            "utf8_percent_encode roundtrip mismatch"
+        );
+    }
+
+    // Test percent_decode directly on raw input
+    let direct_decoded: Cow<[u8]> = percent_decode(input).into();
+    // Re-encoding the decoded bytes and decoding again should be stable
+    let re_encoded: Cow<str> = percent_encode(&direct_decoded, ascii_set).into();
+    let re_decoded: Cow<[u8]> = percent_decode(re_encoded.as_bytes()).into();
+    assert_eq!(
+        &*direct_decoded, &*re_decoded,
+        "double roundtrip mismatch"
+    );
+
+    // Lossy-decode the raw text when it is valid UTF-8; the result is discarded (panic check only)
+    if let Ok(utf8_input) = str::from_utf8(input) {
+        let _ = percent_decode_str(utf8_input).decode_utf8_lossy();
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_url_differential.rs b/fuzz/fuzz_targets/fuzz_url_differential.rs
new file mode 100644
index 000000000..fc86581cb
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_url_differential.rs
@@ -0,0 +1,55 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+use url::Url;
+
+fuzz_target!(|data: &[u8]| {
+    if data.len() < 2 {
+        return;
+    }
+
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    // Split input into a base URL part and a relative part
+    let split = (data[0] as usize) % utf8.len().max(1);
+    let (base_str, relative_str) = utf8.split_at(split);
+
+    // Try parsing base as absolute URL
+    let Ok(base) = Url::parse(base_str) else {
+        return;
+    };
+
+    // Test relative URL resolution
+    if let Ok(resolved) = base.join(relative_str) {
+        // The resolved URL must survive a serialize -> parse roundtrip unchanged
+        let serialized = resolved.as_str();
+        let reparsed =
+            Url::parse(serialized).expect("re-parsing a resolved URL must succeed");
+        assert_eq!(resolved.as_str(), reparsed.as_str());
+
+        // make_relative is the inverse of join and treats `self` as the base URL
+        if !base.cannot_be_a_base() && !resolved.cannot_be_a_base() {
+            if let Some(relative) = base.make_relative(&resolved) {
+                // Re-resolving the relative URL from base should give the same result
+                if let Ok(re_resolved) = base.join(&relative) {
+                    // Only scheme and host are asserted; path/query/fragment are not
+                    assert_eq!(re_resolved.scheme(), resolved.scheme());
+                    assert_eq!(re_resolved.host_str(), resolved.host_str());
+                }
+            }
+        }
+    }
+
+    // parse_with_params (inputs under 500 bytes): both pairs must appear in the query
+    if utf8.len() < 500 {
+        let params = [("key", "value"), ("a", "b")];
+        if let Ok(with_params) = Url::parse_with_params(utf8, &params) {
+            let query = with_params.query().unwrap_or("");
+            assert!(query.contains("key=value"));
+            assert!(query.contains("a=b"));
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs b/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs
new file mode 100644
index 000000000..8df8ef62a
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs
@@ -0,0 +1,44 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+use url::Url;
+
+fuzz_target!(|data: &[u8]| {
+    // Only well-formed UTF-8 that parses as an absolute URL is interesting
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+    let Ok(url) = Url::parse(utf8) else {
+        return;
+    };
+
+    // Roundtrip invariant: the serialization must re-parse to itself
+    let text = url.as_str();
+    let again = Url::parse(text).expect("re-parsing a serialized URL must succeed");
+    assert_eq!(
+        url.as_str(),
+        again.as_str(),
+        "roundtrip mismatch for input: {:?}",
+        utf8
+    );
+
+    // Component invariant: every accessor must agree across the roundtrip
+    assert_eq!(url.scheme(), again.scheme());
+    assert_eq!(url.username(), again.username());
+    assert_eq!(url.password(), again.password());
+    assert_eq!(url.host_str(), again.host_str());
+    assert_eq!(url.port(), again.port());
+    assert_eq!(url.path(), again.path());
+    assert_eq!(url.query(), again.query());
+    assert_eq!(url.fragment(), again.fragment());
+
+    // Join invariant: resolving a URL's own serialization against itself
+    // must be the identity whenever the join succeeds
+    if let Ok(joined) = url.join(text) {
+        assert_eq!(joined.as_str(), text);
+    }
+
+    // The origin is computed and discarded: this is a panic check only
+    let _ = url.origin();
+});
diff --git a/fuzz/fuzz_targets/fuzz_url_setters.rs b/fuzz/fuzz_targets/fuzz_url_setters.rs
new file mode 100644
index 000000000..7402d04d2
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_url_setters.rs
@@ -0,0 +1,78 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+use url::Url;
+
+fuzz_target!(|data: &[u8]| {
+    if data.len() < 3 {
+        return;
+    }
+    let (selectors, payload) = data.split_at(2);
+    let Ok(utf8) = str::from_utf8(payload) else {
+        return;
+    };
+
+    // data[0] picks the base URL, data[1] picks the setter, the rest is its argument
+    const BASE_URLS: [&str; 5] = [
+        "https://example.com/path?query#fragment",
+        "http://user:pass@host:8080/a/b/c",
+        "ftp://files.example.com/pub",
+        "file:///tmp/test",
+        "custom://example",
+    ];
+
+    let base = BASE_URLS[selectors[0] as usize % BASE_URLS.len()];
+    let setter_idx = selectors[1] % 10;
+
+    let mut url = Url::parse(base).unwrap();
+    let original = url.as_str().to_string();
+
+    match setter_idx {
+        0 => {
+            let _ = url.set_scheme(utf8);
+        }
+        1 => {
+            let _ = url.set_host(Some(utf8));
+        }
+        2 => {
+            let _ = url.set_host(None);
+        }
+        3 => {
+            let _ = url.set_username(utf8);
+        }
+        4 => {
+            let _ = url.set_password(Some(utf8));
+        }
+        5 => {
+            url.set_path(utf8);
+        }
+        6 => {
+            url.set_query(Some(utf8));
+        }
+        7 => {
+            url.set_fragment(Some(utf8));
+        }
+        8 => {
+            if let Ok(port) = utf8.parse::<u16>() {
+                let _ = url.set_port(Some(port));
+            }
+        }
+        9 => {
+            if let Ok(mut segments) = url.path_segments_mut() {
+                segments.push(utf8);
+            }
+        }
+        _ => {}
+    }
+
+    // Whatever the setter did, the serialization must still re-parse to itself
+    let modified = url.as_str().to_string();
+    let reparsed = Url::parse(&modified).unwrap_or_else(|e| {
+        panic!(
+            "URL became invalid after mutation: {:?}\noriginal: {}\nmodified: {}\nerror: {}",
+            setter_idx, original, modified, e
+        );
+    });
+    assert_eq!(url.as_str(), reparsed.as_str());
+});

From 929d07a79a62c29d94f641087b2bab0854a6eb62 Mon Sep 17 00:00:00 2001
From: Jared Reyes <jared.reyes@kimberlite.dev>
Date: Sat, 7 Feb 2026 11:25:03 +1100
Subject: [PATCH 2/3] Fix false positives in fuzz targets found during local
 fuzzing

- fuzz_percent_encoding: use NON_ALPHANUMERIC for roundtrip assertions
  since it encodes '%', preventing spurious decode mismatches
- fuzz_url_differential: use char_indices() to split UTF-8 input on
  valid character boundaries, preventing panics on multi-byte chars
- fuzz.dict: replace C-style escapes (\t, \n, \r, \\) with \xHH hex
  escapes required by libfuzzer dictionary format
---
 fuzz/fuzz.dict                             |  8 +++---
 fuzz/fuzz_targets/fuzz_percent_encoding.rs | 32 +++++++++++-----------
 fuzz/fuzz_targets/fuzz_url_differential.rs |  7 ++++-
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict
index ee9b77d6a..023cd90ea 100644
--- a/fuzz/fuzz.dict
+++ b/fuzz/fuzz.dict
@@ -72,10 +72,10 @@
 "////+"
 
 # Special characters
-"\t"
-"\n"
-"\r"
+"\x09"
+"\x0a"
+"\x0d"
 " "
-"\\"
+"\x5c"
 ".."
 "."
diff --git a/fuzz/fuzz_targets/fuzz_percent_encoding.rs b/fuzz/fuzz_targets/fuzz_percent_encoding.rs
index a4345526f..7b178a661 100644
--- a/fuzz/fuzz_targets/fuzz_percent_encoding.rs
+++ b/fuzz/fuzz_targets/fuzz_percent_encoding.rs
@@ -32,30 +32,30 @@ fuzz_target!(|data: &[u8]| {
         return;
     }
 
+    // Use NON_ALPHANUMERIC for roundtrip tests since it includes '%',
+    // ensuring encode→decode is a true roundtrip. Sets that don't encode '%'
+    // will cause percent_decode to interpret literal %XX in the input.
     let ascii_sets: [&AsciiSet; 4] = [&CONTROLS, NON_ALPHANUMERIC, FRAGMENT, USERINFO];
     let set_idx = data[0] as usize % ascii_sets.len();
     let ascii_set = ascii_sets[set_idx];
     let input = &data[1..];
 
-    // Test percent_encode -> percent_decode roundtrip
-    let encoded: Cow<str> = percent_encode(input, ascii_set).into();
-
-    // Encoded output must be valid UTF-8 (it's a Cow<str>)
-    let _ = encoded.len();
-
-    // Decode the encoded result
-    let decoded: Cow<[u8]> = percent_decode(encoded.as_bytes()).into();
+    // Test percent_encode -> percent_decode roundtrip with NON_ALPHANUMERIC
+    // (which encodes '%', guaranteeing a clean roundtrip)
+    let safe_encoded: Cow<str> = percent_encode(input, NON_ALPHANUMERIC).into();
+    let safe_decoded: Cow<[u8]> = percent_decode(safe_encoded.as_bytes()).into();
     assert_eq!(
-        &*decoded, input,
-        "percent_encode/decode roundtrip mismatch with set index {}",
-        set_idx
+        &*safe_decoded, input,
+        "percent_encode/decode roundtrip mismatch with NON_ALPHANUMERIC"
     );
 
+    // Test that encoding with the selected set produces valid output
+    let encoded: Cow<str> = percent_encode(input, ascii_set).into();
+    let _ = encoded.len();
+
     // Test UTF-8 path: if input is valid UTF-8, utf8_percent_encode should work too
     if let Ok(utf8_input) = str::from_utf8(input) {
-        let utf8_encoded = utf8_percent_encode(utf8_input, ascii_set).to_string();
-
-        // Decode should recover original
+        let utf8_encoded = utf8_percent_encode(utf8_input, NON_ALPHANUMERIC).to_string();
         let utf8_decoded = percent_decode_str(&utf8_encoded)
             .decode_utf8()
             .expect("decoding percent-encoded UTF-8 must produce valid UTF-8");
@@ -67,8 +67,8 @@ fuzz_target!(|data: &[u8]| {
 
     // Test percent_decode directly on raw input
     let direct_decoded: Cow<[u8]> = percent_decode(input).into();
-    // Re-encoding the decoded bytes and decoding again should be stable
-    let re_encoded: Cow<str> = percent_encode(&direct_decoded, ascii_set).into();
+    // Re-encoding with NON_ALPHANUMERIC and decoding again should be stable
+    let re_encoded: Cow<str> = percent_encode(&direct_decoded, NON_ALPHANUMERIC).into();
     let re_decoded: Cow<[u8]> = percent_decode(re_encoded.as_bytes()).into();
     assert_eq!(
         &*direct_decoded, &*re_decoded,
diff --git a/fuzz/fuzz_targets/fuzz_url_differential.rs b/fuzz/fuzz_targets/fuzz_url_differential.rs
index fc86581cb..ee97207cd 100644
--- a/fuzz/fuzz_targets/fuzz_url_differential.rs
+++ b/fuzz/fuzz_targets/fuzz_url_differential.rs
@@ -13,8 +13,13 @@ fuzz_target!(|data: &[u8]| {
         return;
     };
 
-    // Split input into a base URL part and a relative part
+    // Split input into a base URL part and a relative part.
+    // Ensure we split on a char boundary.
     let split = (data[0] as usize) % utf8.len().max(1);
+    let split = match utf8.char_indices().find(|&(i, _)| i >= split) {
+        Some((i, _)) => i,
+        None => utf8.len(),
+    };
     let (base_str, relative_str) = utf8.split_at(split);
 
     // Try parsing base as absolute URL

From 3c7adca303fc624288b62fd5947290e0376e0721 Mon Sep 17 00:00:00 2001
From: Jared Reyes <jared.reyes@kimberlite.dev>
Date: Sat, 7 Feb 2026 14:15:44 +1100
Subject: [PATCH 3/3] Fix file:// URL roundtrip bugs (#1101, #1102)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit fixes two bugs found through fuzzing that caused file:// URLs
to fail roundtrip tests (parse → serialize → parse).

Bug #1101: File URLs with hosts and paths starting with multiple slashes
were losing their host component during roundtrip. The path normalization
logic was too aggressive in stripping leading slashes, which changed how
the URL was interpreted on re-parsing.

Fix: Preserve path structure when a host component is present, only
normalizing leading slashes for hostless file:// URLs.

Bug #1102: Calling set_host("localhost") on file:// URLs didn't apply
the same normalization as the parser, which converts "localhost" to an
empty host per WHATWG spec.

Fix: Normalize "localhost" to empty host in set_host() for file:// URLs,
matching parser behavior.

Both fixes improve WHATWG URL spec compliance and resolve 4 previously
failing Web Platform Tests:
- file://spider///
- file://monkey/ with pathname set to \\\\
- file:///unicorn with pathname set to //\\/
- file:///unicorn with pathname set to //monkey/..//
---
 url/src/lib.rs                  | 12 +++++++++
 url/src/parser.rs               | 27 +++++++++++++++++----
 url/tests/expected_failures.txt |  4 ---
 url/tests/roundtrip_bugs.rs     | 43 +++++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 9 deletions(-)
 create mode 100644 url/tests/roundtrip_bugs.rs

diff --git a/url/src/lib.rs b/url/src/lib.rs
index f1558682b..0f76ff14a 100644
--- a/url/src/lib.rs
+++ b/url/src/lib.rs
@@ -2025,6 +2025,18 @@ impl Url {
 
         let scheme_type = SchemeType::from(self.scheme());
 
+        // Normalize "localhost" to None for file:// URLs per WHATWG spec
+        // This matches the behavior of the URL parser
+        let host = if let Some(h) = host {
+            if scheme_type.is_file() && h.eq_ignore_ascii_case("localhost") {
+                None
+            } else {
+                Some(h)
+            }
+        } else {
+            None
+        };
+
         if let Some(host) = host {
             if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
                 return Err(ParseError::EmptyHost);
diff --git a/url/src/parser.rs b/url/src/parser.rs
index dbdf9b906..295845101 100644
--- a/url/src/parser.rs
+++ b/url/src/parser.rs
@@ -1369,13 +1369,30 @@ impl Parser<'_> {
             }
         }
         if scheme_type.is_file() {
-            // while url’s path’s size is greater than 1
-            // and url’s path[0] is the empty string,
-            // validation error, remove the first item from url’s path.
+            // while url's path's size is greater than 1
+            // and url's path[0] is the empty string,
+            // validation error, remove the first item from url's path.
             //FIXME: log violation
             let path = self.serialization.split_off(path_start);
-            self.serialization.push('/');
-            self.serialization.push_str(path.trim_start_matches('/'));
+            // When there's no host, normalize by removing all leading slashes
+            // and adding back a single one. When there's a host, preserve
+            // the path structure (including repeated leading slashes) for
+            // correct roundtripping, adding a slash only if none is present.
+            if path.starts_with('/') {
+                // Path already starts with a slash - preserve structure when host exists
+                if *has_host {
+                    // Keep the path as-is for roundtrip correctness
+                    self.serialization.push_str(&path);
+                } else {
+                    // No host - normalize to remove redundant leading slashes
+                    self.serialization.push('/');
+                    self.serialization.push_str(path.trim_start_matches('/'));
+                }
+            } else {
+                // Path doesn't start with slash - add one
+                self.serialization.push('/');
+                self.serialization.push_str(&path);
+            }
         }
 
         input
diff --git a/url/tests/expected_failures.txt b/url/tests/expected_failures.txt
index 8d4407c45..ac9ee1e79 100644
--- a/url/tests/expected_failures.txt
+++ b/url/tests/expected_failures.txt
@@ -3,7 +3,6 @@
 <file:\\\\\\\\>
 <file:\\\\\\\\?fox>
 <file:\\\\\\\\#guppy>
-<file://spider///>
 <file:\\\\localhost//>
 <file://\\/localhost//cat>
 <file://localhost//a//../..//>
@@ -38,9 +37,6 @@
 <non-spec:/.//p> set hostname to <>
 <foo:///some/path> set pathname to <>
 <file:///var/log/system.log> set href to <http://0300.168.0xF0>
-<file://monkey/> set pathname to <\\\\>
-<file:///unicorn> set pathname to <//\\/>
-<file:///unicorn> set pathname to <//monkey/..//>
 <non-spec:/> set pathname to </.//p>
 <non-spec:/> set pathname to </..//p>
 <non-spec:/> set pathname to <//p>
diff --git a/url/tests/roundtrip_bugs.rs b/url/tests/roundtrip_bugs.rs
new file mode 100644
index 000000000..30e58019f
--- /dev/null
+++ b/url/tests/roundtrip_bugs.rs
@@ -0,0 +1,43 @@
+// Reproduction tests for bugs #1101 and #1102
+use url::Url;
+
+#[test]
+fn test_bug_1101_file_url_roundtrip_with_host() {
+    // Bug #1101: file:// URL parse roundtrip mismatch
+    // When parsing file URLs with both host and path components,
+    // the path normalization was stripping semantic leading slashes,
+    // causing roundtrip failures
+    let input = "file://.cRe!+aacRddddddddddddddtpe=//t:/a|et/!..";
+    let url1 = Url::parse(input).unwrap();
+    let serialized = url1.to_string();
+    let url2 = Url::parse(&serialized).unwrap();
+
+    assert_eq!(url1.host_str(), url2.host_str(), "Host should match after roundtrip");
+    assert_eq!(url1.path(), url2.path(), "Path should match after roundtrip");
+    assert_eq!(url1, url2, "Full URL should roundtrip correctly");
+}
+
+#[test]
+fn test_bug_1102_set_host_localhost_roundtrip() {
+    // Bug #1102: set_host("localhost") on file:// URLs doesn't normalize
+    // The parser normalizes "localhost" to empty host per WHATWG spec,
+    // but set_host() was not applying the same normalization
+    let mut url = Url::parse("file:///path").unwrap();
+    url.set_host(Some("localhost")).unwrap();
+    let serialized = url.to_string();
+    let reparsed = Url::parse(&serialized).unwrap();
+
+    assert_eq!(url.host_str(), reparsed.host_str(), "Host should match after set_host roundtrip");
+    assert_eq!(url, reparsed, "URL should roundtrip correctly after set_host(localhost)");
+}
+
+#[test]
+fn test_file_url_localhost_normalization() {
+    // Additional test: verify that "localhost" is normalized to empty host
+    // for file:// URLs per WHATWG spec
+    let url1 = Url::parse("file://localhost/path").unwrap();
+    let url2 = Url::parse("file:///path").unwrap();
+
+    assert_eq!(url1.host_str(), url2.host_str(), "localhost should normalize to empty host");
+    assert_eq!(url1, url2, "file://localhost/path should equal file:///path");
+}