Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 91 additions & 1 deletion benchmarks/percent_encode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@ std::string examples[] = {"\xE1|", "other:9818274x1!!",
"ref=web-twc-ao-gbl-adsinfo&utm_source=twc&utm_",
"connect_timeout=10&application_name=myapp"};

// Two longer inputs, each assembled from adjacent string literals. They are
// iterated by the LongFragment and LongQuery benchmarks, and their combined
// size is summed into long_examples_bytes.
std::string long_examples[] = {
"connect timeout=10 application name=myapp server=db host internal "
"database=production analytics read preference=secondary preferred "
"ssl=true retry writes=true w=majority max pool size=50",
"ref=web twc ao gbl adsinfo utm source=twc utm medium=cpc "
"utm campaign=brand awareness q4 2024 utm content=banner 300x250 "
"utm term=weather forecast today gclid=Cj0KCQiA3Y ABhCnARIsAK",
};

// Percent-encoded inputs iterated by the Decode benchmark; their combined size
// is summed into decode_examples_bytes.
// decode_first_pct names entries 0..3 of this array by explicit index, so the
// two arrays must be updated together when entries are added or removed.
std::string decode_examples[] = {
"%E4%BD%A0%E5%A5%BD%E4%B8%96%E7%95%8C%20%21%22%23%24%25%26%27",
"connect_timeout%3D10%26application_name%3Dmyapp%26server%3Ddb.host",
"%68%65%6C%6C%6F%20%77%6F%72%6C%64%20%74%68%69%73%20%69%73%20"
"%61%20%70%65%72%63%65%6E%74%20%68%65%61%76%79%20%73%74%72%69"
"%6E%67",
"%2Fapi%2Fv1%2Fusers%2F12345%2Fposts%3Fpage%3D1%26limit%3D50%26"
"sort%3Dcreated%26order%3Ddesc%26fields%3Did%2Ctitle%2Cbody%26"
"filter%3Dstatus%253Dpublished",
};

// No-op: the body is empty, so calling this has no effect.
// NOTE(review): presumably kept as a setup hook expected by other code in this
// benchmark — confirm before removing.
void init_data() {}

double examples_bytes = []() -> double {
Expand Down Expand Up @@ -115,7 +135,7 @@ static void SpecialQuery(benchmark::State& state) {
for (auto _ : state) {
for (std::string& url_string : examples) {
benchmark::DoNotOptimize(ada::unicode::percent_encode(
url_string, ada::character_sets::FRAGMENT_PERCENT_ENCODE));
url_string, ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE));
}
}
if (collector.has_events()) {
Expand Down Expand Up @@ -243,6 +263,76 @@ static void C0Control(benchmark::State& state) {
}
BENCHMARK(C0Control);

// Combined size, in bytes, of every string in long_examples. Computed once by
// an immediately-invoked lambda and used as the "speed" counter value of the
// LongFragment and LongQuery benchmarks.
double long_examples_bytes = []() -> double {
  size_t total = 0;
  for (const std::string& example : long_examples) {
    total += example.size();
  }
  return static_cast<double>(total);
}();

/**
 * Benchmark: percent-encodes every entry of long_examples using the
 * FRAGMENT_PERCENT_ENCODE character set, then publishes a "speed" counter
 * built from long_examples_bytes as an iteration-invariant rate.
 */
static void LongFragment(benchmark::State& state) {
  for (auto _ : state) {
    for (const std::string& input : long_examples) {
      // DoNotOptimize keeps the encoded result from being discarded.
      benchmark::DoNotOptimize(ada::unicode::percent_encode(
          input, ada::character_sets::FRAGMENT_PERCENT_ENCODE));
    }
  }
  const benchmark::Counter throughput(
      long_examples_bytes, benchmark::Counter::kIsIterationInvariantRate);
  state.counters["speed"] = throughput;
}
BENCHMARK(LongFragment);

/**
 * Benchmark: percent-encodes every entry of long_examples using the
 * QUERY_PERCENT_ENCODE character set, then publishes a "speed" counter built
 * from long_examples_bytes as an iteration-invariant rate.
 */
static void LongQuery(benchmark::State& state) {
  for (auto _ : state) {
    for (const std::string& input : long_examples) {
      // DoNotOptimize keeps the encoded result from being discarded.
      benchmark::DoNotOptimize(ada::unicode::percent_encode(
          input, ada::character_sets::QUERY_PERCENT_ENCODE));
    }
  }
  const benchmark::Counter throughput(
      long_examples_bytes, benchmark::Counter::kIsIterationInvariantRate);
  state.counters["speed"] = throughput;
}
BENCHMARK(LongQuery);

// Combined size, in bytes, of every string in decode_examples. Computed once
// by an immediately-invoked lambda and used as the "speed" counter value of
// the Decode benchmark.
double decode_examples_bytes = []() -> double {
  size_t total = 0;
  for (const std::string& example : decode_examples) {
    total += example.size();
  }
  return static_cast<double>(total);
}();

// Offset of the first '%' in each decode_examples entry (std::string::npos if
// an entry has none). The Decode benchmark passes decode_first_pct[i] as the
// second argument of percent_decode for decode_examples[i].
//
// The initializer names entries 0..3 explicitly, so the two arrays only stay
// parallel while decode_examples has exactly four entries. Fail the build on a
// mismatch rather than letting Decode index past the end of either array.
static_assert(
    std::size(decode_examples) == 4,
    "decode_first_pct must list one offset per decode_examples entry");
const size_t decode_first_pct[] = {
    decode_examples[0].find('%'),
    decode_examples[1].find('%'),
    decode_examples[2].find('%'),
    decode_examples[3].find('%'),
};

/**
 * Benchmark: calls percent_decode on every entry of decode_examples, passing
 * the matching decode_first_pct value as the second argument, then publishes
 * a "speed" counter built from decode_examples_bytes as an
 * iteration-invariant rate.
 */
static void Decode(benchmark::State& state) {
  const size_t example_count = std::size(decode_examples);
  for (auto _ : state) {
    // Index-based loop: decode_examples and decode_first_pct are parallel
    // arrays and must be walked in lockstep.
    for (size_t idx = 0; idx != example_count; ++idx) {
      benchmark::DoNotOptimize(ada::unicode::percent_decode(
          decode_examples[idx], decode_first_pct[idx]));
    }
  }
  const benchmark::Counter throughput(
      decode_examples_bytes, benchmark::Counter::kIsIterationInvariantRate);
  state.counters["speed"] = throughput;
}
BENCHMARK(Decode);

/**
 * Benchmark: calls percent_decode on a 200-character string of 'a' with
 * std::string_view::npos as the second argument, then publishes a "speed"
 * counter of 200 as an iteration-invariant rate.
 */
static void DecodeClean(benchmark::State& state) {
  // Single source for the input length and the reported counter value.
  constexpr size_t clean_length = 200;
  std::string clean(clean_length, 'a');
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        ada::unicode::percent_decode(clean, std::string_view::npos));
  }
  const benchmark::Counter throughput(
      static_cast<double>(clean_length),
      benchmark::Counter::kIsIterationInvariantRate);
  state.counters["speed"] = throughput;
}
BENCHMARK(DecodeClean);

int main(int argc, char** argv) {
#if defined(ADA_RUST_VERSION)
benchmark::AddCustomContext("rust version ", ADA_RUST_VERSION);
Expand Down
41 changes: 0 additions & 41 deletions include/ada/unicode-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,46 +5,5 @@
#ifndef ADA_UNICODE_INL_H
#define ADA_UNICODE_INL_H
#include "ada/unicode.h"
#include "ada/character_sets.h"

/**
* Unicode operations. These functions are not part of our public API and may
* change at any time.
*
* private
* @namespace ada::unicode
* @brief Includes the declarations for unicode operations
*/
namespace ada::unicode {
/**
 * Scans `input` from the front and returns the index of the first byte for
 * which character_sets::bit_at(character_set, byte) is true. Returns
 * input.size() when no byte matches.
 */
ada_really_inline size_t percent_encode_index(const std::string_view input,
                                              const uint8_t character_set[]) {
  constexpr size_t block_size = 8;
  const char* const bytes = input.data();
  const size_t length = input.size();
  size_t pos = 0;

  // Full blocks: copy 8 bytes into a local buffer first, which entices the
  // compiler to unconditionally process 8 characters.
  while (length - pos >= block_size) {
    unsigned char block[block_size];
    std::memcpy(block, bytes + pos, block_size);

    for (size_t offset = 0; offset != block_size; ++offset) {
      if (character_sets::bit_at(character_set, block[offset])) {
        return pos + offset;
      }
    }
    pos += block_size;
  }

  // Tail: fewer than 8 bytes remain, check them one at a time.
  while (pos != length) {
    if (character_sets::bit_at(character_set, bytes[pos])) {
      return pos;
    }
    ++pos;
  }

  // No byte of `input` matched.
  return length;
}
}  // namespace ada::unicode

#endif // ADA_UNICODE_INL_H
4 changes: 2 additions & 2 deletions include/ada/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ bool percent_encode(std::string_view input, const uint8_t character_set[],
* Returns the index at which percent encoding should start, or (equivalently),
* the length of the prefix that does not require percent encoding.
*/
ada_really_inline size_t percent_encode_index(std::string_view input,
const uint8_t character_set[]);
size_t percent_encode_index(std::string_view input,
const uint8_t character_set[]);
/**
* @private
* Lowers the string in-place, assuming that the content is ASCII.
Expand Down
8 changes: 4 additions & 4 deletions src/helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ ada_really_inline size_t find_next_host_delimiter_special(
uint8x16_t classify = vandq_u8(lowpart, highpart);
if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
uint8x16_t is_zero = vceqq_u8(classify, zero);
uint16_t is_non_zero = ~to_bitmask(is_zero);
uint16_t is_non_zero = static_cast<uint16_t>(~to_bitmask(is_zero));
return i + trailing_zeroes(is_non_zero);
}
}
Expand All @@ -307,7 +307,7 @@ ada_really_inline size_t find_next_host_delimiter_special(
uint8x16_t classify = vandq_u8(lowpart, highpart);
if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
uint8x16_t is_zero = vceqq_u8(classify, zero);
uint16_t is_non_zero = ~to_bitmask(is_zero);
uint16_t is_non_zero = static_cast<uint16_t>(~to_bitmask(is_zero));
return view.length() - 16 + trailing_zeroes(is_non_zero);
}
}
Expand Down Expand Up @@ -583,7 +583,7 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
uint8x16_t classify = vandq_u8(lowpart, highpart);
if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
uint8x16_t is_zero = vceqq_u8(classify, zero);
uint16_t is_non_zero = ~to_bitmask(is_zero);
uint16_t is_non_zero = static_cast<uint16_t>(~to_bitmask(is_zero));
return i + trailing_zeroes(is_non_zero);
}
}
Expand All @@ -596,7 +596,7 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
uint8x16_t classify = vandq_u8(lowpart, highpart);
if (vmaxvq_u32(vreinterpretq_u32_u8(classify)) != 0) {
uint8x16_t is_zero = vceqq_u8(classify, zero);
uint16_t is_non_zero = ~to_bitmask(is_zero);
uint16_t is_non_zero = static_cast<uint16_t>(~to_bitmask(is_zero));
return view.length() - 16 + trailing_zeroes(is_non_zero);
}
}
Expand Down
Loading
Loading