Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
8d7c176
Fix sign conversion when Integer is a signed type
mborland Mar 2, 2026
02583a1
Merge pull request #291 from boostorg/sign
mborland Mar 2, 2026
8e2ad30
Add from_chars for integers CUDA support
mborland Mar 13, 2026
e649c46
Add CUDA cmake CI runner
mborland Mar 13, 2026
833c656
Add CUDA testing infrastructure
mborland Mar 13, 2026
29487c2
Add CUDA tests and CML options
mborland Mar 13, 2026
ac110be
Add testing with bases from 2 to 36
mborland Mar 13, 2026
9f4faa3
Disallow builtin 128-bit integers on device
mborland Mar 16, 2026
eb191f1
Disable emulated128 path on device
mborland Mar 16, 2026
977fb1f
Replace uses of numeric limits on device
mborland Mar 16, 2026
ffba31a
Merge pull request #1 from cppalliance/from_chars
mborland Mar 16, 2026
4848531
Add cuda compatible memcpy
mborland Mar 16, 2026
494ee73
Modify implementation for CUDA support
mborland Mar 16, 2026
9e7fe1e
Modify driver functions with CUDA markers
mborland Mar 16, 2026
4c6a6aa
Add CUDA testing of to_chars
mborland Mar 16, 2026
cacba89
Fix typo
mborland Mar 16, 2026
3e0cfda
Fix to_chars_result for device
mborland Mar 16, 2026
cec072d
Move endif to remove vexing parse for device
mborland Mar 16, 2026
a8f7c29
Fix missing definition of BOOST_CHARCONV_CONSTEXPR
mborland Mar 16, 2026
af47854
Merge pull request #2 from cppalliance/to_chars_2
mborland Mar 16, 2026
1e95837
Update documentation to add CUDA
mborland Mar 16, 2026
f0970a3
Deactivate CUDA runs since it's not available in boostorg repos
mborland Mar 16, 2026
9e07e3e
Add host device marker to digit counting of u32 and u64
mborland Mar 17, 2026
9d98c61
Add testing of device digit counting
mborland Mar 17, 2026
9f0c532
Merge pull request #293 from cppalliance/digits
mborland Mar 17, 2026
4dc34a9
Fix potential pessimization on host with CUDA detection
mborland Mar 22, 2026
59932aa
Pass macro enabling CUDA as COMPILE_DEFINITION
mborland Mar 22, 2026
d7bae92
Better document CUDA support and how to use
mborland Mar 22, 2026
b1a7565
Add from_chars benchmark
mborland Mar 30, 2026
b242a31
Add to_chars benchmark
mborland Mar 30, 2026
72b118d
Remove duplicated macro section
mborland Mar 30, 2026
ab45b54
Make performance comparison more fair
mborland Mar 31, 2026
9f7d89e
Use std::array instead of c-array
mborland Mar 31, 2026
d0862ed
Add benchmark of u32 to and from as well
mborland Mar 31, 2026
b7dee45
Add benchmarks doc page
mborland Mar 31, 2026
95bfc98
Merge pull request #295 from boostorg/cuda_bench
mborland Mar 31, 2026
2b9f7e6
Add expanded reproducer
mborland Apr 27, 2026
1686430
Fix buffer sizing calculation for precision 0
mborland Apr 27, 2026
dd24281
Fix handling of negative precision 0 values with >=80 bit ldbls
mborland Apr 27, 2026
87bf578
Separate issue with decomposition of 128-bit long doubles
mborland Apr 27, 2026
78d2d26
Merge pull request #297 from boostorg/296
mborland Apr 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -765,3 +765,65 @@ jobs:

- name: Run tests
run: ci/build.sh


cuda-cmake-test:
strategy:
fail-fast: false

# Only in cppa mirror
# runs-on: gpu-runner-1
runs-on: ubuntu-24.04

steps:
- uses: Jimver/cuda-toolkit@v0.2.25
id: cuda-toolkit
with:
cuda: '12.8.0'
method: 'network'
sub-packages: '["nvcc"]'

- name: Output CUDA information
run: |
echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"+
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
nvcc -V
- uses: actions/checkout@v4

- name: Install Packages
run: |
sudo apt-get install -y cmake make
- name: Setup Boost
run: |
echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
LIBRARY=${GITHUB_REPOSITORY#*/}
echo LIBRARY: $LIBRARY
echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
echo GITHUB_BASE_REF: $GITHUB_BASE_REF
echo GITHUB_REF: $GITHUB_REF
REF=${GITHUB_BASE_REF:-$GITHUB_REF}
REF=${REF#refs/heads/}
echo REF: $REF
BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
echo BOOST_BRANCH: $BOOST_BRANCH
cd ..
git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
cd boost-root
mkdir -p libs/$LIBRARY
cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
git submodule update --init tools/boostdep
python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
- name: Configure
run: |
cd ../boost-root
mkdir __build__ && cd __build__
cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_CHARCONV_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=17 ..
- name: Build tests
run: |
cd ../boost-root/__build__
cmake --build . --target tests -j $(nproc)
# Only runs in cppa mirror
#- name: Run tests
# run: |
# cd ../boost-root/__build__
# ctest --output-on-failure --no-tests=error
1 change: 1 addition & 0 deletions doc/charconv.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ include::charconv/to_chars.adoc[]
include::charconv/chars_format.adoc[]
include::charconv/limits.adoc[]
include::charconv/benchmarks.adoc[]
include::charconv/cuda_benchmarks.adoc[]
include::charconv/sources.adoc[]
include::charconv/acknowledgments.adoc[]
include::charconv/copyright.adoc[]
Expand Down
2 changes: 2 additions & 0 deletions doc/charconv/api_reference.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,7 @@ https://www.boost.org/LICENSE_1_0.txt

== Macros

- <<enable_cuda_, `BOOST_CHARCONV_ENABLE_CUDA`>>
- <<enable_cuda_, `BOOST_CHARCONV_HOST_DEVICE`>>
- <<integral_usage_notes_, `BOOST_CHARCONV_CONSTEXPR`>>
- <<run_benchmarks_, `BOOST_CHARCONV_RUN_BENCHMARKS`>>
6 changes: 6 additions & 0 deletions doc/charconv/build.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ If you are using another build system and you want support for these types you w

IMPORTANT: libquadmath is only available on supported platforms (e.g. Linux with x86, x86_64, PPC64, and IA64).

[#enable_cuda_]
== CUDA Support

This library has partial support for CUDA, which can be enabled by defining `BOOST_CHARCONV_ENABLE_CUDA` during compilation.
Functions with `BOOST_CHARCONV_HOST_DEVICE` in their signature can be run on both host and device; all others run strictly on the host.

== Dependencies

This library depends on: Boost.Assert, Boost.Config, Boost.Core, and optionally libquadmath (see above).
65 changes: 65 additions & 0 deletions doc/charconv/cuda_benchmarks.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
////
Copyright 2026 Matt Borland
Distributed under the Boost Software License, Version 1.0.
https://www.boost.org/LICENSE_1_0.txt
////

= CUDA Benchmarks
:idprefix: cuda_benchmarks

This section describes a range of performance benchmarks that have been run comparing this library running on the host (CPU) with the same functions running on a CUDA device (GPU), and how to run your own benchmarks if required.

The values are relative to the performance of `boost::charconv::to_chars` and `boost::charconv::from_chars` running on the host.
Larger numbers are more performant (e.g. 2.00 means twice as fast, and 0.50 means it takes twice as long).
To make the comparison between host and device fairer, the time to launch the CUDA kernel is included in the device time.
E.g. (in pseudocode)
[source, c++]
----
time_point device_begin;
run_cuda_to_chars<<<blocks, threads>>>(input, output, numElements);
time_point device_end;

time_point host_begin;
for (int i = 0; i < numElements; ++i)
{
uint32_t value;
boost::charconv::from_chars(input, input + sizeof(input), value);
results[i] = value;
}
time_point host_end;
----

== How to run the Benchmarks
[#run_cuda_benchmarks_]

To run the benchmarks yourself, navigate to the test folder, and you will find files of the form `cuda_benchmark_to_chars_<type>.cu`,
and `cuda_benchmark_from_chars_<type>.cu`.
You will need to compile these using a valid host compiler and version of the CUDA toolkit.
In our CI we use CUDA Toolkit v12.8 while locally we use CUDA Toolkit v13.1.

== Results
[#benchmark_cuda_results]

=== x86_64 Linux

Data in the following tables were collected on Ubuntu 24.04 with x86_64 architecture using GCC 13.4.0 with libstdc++, CUDA Toolkit v13.1 on a device with `CUDA_ARCHITECTURE=86`.

==== Integral

.to_chars base 10 integers
|===
| Functions | Relative Performance (uint32_t / uint64_t)
| Boost.Charconv.to_chars on Host
| 1.00 / 1.00
| Boost.Charconv.to_chars on Device
| 10.53 / 10.45
|===

.from_chars base 10 integers
|===
| Functions | Relative Performance (uint32_t / uint64_t)
| Boost.Charconv.from_chars on Host
| 1.00 / 1.00
| Boost.Charconv.from_chars on Device
| 2.64 / 2.66
|===
12 changes: 9 additions & 3 deletions doc/charconv/from_chars.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ struct from_chars_result
const char* ptr;
std::errc ec;

friend constexpr bool operator==(const from_chars_result& lhs, const from_chars_result& rhs) noexcept = default;
constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
BOOST_CHARCONV_HOST_DEVICE friend constexpr bool operator==(const from_chars_result& lhs, const from_chars_result& rhs) noexcept = default;
BOOST_CHARCONV_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
};

template <typename Integral>
BOOST_CXX14_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, Integral& value, int base = 10) noexcept;
BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, Integral& value, int base = 10) noexcept;

template <typename Integral>
BOOST_CXX14_CONSTEXPR from_chars_result from_chars(boost::core::string_view sv, Integral& value, int base = 10) noexcept;
Expand All @@ -54,6 +54,12 @@ from_chars_result from_chars_erange(boost::core::string_view sv, Real& value, ch
}} // Namespace boost::charconv
----

[#host_device_]
== `BOOST_CHARCONV_HOST_DEVICE`

When compiling the library with NVCC and `BOOST_CHARCONV_ENABLE_CUDA` defined, `BOOST_CHARCONV_HOST_DEVICE` expands to `pass:[__host__ __device__]`, so functions marked with it can be used on both CPU and GPU.
Otherwise `BOOST_CHARCONV_HOST_DEVICE` expands to the empty string.

== from_chars parameters
* `first`, `last` - pointers to a valid range to parse
* `sv` - string view of a valid range to parse.
Expand Down
4 changes: 3 additions & 1 deletion doc/charconv/overview.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ Boost.Charconv converts character buffers to numbers, and numbers to character b
It is a small library of two overloaded functions to do the heavy lifting, plus several supporting enums, structures, templates, and constants, with a particular focus on performance and consistency
across the supported development environments.

Why should I be interested in this Library? Charconv is locale-independent, non-allocating^1^, non-throwing and only requires a minimum of C++ 11.
Why should I be interested in this Library? Charconv is locale-independent, non-allocating^1^, non-throwing and only requires a minimum of pass:[C++11].
It provides functionality similar to that found in `std::printf` or `std::strtod` with <<benchmark_results_, substantial performance increases>>.
This library can also be used in place of the standard library `<charconv>` if unavailable with your toolchain.
The integer portion of the library can also be used on GPU with NVCC.
Currently only https://en.cppreference.com/w/cpp/compiler_support/17.html[GCC 11+ and MSVC 19.24+] support both integer and floating-point conversions in their implementation of `<charconv>`. +
If you are using either of those compilers, Boost.Charconv is at least as performant as `<charconv>`, and can be up to several times faster.
See: <<Benchmarks>>
Expand All @@ -31,5 +32,6 @@ Boost.Charconv is tested on Ubuntu, macOS, and Windows with the following compil
* GCC 5 or later
* Clang 3.8 or later
* Visual Studio 2015 (14.0) or later
* NVCC 12.8 or later

Tested on https://github.com/boostorg/charconv/actions[GitHub Actions] and https://drone.cpp.al/boostorg/charconv[Drone].
7 changes: 4 additions & 3 deletions doc/charconv/to_chars.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ struct to_chars_result
char* ptr;
std::errc ec;

friend constexpr bool operator==(const to_chars_result& lhs, const to_chars_result& rhs) noexcept; = default;
constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
BOOST_CHARCONV_HOST_DEVICE friend constexpr bool operator==(const to_chars_result& lhs, const to_chars_result& rhs) noexcept = default;
BOOST_CHARCONV_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
};

template <typename Integral>
BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, Integral value, int base = 10) noexcept;
BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, Integral value, int base = 10) noexcept;

template <typename Integral>
BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars<bool>(char* first, char* last, Integral value, int base) noexcept = delete;
Expand Down Expand Up @@ -71,6 +71,7 @@ See <<chars_format overview>> for description.
** compiled using `-std=c++14` or newer
** using a compiler with `\__builtin_is_constant_evaluated`
* These functions have been tested to support `\__int128` and `unsigned __int128`
* When compiling with NVCC (i.e., with `BOOST_CHARCONV_HOST_DEVICE` defined as `pass:[__host__ __device__]`), these functions are available for use on both CPU and GPU.

=== Usage notes for to_chars for floating point types
* The following will be returned when handling different values of `NaN`
Expand Down
4 changes: 2 additions & 2 deletions include/boost/charconv/detail/apply_sign.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ namespace boost { namespace charconv { namespace detail {

template <typename Integer, typename Unsigned_Integer = detail::make_unsigned_t<Integer>,
typename std::enable_if<detail::is_signed<Integer>::value, bool>::type = true>
constexpr Unsigned_Integer apply_sign(Integer val) noexcept
BOOST_CHARCONV_HOST_DEVICE constexpr Unsigned_Integer apply_sign(Integer val) noexcept
{
return -(static_cast<Unsigned_Integer>(val));
}

template <typename Unsigned_Integer, typename std::enable_if<!detail::is_signed<Unsigned_Integer>::value, bool>::type = true>
constexpr Unsigned_Integer apply_sign(Unsigned_Integer val) noexcept
BOOST_CHARCONV_HOST_DEVICE constexpr Unsigned_Integer apply_sign(Unsigned_Integer val) noexcept
{
return val;
}
Expand Down
7 changes: 6 additions & 1 deletion include/boost/charconv/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#endif

// Use 128-bit integers and suppress warnings for using extensions
#if defined(BOOST_HAS_INT128)
#if defined(BOOST_HAS_INT128) && !(defined(BOOST_CHARCONV_ENABLE_CUDA) && defined(__CUDACC__))
# define BOOST_CHARCONV_HAS_INT128
# define BOOST_CHARCONV_INT128_MAX static_cast<boost::int128_type>((static_cast<boost::uint128_type>(1) << 127) - 1)
# define BOOST_CHARCONV_INT128_MIN (-BOOST_CHARCONV_INT128_MAX - 1)
Expand Down Expand Up @@ -201,5 +201,10 @@ static_assert(std::is_same<long double, __float128>::value, "__float128 should b

#endif

#if defined(BOOST_CHARCONV_ENABLE_CUDA) && defined(__CUDACC__)
# define BOOST_CHARCONV_HOST_DEVICE __host__ __device__
#else
# define BOOST_CHARCONV_HOST_DEVICE
#endif

#endif // BOOST_CHARCONV_DETAIL_CONFIG_HPP
2 changes: 1 addition & 1 deletion include/boost/charconv/detail/dragonbox/floff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1764,7 +1764,7 @@ BOOST_CHARCONV_SAFEBUFFERS to_chars_result floff(const double x, int precision,

const auto initial_digits = static_cast<std::uint32_t>(prod >> 32);

buffer -= (initial_digits < 10 && buffer != first ? 1 : 0);
buffer -= (initial_digits < 10 && buffer != first && precision != 0 ? 1 : 0);
remaining_digits -= (2 - (initial_digits < 10 ? 1 : 0));

// Avoid the situation where we have a leading 0 that we don't need
Expand Down
Loading
Loading