Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/error.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ has the following meanings which are used by both CPU and GPU versions
24 spread kernel formula type invalid
25 unknown exception caught
26 requested tolerance epsilon too small to achieve (hard error; tolerance must be >= machine epsilon)
27 iteration inside the setup code for the PSWF function evaluator failed to converge

For any nonzero value of ``ier`` the transform may not have been performed and the output should not be trusted. However, we hope that the value of ``ier`` will help to narrow down the problem.

Expand Down
21 changes: 12 additions & 9 deletions include/finufft/makeplan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,19 @@
int q = (int)(2 + 3.0 * J2); // not sure why so large? (NB cannot exceed MAX_NQUAD)
TF f[MAX_NQUAD];
double z[2 * MAX_NQUAD], w[2 * MAX_NQUAD];
gaussquad(2 * q, z, w); // only half the nodes used, eg on (0,1)
gaussquad(2 * q, z, w); // only half the nodes used, eg on (0,1)
std::complex<TF> a[MAX_NQUAD];
for (int n = 0; n < q; ++n) { // set up nodes z_n and vals f_n
z[n] *= J2; // rescale nodes
// vals & quadr weighs
for (int n = 0; n < q; ++n) { // set up nodes z_n and vals f_n
z[n] *= J2; // rescale nodes
// vals & quadr weighs
f[n] = J2 * (TF)w[n] * evaluate_kernel_runtime(TF(z[n]));
// phase winding rates
a[n] = -std::exp(2 * PI * std::complex<double>(0, 1) * z[n] / double(nf));
}
BIGINT nout = nf / 2 + 1; // how many values we're writing to
int nt = std::min(nout, (BIGINT)m.spopts.nthreads); // how many chunks
std::vector<BIGINT> brk(nt + 1); // start indices for each thread
for (int t = 0; t <= nt; ++t) // split nout mode indices btw threads
std::vector<BIGINT> brk(nt + 1); // start indices for each thread
for (int t = 0; t <= nt; ++t) // split nout mode indices btw threads
brk[t] = (BIGINT)(0.5 + nout * t / (double)nt);
#pragma omp parallel num_threads(nt)
{ // each thread gets own chunk to do
Expand Down Expand Up @@ -198,7 +198,7 @@

// heuristic dir=1 chunking for nthr>>1, typical for intel i7 and skylake...
m.spopts.max_subproblem_size = (dim == 1) ? 10000 : 100000; // todo: revisit
if (opts.spread_max_sp_size > 0) // override
if (opts.spread_max_sp_size > 0) // override
m.spopts.max_subproblem_size = opts.spread_max_sp_size;
// nthr above which switch OMP critical->atomic (add_wrapped..). R Blackwell's val:
m.spopts.atomic_threshold =
Expand Down Expand Up @@ -238,13 +238,16 @@
// coefficient (k=0 -> highest-degree). `horner_coeffs` was filled with
// zeros above, so panels that need fewer coefficients leave the rest as 0.

auto kernel_lambda = kernel_definition_lambda(m.spopts);

for (int j = 0; j < nspread; ++j) { // ......... loop over intervals (panels)
// affine map of x in [-1,1] to z in jth interval [-1+2j/w,-1+2(j+1)/w]
const TF xshiftj = TF(2 * j + 1 - nspread); // jth center in [-w,w]
// *** explore making this lambda double always, like kernel itself:
const auto kernel_this_interval = [xshiftj, this, nspread](TF x) -> TF {
const auto kernel_this_interval = [xshiftj, this, nspread, kernel_lambda](

Check warning on line 247 in include/finufft/makeplan.hpp

View workflow job for this annotation

GitHub Actions / cmake-sanitizers (macos-14, llvm, ON)

lambda capture 'this' is not used [-Wunused-lambda-capture]

Check warning on line 247 in include/finufft/makeplan.hpp

View workflow job for this annotation

GitHub Actions / cmake-sanitizers (macos-14, llvm, ON)

lambda capture 'this' is not used [-Wunused-lambda-capture]
TF x) -> TF {
const TF z = (x + xshiftj) / (TF)nspread;
return (TF)kernel_definition(m.spopts, (double)z);
return (TF)kernel_lambda((double)z);
};

// we're fitting in float for TF=float, *** explore always double:
Expand Down
3 changes: 2 additions & 1 deletion include/finufft_common/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <vector>

#include <finufft_common/constants.h>
Expand Down Expand Up @@ -67,7 +68,7 @@ template<class T, class F> std::vector<T> poly_fit(F &&f, int n) {
// The spread/interp kernel phi_beta(z) on z in [-1,1]. Not performance-critical;
// used only for polynomial interpolation (precompute_horner_coeffs). Always double.
// Defined in src/common/kernel.cpp.
double kernel_definition(const finufft_spread_opts &spopts, double z);
std::function<double(double)> kernel_definition_lambda(const finufft_spread_opts &spopts);

int theoretical_kernel_ns(double tol, int dim, int type, int debug,
const finufft_spread_opts &spopts);
Expand Down
60 changes: 56 additions & 4 deletions include/finufft_common/pswf.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,63 @@
#ifndef MATH_PSWF_H
#define MATH_PSWF_H

#include <array>
#include <cmath>
#include <finufft_errors.h>
#include <vector>

namespace finufft::common {
/*
normalized zeroth-order pswf
*/
double pswf(double c, double x);

/* Class for evaluation of the prolate spheroidal wavefunction
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this doc - thanks!

of order zero (Psi_0^c) inside [-1,1], for arbitrary frequency parameter c.
Computation is done using a basis of Legendre polynomials.
This implementation is based on work by Libin Lu for FINUFFT.
The orignal implementation was done by Vladimir Rokhlin and
can be found in src/common/specialfunctions/ of the DMK repo
https://github.com/flatironinstitute/dmk
The returned function values are normalized in such a way that
evaluation at x=0 always returns 1.0.
CAUTION: the internal routines have been heavily tweaked compared
to the original versions and cannot be expected to work in a more
general context! */
class PSWF0 {
private:
double c;
std::vector<double> workdata; // Legendre coefficients
std::vector<std::array<double, 3>> coef;
double xv0; // factor needed for normalization

template<typename T> T eval_raw(T x) const {
const T xsq = x * x;
T pjm1 = 0;
T pjm2 = 1;
T val = workdata[0];

size_t i = 1;
for (; i + 1 < coef.size(); i += 2) {
pjm1 = pjm2 * (xsq * coef[i][0] - coef[i][1]) - pjm1 * coef[i][2];
val += workdata[i] * pjm1;
pjm2 = pjm1 * (xsq * coef[i + 1][0] - coef[i + 1][1]) - pjm2 * coef[i + 1][2];
val += workdata[i + 1] * pjm2;
}
for (; i < coef.size(); ++i) {
T tmp = pjm2 * (xsq * coef[i][0] - coef[i][1]) - pjm1 * coef[i][2];
val += workdata[i] * tmp;
pjm1 = pjm2;
pjm2 = tmp;
}
return val;
}

public:
PSWF0(double c_);

double operator()(double x) const {
if (std::abs(x) > 1) return 0.;
return eval_raw(x) * xv0;
}
};

} // namespace finufft::common
#endif // MATH_PSWF_H
1 change: 1 addition & 0 deletions include/finufft_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ enum {
FINUFFT_ERR_KERFORMULA_NOTVALID = 24,
FINUFFT_ERR_UNKNOWN_EXCEPTION = 25,
FINUFFT_ERR_EPS_TOO_SMALL = 26,
FINUFFT_ERR_PSWF_SETUP = 27,
};
// clang-format on
#endif
64 changes: 42 additions & 22 deletions src/common/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,15 @@

namespace finufft::kernel {

double kernel_definition(const finufft_spread_opts &spopts, const double z) {
std::function<double(double)> kernel_definition_lambda(
const finufft_spread_opts &spopts) {
/* The spread/interp kernel phi_beta(z) function on standard interval z in [-1,1].
This evaluation does not need to be fast; it is used *only* for polynomial
interpolation via Horner coeffs (the interpolant is evaluated fast).
It can thus always be double-precision. No analytic Fourier transform pair is
needed, thanks to numerical quadrature in finufft_core:onedim*; playing with
new kernels is thus very easy.
Inputs:
z - real ordinate on standard interval [-1,1]. Handling of edge cases
at or near +-1 is no longer crucial, because precompute_horner_coeffs
(the only user of this function) has interpolation nodes in (-1,1).
spopts - spread_opts struct containing fields:
beta - shape parameter for ES, KB, or other prolate kernels
(a.k.a. c parameter in PSWF).
Expand All @@ -32,40 +30,62 @@ double kernel_definition(const finufft_spread_opts &spopts, const double z) {
kerformula also to select a parameter-choice method.)
Note: the default 0 (in opts.spread_kerformula) is invalid here;
selection of a >0 kernel type must already have happened.
Output: phi(z), as in the notation of original 2019 paper ([FIN] in the docs).
Output: lambda function that can be called with z to return phi(z),
as in the notation of original 2019 paper ([FIN] in the docs).

Notes: 1) no normalization of max value or integral is needed, since any
overall factor is cancelled out in the deconvolve step. However,
values as large as exp(beta) have caused floating-pt overflow; don't
use them.
Barnett rewritten 1/13/26 for double on [-1,1]; based on Barbone Dec 2025.
*/
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
double beta = spopts.beta; // get shape param
double arg = beta * std::sqrt(1.0 - z * z); // common argument for exp, I0, etc
double beta = spopts.beta; // get shape param
int kf = spopts.kerformula;

if (kf == 1 || kf == 2)
if (kf == 1 || kf == 2) {
// ES ("exponential of semicircle" or "exp sqrt"), see [FIN] reference.
// Used in FINUFFT 2017-2025 (up to v2.4.1). max is 1, as of v2.3.0.
return std::exp(arg) / std::exp(beta);
else if (kf == 3)
const double expbeta = std::exp(beta);
return [beta, expbeta](double z) {
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's a bit sad how much code duplication this PR created. I guess there's no way to have the clean return of 0.0 as in my original l.43 ? Same for the arg which is now repeated several times...

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I can write a version with less code duplication, which is more similar in spirit to the current structure. That will have to wait till next week though.
My goal was to minimize the individual functions that are doing the actual evaluation; if we keep the current style, the evaluation function itself will contain the if(kf==<something>) sequence, which felt worse to me.
Some degree of duplication will be unavoidable however, if we want to separate the initialization part (i.r. precomputing the expbeta or setting up the PSWF0) and the evaluation proper - and I think we really want to do that.

return std::exp(beta * std::sqrt(1.0 - z * z)) / expbeta;
};
} else if (kf == 3) {
// forwards Kaiser--Bessel (KB), normalized to max of 1.
// std::cyl_bessel_i is from <cmath>, expects double. See src/common/utils.cpp
return common::cyl_bessel_i(0, arg) / common::cyl_bessel_i(0, beta);
else if (kf == 4)
const double besselbeta = common::cyl_bessel_i(0, beta);
return [beta, besselbeta](double z) {
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
return common::cyl_bessel_i(0, beta * std::sqrt(1.0 - z * z)) / besselbeta;
};
} else if (kf == 4) {
// continuous (deplinthed) KB, as in Barnett SIREV 2022, normalized to max nearly 1
return (common::cyl_bessel_i(0, arg) - 1.0) / common::cyl_bessel_i(0, beta);
else if (kf == 5)
return std::cosh(arg) / std::cosh(beta); // normalized cosh-type of Rmk. 13 [FIN]
else if (kf == 6)
return (std::cosh(arg) - 1.0) / std::cosh(beta); // Potts-Tasche cont cosh-type
else if (kf >= 7 && kf <= 9)
return common::pswf(beta, z); // prolate (PSWF) Psi_0, normalized to 1 at z=0
else {
const double besselbeta = common::cyl_bessel_i(0, beta);
return [beta, besselbeta](double z) {
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
return (common::cyl_bessel_i(0, beta * std::sqrt(1.0 - z * z)) - 1.0) / besselbeta;
};
} else if (kf == 5) {
const double coshbeta = std::cosh(beta);
return [beta, coshbeta](double z) {
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
return std::cosh(beta * std::sqrt(1.0 - z * z)) / coshbeta;
}; // normalized cosh-type of Rmk. 13 [FIN]
} else if (kf == 6) {
const double coshbeta = std::cosh(beta);
return [beta, coshbeta](double z) {
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
return (std::cosh(beta * std::sqrt(1.0 - z * z)) - 1.0) / coshbeta;
}; // Potts-Tasche cont cosh-type
} else if (kf >= 7 && kf <= 9) {
finufft::common::PSWF0 pswf(beta);
return [pswf](double z) {
if (std::abs(z) > 1.0) return 0.0; // restrict support to [-1,1]
return pswf(z);
}; // prolate (PSWF) Psi_0, normalized to 1 at z=0
} else {
fprintf(stderr, "[%s] unknown spopts.kerformula=%d\n", __func__, spopts.kerformula);
throw finufft::exception(FINUFFT_ERR_KERFORMULA_NOTVALID);
return std::numeric_limits<double>::quiet_NaN(); // never gets here, non-signalling
}
}

Expand Down
Loading
Loading