-
Notifications
You must be signed in to change notification settings - Fork 256
ENH: add parameter finder for degrees or freedom for students_t distribution #1385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 3 commits
de6a562
7b015ec
a1609a9
1cf9ea2
1132241
123fdd1
24c88ba
0c8365a
1868814
b97dceb
9a26afc
eecf3d0
0308d3c
4957f2e
9c865a3
80de922
2540e5f
739df4b
ef2e0d2
a16d4bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -58,6 +58,10 @@ class students_t_distribution | |||
| RealType sd, | ||||
| RealType hint = 100); | ||||
|
|
||||
| BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom( | ||||
| RealType x, | ||||
| RealType p); | ||||
|
|
||||
| private: | ||||
| // Data member: | ||||
| RealType df_; // degrees of freedom is a real number > 0 or +infinity. | ||||
|
|
@@ -308,6 +312,23 @@ struct sample_size_func | |||
| RealType alpha, beta, ratio; | ||||
| }; | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
| struct cdf_to_df_func | ||||
| { | ||||
| BOOST_MATH_GPU_ENABLED cdf_to_df_func(RealType x_val, RealType p_val, bool c) | ||||
| : x(x_val), p(p_val), comp(c) {} | ||||
|
|
||||
| BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df) | ||||
| { | ||||
| students_t_distribution<RealType, Policy> t(df); | ||||
| return comp ? | ||||
| RealType(p - cdf(complement(t, x))) | ||||
| : RealType(cdf(t, x) - p); | ||||
| } | ||||
| RealType x, p; | ||||
| bool comp; | ||||
| }; | ||||
|
|
||||
| } // namespace detail | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
|
|
@@ -344,6 +365,111 @@ BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_ | |||
| return result; | ||||
| } | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
| BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::invert_probability_with_respect_to_degrees_of_freedom( | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already have a function for this here:
However, you have a better initial guess to the root finder which would be a very welcome addition!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found this function but was not sure how to map the inversion problem we have here (finding degrees of freedom given a probability and a quantile) to this one arising from sample size calculation. The signatures are:
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah OK, slightly different problems, I would still overload the existing function though for consistency and call it: find_degrees_of_freedom(RealType t_stat, RealType p) |
||||
| RealType x, | ||||
| RealType p) | ||||
| { | ||||
| BOOST_MATH_STD_USING // for ADL of std functions | ||||
| constexpr auto function = "boost::math::students_t_distribution<%1%>::invert_probability_with_respect_to_degrees_of_freedom"; | ||||
| // | ||||
| // Check for domain errors: | ||||
| // | ||||
| RealType error_result; | ||||
| if (false == detail::check_probability(function, p, &error_result, Policy())) | ||||
| return error_result; | ||||
|
|
||||
| // Use symmetry: CDF(-x; df) = 1 - CDF(x; df), so always work with x >= 0. | ||||
| RealType x_abs = (x < 0) ? -x : x; | ||||
| RealType p_adj = (x < 0) ? (1 - p) : p; | ||||
|
|
||||
| if (x_abs == 0) | ||||
| { | ||||
| // CDF(0; df) = 0.5 for all df; only solvable when p == 0.5. | ||||
| if (p_adj == static_cast<RealType>(0.5)) | ||||
| return boost::math::numeric_limits<RealType>::infinity(); | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Small point, this should return policies::raise_overflow_error for consistency. |
||||
| return policies::raise_domain_error<RealType>( | ||||
| function, | ||||
| "No degrees of freedom can satisfy CDF(0; df) == %1% (must be 0.5).", | ||||
| p, Policy()); | ||||
| } | ||||
|
|
||||
| // | ||||
| // Step 1: Edgeworth warm start. | ||||
| // F(x; nu) ~ Phi(x) - phi(x)(x + x^3)/(4*nu) + phi(x)(3x + 5x^3 + 7x^5 - 3x^7)/(96*nu^2) | ||||
| // Substituting u = 1/nu and setting F(x; nu) = p gives: | ||||
| // B*u^2 - A*u + (Phi(x) - p) = 0 | ||||
| // where: | ||||
| // A = phi(x) * (x + x^3) / 4 | ||||
| // B = phi(x) * (3x + 5x^3 + 7x^5 - 3x^7) / 96 | ||||
| // | ||||
| normal_distribution<RealType, Policy> std_normal(0, 1); | ||||
| RealType phi = pdf(std_normal, x_abs); | ||||
| RealType Phi = cdf(std_normal, x_abs); | ||||
|
|
||||
| RealType x2 = x_abs * x_abs; | ||||
| RealType x3 = x2 * x_abs; | ||||
| RealType x5 = x3 * x2; | ||||
| RealType x7 = x5 * x2; | ||||
|
|
||||
| RealType A = phi * (x_abs + x3) / 4; | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If an LLM wrote most of this then colour me quite impressed ;) A small point of C++ etiquette: variables are normally all_lower_case and upper case is reserved for macros.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My workflow was basically:
|
||||
| RealType B = phi * (3 * x_abs + 5 * x3 + 7 * x5 - 3 * x7) / 96; | ||||
| RealType C = Phi - p_adj; | ||||
|
|
||||
| RealType hint = static_cast<RealType>(0.01); | ||||
| RealType discriminant = A * A - 4 * B * C; | ||||
| if (discriminant >= 0 && B != 0) | ||||
| { | ||||
| RealType sqrt_disc = sqrt(discriminant); | ||||
| // Two roots of B*u^2 - A*u + C = 0; pick the smallest positive u (largest nu = 1/u). | ||||
| RealType u1 = (A - sqrt_disc) / (2 * B); | ||||
| RealType u2 = (A + sqrt_disc) / (2 * B); | ||||
| RealType u = -1; | ||||
| if (u1 > 0 && u2 > 0) | ||||
| u = (u1 < u2) ? u1 : u2; | ||||
| else if (u1 > 0) | ||||
| u = u1; | ||||
| else if (u2 > 0) | ||||
| u = u2; | ||||
|
|
||||
| if (u > 0) | ||||
| { | ||||
| RealType nu_hat = 1 / u; | ||||
| // Step 2: validate by checking relative residual of the exact CDF. | ||||
| students_t_distribution<RealType, Policy> t_hat(nu_hat); | ||||
| RealType exact_cdf = cdf(t_hat, x_abs); | ||||
| RealType residual = (exact_cdf > p_adj) ? (exact_cdf - p_adj) : (p_adj - exact_cdf); | ||||
| RealType relative_residual = (p_adj != 0) ? residual / p_adj : residual; | ||||
| if (relative_residual <= tools::epsilon<RealType>() * 4) | ||||
| return nu_hat; // Edgeworth estimate is already exact to machine precision. | ||||
| if (relative_residual <= static_cast<RealType>(0.1)) | ||||
| hint = nu_hat; | ||||
| } | ||||
| } | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With all this in the function, it was initially hard for me to parse what was going on. Could this be put in a separate function called something like
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe refactor out, and use in the existing function? That would get automatically tested in the existing tests as well. |
||||
|
|
||||
| // | ||||
| // Step 3: root-find with bracket_and_solve_root on f(df) = CDF(x; df) - p. | ||||
| // For x > 0, CDF(x; df) is strictly increasing in df, so f is a rising function. | ||||
| // Pass min(p_adj, q_adj) always and use the complement CDF when p_adj > q_adj | ||||
| // to avoid catastrophic cancellation near 1 (mirrors non_centrality_finder_f). | ||||
| // | ||||
| RealType q_adj = 1 - p_adj; | ||||
| detail::cdf_to_df_func<RealType, Policy> f(x_abs, p_adj < q_adj ? p_adj : q_adj, p_adj >= q_adj); | ||||
| tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>()); | ||||
| boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>(); | ||||
| boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root( | ||||
| f, hint, RealType(2), true, tol, max_iter, Policy()); | ||||
| RealType result = r.first + (r.second - r.first) / 2; | ||||
| if (max_iter >= policies::get_max_root_iterations<Policy>()) | ||||
| { | ||||
| return policies::raise_evaluation_error<RealType>(function, | ||||
| "Unable to locate solution in a reasonable time: either there is no answer to " | ||||
| "the degrees of freedom or the answer is infinite. Current best guess is %1%", | ||||
| result, Policy()); | ||||
| } | ||||
| return result; | ||||
| } | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
| BOOST_MATH_GPU_ENABLED inline RealType mode(const students_t_distribution<RealType, Policy>& /*dist*/) | ||||
| { | ||||
|
|
||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -548,6 +548,103 @@ void test_spots(RealType) | |
| static_cast<RealType>(1.0))), | ||
| 9); | ||
|
|
||
| // Tests for invert_probability_with_respect_to_degrees_of_freedom. | ||
| // Each case is derived from the CDF spot tests above: the exact df is known, | ||
| // so we verify that inverting CDF(x; df) = p recovers df to tight tolerance. | ||
| { | ||
| // 100 times higher tolerance as in the quantile spot tests above as | ||
| // we use root finding | ||
| RealType tol_inv = boost::math::tools::epsilon<RealType>() * 500000; | ||
| if (boost::math::tools::digits<RealType>() > 100) | ||
| tol_inv *= 50000; | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(-6.96455673428326), | ||
| static_cast<RealType>(0.01)), | ||
| static_cast<RealType>(2), | ||
| tol_inv); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see these test values are used in quite a few places. It would be nice to combine all these into a single function which checks every method. This ensures we only have to define the values for the spot check in one place. It also cuts down on the amount of code. Maybe this would be for a separate PR though? |
||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(-3.36492999890721), | ||
| static_cast<RealType>(0.01)), | ||
| static_cast<RealType>(5), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(-0.559429644), | ||
| static_cast<RealType>(0.3)), | ||
| static_cast<RealType>(5), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(1.475884049), | ||
| static_cast<RealType>(0.9)), | ||
| static_cast<RealType>(5), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(-1.475884049), | ||
| static_cast<RealType>(0.1)), | ||
| static_cast<RealType>(5), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(-5.2410429995425), | ||
| static_cast<RealType>(0.00001)), | ||
| static_cast<RealType>(25), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(6.96455673428326), | ||
| static_cast<RealType>(0.99)), | ||
| static_cast<RealType>(2), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(0.610822886098362)), | ||
| static_cast<RealType>(0.1), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(0.777242554908434)), | ||
| static_cast<RealType>(0.5), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(0.822925875908677)), | ||
| static_cast<RealType>(0.75), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(0.977114826753374)), | ||
| static_cast<RealType>(1000), | ||
| tol_inv); | ||
| BOOST_CHECK_CLOSE( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(0.97724973307434)), | ||
| static_cast<RealType>(1e6), | ||
| tol_inv); | ||
|
|
||
| // Domain error: p outside (0,1) | ||
| #ifndef BOOST_NO_EXCEPTIONS | ||
| BOOST_MATH_CHECK_THROW( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(-0.1)), | ||
| std::domain_error); | ||
| BOOST_MATH_CHECK_THROW( | ||
| students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom( | ||
| static_cast<RealType>(2), | ||
| static_cast<RealType>(1.1)), | ||
| std::domain_error); | ||
| #endif | ||
| } // invert_probability_with_respect_to_degrees_of_freedom | ||
|
|
||
| // Test for large degrees of freedom when should be same as normal. | ||
| RealType inf = std::numeric_limits<RealType>::infinity(); | ||
| RealType nan = std::numeric_limits<RealType>::quiet_NaN(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we please rename this to find_degrees_of_freedom for consistency? IMO It's fine to overload the existing function.