| /* |
| * Mathlib : A C Library of Special Functions |
| * Copyright (C) 1998 Ross Ihaka |
| * Copyright (C) 1999-2014 The R Core Team |
| * Copyright (C) 2004 Morten Welinder |
| * Copyright (C) 2004 The R Foundation |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, a copy is available at |
| * https://www.R-project.org/Licenses/ |
| * |
| * DESCRIPTION |
| * |
| * The distribution function of the hypergeometric distribution. |
| * |
| * Current implementation based on posting |
| * From: Morten Welinder <terra@gnome.org> |
| * Cc: R-bugs@biostat.ku.dk |
| * Subject: [Rd] phyper accuracy and efficiency (PR#6772) |
| * Date: Thu, 15 Apr 2004 18:06:37 +0200 (CEST) |
| ...... |
| |
| The current version has very serious cancellation issues. For example, |
| if you ask for a small right-tail you are likely to get total cancellation. |
| For example, phyper(59, 150, 150, 60, FALSE, FALSE) gives 6.372680161e-14. |
| The right answer is dhyper(0, 150, 150, 60, FALSE) which is 5.111204798e-22. |
| |
| phyper is also really slow for large arguments. |
| |
| Therefore, I suggest using the code below. This is a sniplet from Gnumeric ... |
| The code isn't perfect. In fact, if x*(NR+NB) is close to n*NR, |
| then this code can take a while. Not longer than the old code, though. |
| |
| -- Thanks to Ian Smith for ideas. |
| */ |
| |
| #include "nmath.h" |
| #include "dpq.h" |
| |
| static double pdhyper (double x, double NR, double NB, double n, int log_p) |
| { |
| /* |
| * Calculate |
| * |
| * phyper (x, NR, NB, n, TRUE, FALSE) |
| * [log] ---------------------------------- |
| * dhyper (x, NR, NB, n, FALSE) |
| * |
| * without actually calling phyper. This assumes that |
| * |
| * x * (NR + NB) <= n * NR |
| * |
| */ |
| LDOUBLE sum = 0; |
| LDOUBLE term = 1; |
| |
| while (x > 0 && term >= DBL_EPSILON * sum) { |
| term *= x * (NB - n + x) / (n + 1 - x) / (NR + 1 - x); |
| sum += term; |
| x--; |
| } |
| |
| double ss = (double) sum; |
| return log_p ? log1p(ss) : 1 + ss; |
| } |
| |
| |
| /* FIXME: The old phyper() code was basically used in ./qhyper.c as well |
| * ----- We need to sync this again! |
| */ |
| double phyper (double x, double NR, double NB, double n, |
| int lower_tail, int log_p) |
| { |
| /* Sample of n balls from NR red and NB black ones; x are red */ |
| |
| double d, pd; |
| |
| #ifdef IEEE_754 |
| if(ISNAN(x) || ISNAN(NR) || ISNAN(NB) || ISNAN(n)) |
| return x + NR + NB + n; |
| #endif |
| |
| x = floor (x + 1e-7); |
| NR = R_forceint(NR); |
| NB = R_forceint(NB); |
| n = R_forceint(n); |
| |
| if (NR < 0 || NB < 0 || !R_FINITE(NR + NB) || n < 0 || n > NR + NB) |
| ML_ERR_return_NAN; |
| |
| if (x * (NR + NB) > n * NR) { |
| /* Swap tails. */ |
| double oldNB = NB; |
| NB = NR; |
| NR = oldNB; |
| x = n - x - 1; |
| lower_tail = !lower_tail; |
| } |
| |
| if (x < 0) |
| return R_DT_0; |
| if (x >= NR || x >= n) |
| return R_DT_1; |
| |
| d = dhyper (x, NR, NB, n, log_p); |
| pd = pdhyper(x, NR, NB, n, log_p); |
| |
| return log_p ? R_DT_Log(d + pd) : R_D_Lval(d * pd); |
| } |