001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math3.distribution; 019 020 import org.apache.commons.math3.exception.NotPositiveException; 021 import org.apache.commons.math3.exception.NotStrictlyPositiveException; 022 import org.apache.commons.math3.exception.NumberIsTooLargeException; 023 import org.apache.commons.math3.exception.util.LocalizedFormats; 024 import org.apache.commons.math3.util.FastMath; 025 import org.apache.commons.math3.random.RandomGenerator; 026 import org.apache.commons.math3.random.Well19937c; 027 028 /** 029 * Implementation of the hypergeometric distribution. 030 * 031 * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution (Wikipedia)</a> 032 * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric distribution (MathWorld)</a> 033 * @version $Id: HypergeometricDistribution.java 1416643 2012-12-03 19:37:14Z tn $ 034 */ 035 public class HypergeometricDistribution extends AbstractIntegerDistribution { 036 /** Serializable version identifier. */ 037 private static final long serialVersionUID = -436928820673516179L; 038 /** The number of successes in the population. */ 039 private final int numberOfSuccesses; 040 /** The population size. */ 041 private final int populationSize; 042 /** The sample size. */ 043 private final int sampleSize; 044 /** Cached numerical variance */ 045 private double numericalVariance = Double.NaN; 046 /** Whether or not the numerical variance has been calculated */ 047 private boolean numericalVarianceIsCalculated = false; 048 049 /** 050 * Construct a new hypergeometric distribution with the specified population 051 * size, number of successes in the population, and sample size. 052 * 053 * @param populationSize Population size. 054 * @param numberOfSuccesses Number of successes in the population. 055 * @param sampleSize Sample size. 056 * @throws NotPositiveException if {@code numberOfSuccesses < 0}. 057 * @throws NotStrictlyPositiveException if {@code populationSize <= 0}. 058 * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize}, 059 * or {@code sampleSize > populationSize}. 060 */ 061 public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize) 062 throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException { 063 this(new Well19937c(), populationSize, numberOfSuccesses, sampleSize); 064 } 065 066 /** 067 * Creates a new hypergeometric distribution. 068 * 069 * @param rng Random number generator. 070 * @param populationSize Population size. 071 * @param numberOfSuccesses Number of successes in the population. 072 * @param sampleSize Sample size. 073 * @throws NotPositiveException if {@code numberOfSuccesses < 0}. 074 * @throws NotStrictlyPositiveException if {@code populationSize <= 0}. 075 * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize}, 076 * or {@code sampleSize > populationSize}. 077 * @since 3.1 078 */ 079 public HypergeometricDistribution(RandomGenerator rng, 080 int populationSize, 081 int numberOfSuccesses, 082 int sampleSize) 083 throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException { 084 super(rng); 085 086 if (populationSize <= 0) { 087 throw new NotStrictlyPositiveException(LocalizedFormats.POPULATION_SIZE, 088 populationSize); 089 } 090 if (numberOfSuccesses < 0) { 091 throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SUCCESSES, 092 numberOfSuccesses); 093 } 094 if (sampleSize < 0) { 095 throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, 096 sampleSize); 097 } 098 099 if (numberOfSuccesses > populationSize) { 100 throw new NumberIsTooLargeException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE, 101 numberOfSuccesses, populationSize, true); 102 } 103 if (sampleSize > populationSize) { 104 throw new NumberIsTooLargeException(LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE, 105 sampleSize, populationSize, true); 106 } 107 108 this.numberOfSuccesses = numberOfSuccesses; 109 this.populationSize = populationSize; 110 this.sampleSize = sampleSize; 111 } 112 113 /** {@inheritDoc} */ 114 public double cumulativeProbability(int x) { 115 double ret; 116 117 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 118 if (x < domain[0]) { 119 ret = 0.0; 120 } else if (x >= domain[1]) { 121 ret = 1.0; 122 } else { 123 ret = innerCumulativeProbability(domain[0], x, 1); 124 } 125 126 return ret; 127 } 128 129 /** 130 * Return the domain for the given hypergeometric distribution parameters. 131 * 132 * @param n Population size. 133 * @param m Number of successes in the population. 134 * @param k Sample size. 135 * @return a two element array containing the lower and upper bounds of the 136 * hypergeometric distribution. 137 */ 138 private int[] getDomain(int n, int m, int k) { 139 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) }; 140 } 141 142 /** 143 * Return the lowest domain value for the given hypergeometric distribution 144 * parameters. 145 * 146 * @param n Population size. 147 * @param m Number of successes in the population. 148 * @param k Sample size. 149 * @return the lowest domain value of the hypergeometric distribution. 150 */ 151 private int getLowerDomain(int n, int m, int k) { 152 return FastMath.max(0, m - (n - k)); 153 } 154 155 /** 156 * Access the number of successes. 157 * 158 * @return the number of successes. 159 */ 160 public int getNumberOfSuccesses() { 161 return numberOfSuccesses; 162 } 163 164 /** 165 * Access the population size. 166 * 167 * @return the population size. 168 */ 169 public int getPopulationSize() { 170 return populationSize; 171 } 172 173 /** 174 * Access the sample size. 175 * 176 * @return the sample size. 177 */ 178 public int getSampleSize() { 179 return sampleSize; 180 } 181 182 /** 183 * Return the highest domain value for the given hypergeometric distribution 184 * parameters. 185 * 186 * @param m Number of successes in the population. 187 * @param k Sample size. 188 * @return the highest domain value of the hypergeometric distribution. 189 */ 190 private int getUpperDomain(int m, int k) { 191 return FastMath.min(k, m); 192 } 193 194 /** {@inheritDoc} */ 195 public double probability(int x) { 196 double ret; 197 198 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 199 if (x < domain[0] || x > domain[1]) { 200 ret = 0.0; 201 } else { 202 double p = (double) sampleSize / (double) populationSize; 203 double q = (double) (populationSize - sampleSize) / (double) populationSize; 204 double p1 = SaddlePointExpansion.logBinomialProbability(x, 205 numberOfSuccesses, p, q); 206 double p2 = 207 SaddlePointExpansion.logBinomialProbability(sampleSize - x, 208 populationSize - numberOfSuccesses, p, q); 209 double p3 = 210 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q); 211 ret = FastMath.exp(p1 + p2 - p3); 212 } 213 214 return ret; 215 } 216 217 /** 218 * For this distribution, {@code X}, this method returns {@code P(X >= x)}. 219 * 220 * @param x Value at which the CDF is evaluated. 221 * @return the upper tail CDF for this distribution. 222 * @since 1.1 223 */ 224 public double upperCumulativeProbability(int x) { 225 double ret; 226 227 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 228 if (x <= domain[0]) { 229 ret = 1.0; 230 } else if (x > domain[1]) { 231 ret = 0.0; 232 } else { 233 ret = innerCumulativeProbability(domain[1], x, -1); 234 } 235 236 return ret; 237 } 238 239 /** 240 * For this distribution, {@code X}, this method returns 241 * {@code P(x0 <= X <= x1)}. 242 * This probability is computed by summing the point probabilities for the 243 * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by 244 * {@code dx}. 245 * 246 * @param x0 Inclusive lower bound. 247 * @param x1 Inclusive upper bound. 248 * @param dx Direction of summation (1 indicates summing from x0 to x1, and 249 * 0 indicates summing from x1 to x0). 250 * @return {@code P(x0 <= X <= x1)}. 251 */ 252 private double innerCumulativeProbability(int x0, int x1, int dx) { 253 double ret = probability(x0); 254 while (x0 != x1) { 255 x0 += dx; 256 ret += probability(x0); 257 } 258 return ret; 259 } 260 261 /** 262 * {@inheritDoc} 263 * 264 * For population size {@code N}, number of successes {@code m}, and sample 265 * size {@code n}, the mean is {@code n * m / N}. 266 */ 267 public double getNumericalMean() { 268 return (double) (getSampleSize() * getNumberOfSuccesses()) / (double) getPopulationSize(); 269 } 270 271 /** 272 * {@inheritDoc} 273 * 274 * For population size {@code N}, number of successes {@code m}, and sample 275 * size {@code n}, the variance is 276 * {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}. 277 */ 278 public double getNumericalVariance() { 279 if (!numericalVarianceIsCalculated) { 280 numericalVariance = calculateNumericalVariance(); 281 numericalVarianceIsCalculated = true; 282 } 283 return numericalVariance; 284 } 285 286 /** 287 * Used by {@link #getNumericalVariance()}. 288 * 289 * @return the variance of this distribution 290 */ 291 protected double calculateNumericalVariance() { 292 final double N = getPopulationSize(); 293 final double m = getNumberOfSuccesses(); 294 final double n = getSampleSize(); 295 return (n * m * (N - n) * (N - m)) / (N * N * (N - 1)); 296 } 297 298 /** 299 * {@inheritDoc} 300 * 301 * For population size {@code N}, number of successes {@code m}, and sample 302 * size {@code n}, the lower bound of the support is 303 * {@code max(0, n + m - N)}. 304 * 305 * @return lower bound of the support 306 */ 307 public int getSupportLowerBound() { 308 return FastMath.max(0, 309 getSampleSize() + getNumberOfSuccesses() - getPopulationSize()); 310 } 311 312 /** 313 * {@inheritDoc} 314 * 315 * For number of successes {@code m} and sample size {@code n}, the upper 316 * bound of the support is {@code min(m, n)}. 317 * 318 * @return upper bound of the support 319 */ 320 public int getSupportUpperBound() { 321 return FastMath.min(getNumberOfSuccesses(), getSampleSize()); 322 } 323 324 /** 325 * {@inheritDoc} 326 * 327 * The support of this distribution is connected. 328 * 329 * @return {@code true} 330 */ 331 public boolean isSupportConnected() { 332 return true; 333 } 334 }