001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    import org.apache.commons.math.util.MathUtils;
024    
025    /**
026     * The default implementation of {@link HypergeometricDistribution}.
027     *
028     * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
029     */
030    public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
031        implements HypergeometricDistribution, Serializable 
032    {
033    
034        /** Serializable version identifier */
035        private static final long serialVersionUID = -436928820673516179L;
036    
037        /** The number of successes in the population. */
038        private int numberOfSuccesses;
039        
040        /** The population size. */
041        private int populationSize;
042        
043        /** The sample size. */
044        private int sampleSize;
045        
046        /**
047         * Construct a new hypergeometric distribution with the given the population
048         * size, the number of successes in the population, and the sample size.
049         * @param populationSize the population size.
050         * @param numberOfSuccesses number of successes in the population.
051         * @param sampleSize the sample size.
052         */
053        public HypergeometricDistributionImpl(int populationSize,
054            int numberOfSuccesses, int sampleSize) {
055            super();
056            if (numberOfSuccesses > populationSize) {
057                throw MathRuntimeException.createIllegalArgumentException(
058                    "number of successes ({0}) must be less than or equal to population size ({1})",
059                    numberOfSuccesses, populationSize);
060            }
061            if (sampleSize > populationSize) {
062                throw MathRuntimeException.createIllegalArgumentException(
063                      "sample size ({0}) must be less than or equal to population size ({1})",
064                      sampleSize, populationSize);
065            }
066            setPopulationSize(populationSize);
067            setSampleSize(sampleSize);
068            setNumberOfSuccesses(numberOfSuccesses);
069        }
070    
071        /**
072         * For this distribution, X, this method returns P(X ≤ x).
073         * @param x the value at which the PDF is evaluated.
074         * @return PDF for this distribution. 
075         */
076        @Override
077        public double cumulativeProbability(int x) {
078            double ret;
079            
080            int n = getPopulationSize();
081            int m = getNumberOfSuccesses();
082            int k = getSampleSize();
083    
084            int[] domain = getDomain(n, m, k);
085            if (x < domain[0]) {
086                ret = 0.0;
087            } else if(x >= domain[1]) {
088                ret = 1.0;
089            } else {
090                ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
091            }
092            
093            return ret;
094        }
095    
096        /**
097         * Return the domain for the given hypergeometric distribution parameters.
098         * @param n the population size.
099         * @param m number of successes in the population.
100         * @param k the sample size.
101         * @return a two element array containing the lower and upper bounds of the
102         *         hypergeometric distribution.  
103         */
104        private int[] getDomain(int n, int m, int k){
105            return new int[]{
106                getLowerDomain(n, m, k),
107                getUpperDomain(m, k)
108            };
109        }
110        
111        /**
112         * Access the domain value lower bound, based on <code>p</code>, used to
113         * bracket a PDF root.
114         * 
115         * @param p the desired probability for the critical value
116         * @return domain value lower bound, i.e.
117         *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code> 
118         */
119        @Override
120        protected int getDomainLowerBound(double p) {
121            return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
122                getSampleSize());
123        }
124        
125        /**
126         * Access the domain value upper bound, based on <code>p</code>, used to
127         * bracket a PDF root.
128         * 
129         * @param p the desired probability for the critical value
130         * @return domain value upper bound, i.e.
131         *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code> 
132         */
133        @Override
134        protected int getDomainUpperBound(double p) {
135            return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
136        }
137    
138        /**
139         * Return the lowest domain value for the given hypergeometric distribution
140         * parameters.
141         * @param n the population size.
142         * @param m number of successes in the population.
143         * @param k the sample size.
144         * @return the lowest domain value of the hypergeometric distribution.  
145         */
146        private int getLowerDomain(int n, int m, int k) {
147            return Math.max(0, m - (n - k));
148        }
149    
150        /**
151         * Access the number of successes.
152         * @return the number of successes.
153         */
154        public int getNumberOfSuccesses() {
155            return numberOfSuccesses;
156        }
157    
158        /**
159         * Access the population size.
160         * @return the population size.
161         */
162        public int getPopulationSize() {
163            return populationSize;
164        }
165    
166        /**
167         * Access the sample size.
168         * @return the sample size.
169         */
170        public int getSampleSize() {
171            return sampleSize;
172        }
173    
174        /**
175         * Return the highest domain value for the given hypergeometric distribution
176         * parameters.
177         * @param m number of successes in the population.
178         * @param k the sample size.
179         * @return the highest domain value of the hypergeometric distribution.  
180         */
181        private int getUpperDomain(int m, int k){
182            return Math.min(k, m);
183        }
184    
185        /**
186         * For this distribution, X, this method returns P(X = x).
187         * 
188         * @param x the value at which the PMF is evaluated.
189         * @return PMF for this distribution. 
190         */
191        public double probability(int x) {
192            double ret;
193            
194            int n = getPopulationSize();
195            int m = getNumberOfSuccesses();
196            int k = getSampleSize();
197    
198            int[] domain = getDomain(n, m, k);
199            if(x < domain[0] || x > domain[1]){
200                ret = 0.0;
201            } else {
202                ret = probability(n, m, k, x);
203            }
204            
205            return ret;
206        }
207        
208        /**
209         * For the distribution, X, defined by the given hypergeometric distribution
210         * parameters, this method returns P(X = x).
211         * 
212         * @param n the population size.
213         * @param m number of successes in the population.
214         * @param k the sample size.
215         * @param x the value at which the PMF is evaluated.
216         * @return PMF for the distribution. 
217         */
218        private double probability(int n, int m, int k, int x) {
219            return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
220                MathUtils.binomialCoefficientLog(n - m, k - x) -
221                MathUtils.binomialCoefficientLog(n, k));
222        }
223    
224        /**
225         * Modify the number of successes.
226         * @param num the new number of successes.
227         * @throws IllegalArgumentException if <code>num</code> is negative.
228         */
229        public void setNumberOfSuccesses(int num) {
230            if(num < 0){
231                throw MathRuntimeException.createIllegalArgumentException(
232                      "number of successes must be non-negative ({0})",
233                      num);
234            }
235            numberOfSuccesses = num;
236        }
237    
238        /**
239         * Modify the population size.
240         * @param size the new population size.
241         * @throws IllegalArgumentException if <code>size</code> is not positive.
242         */
243        public void setPopulationSize(int size) {
244            if(size <= 0){
245                throw MathRuntimeException.createIllegalArgumentException(
246                      "population size must be positive ({0})",
247                      size);
248            }
249            populationSize = size;
250        }
251        
252        /**
253         * Modify the sample size.
254         * @param size the new sample size.
255         * @throws IllegalArgumentException if <code>size</code> is negative.
256         */
257        public void setSampleSize(int size) {
258            if (size < 0) {
259                throw MathRuntimeException.createIllegalArgumentException(
260                      "sample size must be positive ({0})",
261                      size);
262            }    
263            sampleSize = size;
264        }
265    
266        /**
267         * For this distribution, X, this method returns P(X &ge; x).
268         * @param x the value at which the CDF is evaluated.
269         * @return upper tail CDF for this distribution.
270         * @since 1.1
271         */
272        public double upperCumulativeProbability(int x) {
273            double ret;
274            
275            int n = getPopulationSize();
276            int m = getNumberOfSuccesses();
277            int k = getSampleSize();
278    
279            int[] domain = getDomain(n, m, k);
280            if (x < domain[0]) {
281                ret = 1.0;
282            } else if(x > domain[1]) {
283                ret = 0.0;
284            } else {
285                ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
286            }
287            
288            return ret;
289        }
290        
291        /**
292         * For this distribution, X, this method returns P(x0 &le; X &le; x1).  This
293         * probability is computed by summing the point probabilities for the values
294         * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 
295         * @param x0 the inclusive, lower bound
296         * @param x1 the inclusive, upper bound
297         * @param dx the direction of summation. 1 indicates summing from x0 to x1.
298         *           0 indicates summing from x1 to x0.
299         * @param n the population size.
300         * @param m number of successes in the population.
301         * @param k the sample size.
302         * @return P(x0 &le; X &le; x1). 
303         */
304        private double innerCumulativeProbability(
305            int x0, int x1, int dx, int n, int m, int k)
306        {
307            double ret = probability(n, m, k, x0);
308            while (x0 != x1) {
309                x0 += dx;
310                ret += probability(n, m, k, x0);
311            }
312            return ret;
313        }
314    }