View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.inference;
18  
19  import org.apache.commons.math.MathException;
20  import org.apache.commons.math.MathRuntimeException;
21  import org.apache.commons.math.distribution.ChiSquaredDistribution;
22  import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
23  
24  /**
25   * Implements Chi-Square test statistics defined in the
26   * {@link UnknownDistributionChiSquareTest} interface.
27   *
28   * @version $Revision: 775470 $ $Date: 2009-05-16 10:29:07 -0400 (Sat, 16 May 2009) $
29   */
30  public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
31  
32      /** Distribution used to compute inference statistics. */
33      private ChiSquaredDistribution distribution;
34    
35      /**
36       * Construct a ChiSquareTestImpl 
37       */
38      public ChiSquareTestImpl() {
39          this(new ChiSquaredDistributionImpl(1.0));
40      }
41  
42      /**
43       * Create a test instance using the given distribution for computing
44       * inference statistics.
45       * @param x distribution used to compute inference statistics.
46       * @since 1.2
47       */
48      public ChiSquareTestImpl(ChiSquaredDistribution x) {
49          super();
50          setDistribution(x);
51      }
52       /**
53       * {@inheritDoc}
54       * <p><strong>Note: </strong>This implementation rescales the 
55       * <code>expected</code> array if necessary to ensure that the sum of the
56       * expected and observed counts are equal.</p>
57       * 
58       * @param observed array of observed frequency counts
59       * @param expected array of expected frequency counts
60       * @return chi-square test statistic
61       * @throws IllegalArgumentException if preconditions are not met
62       * or length is less than 2
63       */
64      public double chiSquare(double[] expected, long[] observed)
65          throws IllegalArgumentException {
66          if (expected.length < 2) {
67              throw MathRuntimeException.createIllegalArgumentException(
68                    "expected array length = {0}, must be at least 2",
69                    expected.length);
70          }
71          if (expected.length != observed.length) {
72              throw MathRuntimeException.createIllegalArgumentException(
73                    "dimension mismatch {0} != {1}", expected.length, observed.length);
74          }
75          checkPositive(expected);
76          checkNonNegative(observed);
77          double sumExpected = 0d;
78          double sumObserved = 0d;
79          for (int i = 0; i < observed.length; i++) {
80              sumExpected += expected[i];
81              sumObserved += observed[i];
82          }
83          double ratio = 1.0d;
84          boolean rescale = false;
85          if (Math.abs(sumExpected - sumObserved) > 10E-6) {
86              ratio = sumObserved / sumExpected;
87              rescale = true;
88          }
89          double sumSq = 0.0d;
90          double dev = 0.0d;
91          for (int i = 0; i < observed.length; i++) {
92              if (rescale) {
93                  dev = (observed[i] - ratio * expected[i]);
94                  sumSq += dev * dev / (ratio * expected[i]);
95              } else {
96                  dev = (observed[i] - expected[i]);
97                  sumSq += dev * dev / expected[i];
98              }
99          }
100         return sumSq;
101     }
102 
103     /**
104      * {@inheritDoc}
105      * <p><strong>Note: </strong>This implementation rescales the 
106      * <code>expected</code> array if necessary to ensure that the sum of the
107      * expected and observed counts are equal.</p>
108      * 
109      * @param observed array of observed frequency counts
110      * @param expected array of expected frequency counts
111      * @return p-value
112      * @throws IllegalArgumentException if preconditions are not met
113      * @throws MathException if an error occurs computing the p-value
114      */
115     public double chiSquareTest(double[] expected, long[] observed)
116         throws IllegalArgumentException, MathException {
117         distribution.setDegreesOfFreedom(expected.length - 1.0);
118         return 1.0 - distribution.cumulativeProbability(
119             chiSquare(expected, observed));
120     }
121 
122     /**
123      * {@inheritDoc}
124      * <p><strong>Note: </strong>This implementation rescales the 
125      * <code>expected</code> array if necessary to ensure that the sum of the
126      * expected and observed counts are equal.</p>
127      * 
128      * @param observed array of observed frequency counts
129      * @param expected array of expected frequency counts
130      * @param alpha significance level of the test
131      * @return true iff null hypothesis can be rejected with confidence
132      * 1 - alpha
133      * @throws IllegalArgumentException if preconditions are not met
134      * @throws MathException if an error occurs performing the test
135      */
136     public boolean chiSquareTest(double[] expected, long[] observed, 
137             double alpha) throws IllegalArgumentException, MathException {
138         if ((alpha <= 0) || (alpha > 0.5)) {
139             throw MathRuntimeException.createIllegalArgumentException(
140                   "out of bounds significance level {0}, must be between {1} and {2}",
141                   alpha, 0, 0.5);
142         }
143         return (chiSquareTest(expected, observed) < alpha);
144     }
145     
146     /**
147      * @param counts array representation of 2-way table
148      * @return chi-square test statistic
149      * @throws IllegalArgumentException if preconditions are not met
150      */
151     public double chiSquare(long[][] counts) throws IllegalArgumentException {
152         
153         checkArray(counts);
154         int nRows = counts.length;
155         int nCols = counts[0].length;
156         
157         // compute row, column and total sums
158         double[] rowSum = new double[nRows];
159         double[] colSum = new double[nCols];
160         double total = 0.0d;
161         for (int row = 0; row < nRows; row++) {
162             for (int col = 0; col < nCols; col++) {
163                 rowSum[row] += counts[row][col];
164                 colSum[col] += counts[row][col];
165                 total += counts[row][col];
166             }
167         }
168         
169         // compute expected counts and chi-square
170         double sumSq = 0.0d;
171         double expected = 0.0d;
172         for (int row = 0; row < nRows; row++) {
173             for (int col = 0; col < nCols; col++) {
174                 expected = (rowSum[row] * colSum[col]) / total;
175                 sumSq += ((counts[row][col] - expected) * 
176                         (counts[row][col] - expected)) / expected; 
177             }
178         } 
179         return sumSq;
180     }
181 
182     /**
183      * @param counts array representation of 2-way table
184      * @return p-value
185      * @throws IllegalArgumentException if preconditions are not met
186      * @throws MathException if an error occurs computing the p-value
187      */
188     public double chiSquareTest(long[][] counts)
189     throws IllegalArgumentException, MathException {
190         checkArray(counts);
191         double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192         distribution.setDegreesOfFreedom(df);
193         return 1 - distribution.cumulativeProbability(chiSquare(counts));
194     }
195 
196     /**
197      * @param counts array representation of 2-way table
198      * @param alpha significance level of the test
199      * @return true iff null hypothesis can be rejected with confidence
200      * 1 - alpha
201      * @throws IllegalArgumentException if preconditions are not met
202      * @throws MathException if an error occurs performing the test
203      */
204     public boolean chiSquareTest(long[][] counts, double alpha)
205     throws IllegalArgumentException, MathException {
206         if ((alpha <= 0) || (alpha > 0.5)) {
207             throw MathRuntimeException.createIllegalArgumentException(
208                   "out of bounds significance level {0}, must be between {1} and {2}",
209                   alpha, 0.0, 0.5);
210         }
211         return (chiSquareTest(counts) < alpha);
212     }
213     
214     /**
215      * @param observed1 array of observed frequency counts of the first data set
216      * @param observed2 array of observed frequency counts of the second data set
217      * @return chi-square test statistic
218      * @throws IllegalArgumentException if preconditions are not met
219      * @since 1.2
220      */
221     public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222         throws IllegalArgumentException {
223         
224         // Make sure lengths are same
225         if (observed1.length < 2) {
226             throw MathRuntimeException.createIllegalArgumentException(
227                   "observed array length = {0}, must be at least 2",
228                   observed1.length);
229         }
230         if (observed1.length != observed2.length) {
231             throw MathRuntimeException.createIllegalArgumentException(
232                   "dimension mismatch {0} != {1}",
233                   observed1.length, observed2.length);
234         }
235 
236         // Ensure non-negative counts
237         checkNonNegative(observed1);
238         checkNonNegative(observed2);
239 
240         // Compute and compare count sums
241         long countSum1 = 0;
242         long countSum2 = 0;
243         boolean unequalCounts = false;
244         double weight = 0.0;
245         for (int i = 0; i < observed1.length; i++) {
246             countSum1 += observed1[i];
247             countSum2 += observed2[i];   
248         }
249         // Ensure neither sample is uniformly 0
250         if (countSum1 == 0) {
251             throw MathRuntimeException.createIllegalArgumentException(
252                   "observed counts are all 0 in first observed array"); 
253         }
254         if (countSum2 == 0) {
255             throw MathRuntimeException.createIllegalArgumentException(
256                   "observed counts are all 0 in second observed array"); 
257         }
258         // Compare and compute weight only if different
259         unequalCounts = (countSum1 != countSum2);
260         if (unequalCounts) {
261             weight = Math.sqrt((double) countSum1 / (double) countSum2);
262         }
263         // Compute ChiSquare statistic
264         double sumSq = 0.0d;
265         double dev = 0.0d;
266         double obs1 = 0.0d;
267         double obs2 = 0.0d;
268         for (int i = 0; i < observed1.length; i++) {
269             if (observed1[i] == 0 && observed2[i] == 0) {
270                 throw MathRuntimeException.createIllegalArgumentException(
271                       "observed counts are both zero for entry {0}", i);
272             } else {
273                 obs1 = observed1[i];
274                 obs2 = observed2[i];
275                 if (unequalCounts) { // apply weights
276                     dev = obs1/weight - obs2 * weight;
277                 } else {
278                     dev = obs1 - obs2;
279                 }
280                 sumSq += (dev * dev) / (obs1 + obs2);
281             }
282         }
283         return sumSq;
284     }
285 
286     /**
287      * @param observed1 array of observed frequency counts of the first data set
288      * @param observed2 array of observed frequency counts of the second data set
289      * @return p-value
290      * @throws IllegalArgumentException if preconditions are not met
291      * @throws MathException if an error occurs computing the p-value
292      * @since 1.2
293      */
294     public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
295         throws IllegalArgumentException, MathException {
296         distribution.setDegreesOfFreedom((double) observed1.length - 1);
297         return 1 - distribution.cumulativeProbability(
298                 chiSquareDataSetsComparison(observed1, observed2));
299     }
300 
301     /**
302      * @param observed1 array of observed frequency counts of the first data set
303      * @param observed2 array of observed frequency counts of the second data set
304      * @param alpha significance level of the test
305      * @return true iff null hypothesis can be rejected with confidence
306      * 1 - alpha
307      * @throws IllegalArgumentException if preconditions are not met
308      * @throws MathException if an error occurs performing the test
309      * @since 1.2
310      */
311     public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
312             double alpha) throws IllegalArgumentException, MathException {
313         if ((alpha <= 0) || (alpha > 0.5)) {
314             throw MathRuntimeException.createIllegalArgumentException(
315                   "out of bounds significance level {0}, must be between {1} and {2}",
316                   alpha, 0.0, 0.5);
317         }
318         return (chiSquareTestDataSetsComparison(observed1, observed2) < alpha);
319     }
320 
321     /**
322      * Checks to make sure that the input long[][] array is rectangular,
323      * has at least 2 rows and 2 columns, and has all non-negative entries,
324      * throwing IllegalArgumentException if any of these checks fail.
325      * 
326      * @param in input 2-way table to check
327      * @throws IllegalArgumentException if the array is not valid
328      */
329     private void checkArray(long[][] in) throws IllegalArgumentException {
330         
331         if (in.length < 2) {
332             throw MathRuntimeException.createIllegalArgumentException(
333                   "invalid row dimension: {0} (must be at least 2)",
334                   in.length);
335         }
336         
337         if (in[0].length < 2) {
338             throw MathRuntimeException.createIllegalArgumentException(
339                   "invalid column dimension: {0} (must be at least 2)",
340                   in[0].length);
341         }    
342         
343         checkRectangular(in);
344         checkNonNegative(in);
345         
346     }
347     
348     //---------------------  Private array methods -- should find a utility home for these
349     
350     /**
351      * Throws IllegalArgumentException if the input array is not rectangular.
352      * 
353      * @param in array to be tested
354      * @throws NullPointerException if input array is null
355      * @throws IllegalArgumentException if input array is not rectangular
356      */
357     private void checkRectangular(long[][] in) {
358         for (int i = 1; i < in.length; i++) {
359             if (in[i].length != in[0].length) {
360                 throw MathRuntimeException.createIllegalArgumentException(
361                       "some rows have length {0} while others have length {1}",
362                       in[i].length, in[0].length);
363             }
364         }  
365     }
366     
367     /**
368      * Check all entries of the input array are > 0.
369      * 
370      * @param in array to be tested
371      * @exception IllegalArgumentException if one entry is not positive
372      */
373     private void checkPositive(double[] in) throws IllegalArgumentException {
374         for (int i = 0; i < in.length; i++) {
375             if (in[i] <= 0) {
376                 throw MathRuntimeException.createIllegalArgumentException(
377                       "element {0} is not positive: {1}",
378                       i, in[i]);
379             }
380         }
381     }
382     
383     /**
384      * Check all entries of the input array are >= 0.
385      * 
386      * @param in array to be tested
387      * @exception IllegalArgumentException if one entry is negative
388      */
389     private void checkNonNegative(long[] in) throws IllegalArgumentException {
390         for (int i = 0; i < in.length; i++) {
391             if (in[i] < 0) {
392                 throw MathRuntimeException.createIllegalArgumentException(
393                       "element {0} is negative: {1}",
394                       i, in[i]);
395             }
396         }
397     }
398     
399     /**
400      * Check all entries of the input array are >= 0.
401      * 
402      * @param in array to be tested
403      * @exception IllegalArgumentException if one entry is negative
404      */
405     private void checkNonNegative(long[][] in) throws IllegalArgumentException {
406         for (int i = 0; i < in.length; i ++) {
407             for (int j = 0; j < in[i].length; j++) {
408                 if (in[i][j] < 0) {
409                     throw MathRuntimeException.createIllegalArgumentException(
410                           "element ({0}, {1}) is negative: {2}",
411                           i, j, in[i][j]);
412                 }
413             }
414         }
415     }
416  
417     /**
418      * Modify the distribution used to compute inference statistics.
419      * 
420      * @param value
421      *            the new distribution
422      * @since 1.2
423      */
424     public void setDistribution(ChiSquaredDistribution value) {
425         distribution = value;
426     }
427 }