/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.inference;

import java.util.Collection;

import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.distribution.FDistribution;
import org.apache.commons.math.distribution.FDistributionImpl;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;


/**
 * Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
 * interface.
 *
 * <p>Uses the
 * {@link org.apache.commons.math.distribution.FDistribution
 *  commons-math F Distribution implementation} to estimate exact p-values.</p>
 *
 * <p>This implementation is based on a description at
 * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
 * <pre>
 * Abbreviations: bg = between groups,
 *                wg = within groups,
 *                ss = sum squared deviations
 * </pre>
 *
 * @since 1.2
 * @version $Revision: 773189 $ $Date: 2009-05-09 05:57:04 -0400 (Sat, 09 May 2009) $
 */
public class OneWayAnovaImpl implements OneWayAnova {

    /**
     * Default constructor.
     */
    public OneWayAnovaImpl() {
    }

    /**
     * {@inheritDoc}<p>
     * This implementation computes the F statistic using the definitional
     * formula<pre>
     *   F = msbg/mswg</pre>
     * where<pre>
     *  msbg = between group mean square
     *  mswg = within group mean square</pre>
     * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
     * here</a></p>
     */
    public double anovaFValue(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {
        return anovaStats(categoryData).F;
    }

    /**
     * {@inheritDoc}<p>
     * This implementation uses the
     * {@link org.apache.commons.math.distribution.FDistribution
     * commons-math F Distribution implementation} to estimate the exact
     * p-value, using the formula<pre>
     *   p = 1 - cumulativeProbability(F)</pre>
     * where <code>F</code> is the F value and <code>cumulativeProbability</code>
     * is the commons-math implementation of the F distribution.</p>
     */
    public double anovaPValue(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {
        final AnovaStats stats = anovaStats(categoryData);
        final FDistribution fdist = new FDistributionImpl(stats.dfbg, stats.dfwg);
        return 1.0 - fdist.cumulativeProbability(stats.F);
    }

    /**
     * {@inheritDoc}<p>
     * This implementation uses the
     * {@link org.apache.commons.math.distribution.FDistribution
     * commons-math F Distribution implementation} to estimate the exact
     * p-value, using the formula<pre>
     *   p = 1 - cumulativeProbability(F)</pre>
     * where <code>F</code> is the F value and <code>cumulativeProbability</code>
     * is the commons-math implementation of the F distribution.</p>
     * <p>True is returned iff the estimated p-value is less than alpha.</p>
     * <p>An <code>IllegalArgumentException</code> is thrown unless
     * <code>0 &lt; alpha &lt;= 0.5</code>; a <code>NaN</code> alpha is
     * rejected as well.</p>
     */
    public boolean anovaTest(Collection<double[]> categoryData, double alpha)
        throws IllegalArgumentException, MathException {
        // Written as a negated "in range" test so that NaN, for which every
        // ordered comparison is false, is rejected instead of slipping through.
        if (!(alpha > 0 && alpha <= 0.5)) {
            throw MathRuntimeException.createIllegalArgumentException(
                  "out of bounds significance level {0}, must be between {1} and {2}",
                  alpha, 0, 0.5);
        }
        return anovaPValue(categoryData) < alpha;
    }


    /**
     * This method actually does the calculations (except P-value).
     *
     * <p>The sums of squares are obtained with the computational formulas<pre>
     *   sswg = sum over categories of (sumsq_i - sum_i * sum_i / n_i)
     *   sst  = totsumsq - totsum * totsum / totnum
     *   ssbg = sst - sswg</pre>
     * and the statistic is <code>F = (ssbg / dfbg) / (sswg / dfwg)</code>
     * with <code>dfbg = number of categories - 1</code> and
     * <code>dfwg = sum over categories of (n_i - 1)</code>.</p>
     *
     * <p>No guard is applied to the final divisions: if the within-group
     * sum of squares evaluates to zero, the returned F is infinite or NaN.</p>
     *
     * @param categoryData <code>Collection</code> of <code>double[]</code>
     * arrays each containing data for one category
     * @return computed AnovaStats
     * @throws IllegalArgumentException if categoryData does not meet
     * preconditions specified in the interface definition
     * @throws MathException if an error occurs computing the Anova stats
     */
    private AnovaStats anovaStats(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {

        // check if we have enough categories
        if (categoryData.size() < 2) {
            throw MathRuntimeException.createIllegalArgumentException(
                  "two or more categories required, got {0}",
                  categoryData.size());
        }

        // check if each category has enough data
        for (double[] array : categoryData) {
            if (array.length <= 1) {
                throw MathRuntimeException.createIllegalArgumentException(
                      "two or more values required in each category, one has {0}",
                      array.length);
            }
        }

        int dfwg = 0;
        double sswg = 0;
        final Sum totsum = new Sum();
        final SumOfSquares totsumsq = new SumOfSquares();
        int totnum = 0;

        for (double[] data : categoryData) {

            final Sum sum = new Sum();
            final SumOfSquares sumsq = new SumOfSquares();

            for (double val : data) {
                // within category
                sum.increment(val);
                sumsq.increment(val);

                // for all categories
                totsum.increment(val);
                totsumsq.increment(val);
            }

            // the number of values seen is simply the array length
            final int num = data.length;
            totnum += num;
            dfwg += num - 1;

            // sum of squared deviations from the category mean
            final double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
            sswg += ss;
        }

        // total sum of squared deviations from the grand mean
        final double sst = totsumsq.getResult() - totsum.getResult() *
            totsum.getResult() / totnum;
        final double ssbg = sst - sswg;
        final int dfbg = categoryData.size() - 1;
        final double msbg = ssbg / dfbg;
        final double mswg = sswg / dfwg;
        final double F = msbg / mswg;

        return new AnovaStats(dfbg, dfwg, F);
    }

    /**
     * Convenience class to pass dfbg, dfwg, F values around within
     * OneWayAnovaImpl. Instances are immutable; no get/set methods provided,
     * the enclosing class reads the fields directly.
     */
    private static class AnovaStats {

        /** Degrees of freedom in numerator (between groups). */
        private final int dfbg;

        /** Degrees of freedom in denominator (within groups). */
        private final int dfwg;

        /** Statistic. */
        private final double F;

        /**
         * Constructor
         * @param dfbg degrees of freedom in numerator (between groups)
         * @param dfwg degrees of freedom in denominator (within groups)
         * @param F statistic
         */
        AnovaStats(int dfbg, int dfwg, double F) {
            this.dfbg = dfbg;
            this.dfwg = dfwg;
            this.F = F;
        }
    }

}