001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.stat.descriptive; 019 020 import junit.framework.Test; 021 import junit.framework.TestCase; 022 import junit.framework.TestSuite; 023 024 import java.util.Collection; 025 import java.util.ArrayList; 026 027 import org.apache.commons.math.random.RandomData; 028 import org.apache.commons.math.random.RandomDataImpl; 029 import org.apache.commons.math.TestUtils; 030 031 032 /** 033 * Test cases for {@link AggregateSummaryStatistics} 034 * 035 */ 036 public class AggregateSummaryStatisticsTest extends TestCase { 037 038 /** 039 * Creates and returns a {@code Test} representing all the test cases in this 040 * class 041 * 042 * @return a {@code Test} representing all the test cases in this class 043 */ 044 public static Test suite() { 045 TestSuite suite = new TestSuite(AggregateSummaryStatisticsTest.class); 046 suite.setName("AggregateSummaryStatistics tests"); 047 return suite; 048 } 049 050 /** 051 * Tests the standard aggregation behavior 052 */ 053 public void testAggregation() { 054 AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics(); 055 SummaryStatistics setOneStats = aggregate.createContributingStatistics(); 056 SummaryStatistics setTwoStats = aggregate.createContributingStatistics(); 057 058 assertNotNull("The set one contributing stats are null", setOneStats); 059 assertNotNull("The set two contributing stats are null", setTwoStats); 060 assertNotSame("Contributing stats objects are the same", setOneStats, setTwoStats); 061 062 setOneStats.addValue(2); 063 setOneStats.addValue(3); 064 setOneStats.addValue(5); 065 setOneStats.addValue(7); 066 setOneStats.addValue(11); 067 assertEquals("Wrong number of set one values", 5, setOneStats.getN()); 068 assertEquals("Wrong sum of set one values", 28.0, setOneStats.getSum()); 069 070 setTwoStats.addValue(2); 071 setTwoStats.addValue(4); 072 setTwoStats.addValue(8); 073 assertEquals("Wrong number of set two values", 3, setTwoStats.getN()); 074 assertEquals("Wrong sum of set two values", 14.0, setTwoStats.getSum()); 075 076 assertEquals("Wrong number of aggregate values", 8, aggregate.getN()); 077 assertEquals("Wrong aggregate sum", 42.0, aggregate.getSum()); 078 } 079 080 /** 081 * Verify that aggregating over a partition gives the same results 082 * as direct computation. 083 * 084 * 1) Randomly generate a dataset of 10-100 values 085 * from [-100, 100] 086 * 2) Divide the dataset it into 2-5 partitions 087 * 3) Create an AggregateSummaryStatistic and ContributingStatistics 088 * for each partition 089 * 4) Compare results from the AggregateSummaryStatistic with values 090 * returned by a single SummaryStatistics instance that is provided 091 * the full dataset 092 */ 093 public void testAggregationConsistency() throws Exception { 094 095 // Generate a random sample and random partition 096 double[] totalSample = generateSample(); 097 double[][] subSamples = generatePartition(totalSample); 098 int nSamples = subSamples.length; 099 100 // Create aggregator and total stats for comparison 101 AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics(); 102 SummaryStatistics totalStats = new SummaryStatistics(); 103 104 // Create array of component stats 105 SummaryStatistics componentStats[] = new SummaryStatistics[nSamples]; 106 107 for (int i = 0; i < nSamples; i++) { 108 109 // Make componentStats[i] a contributing statistic to aggregate 110 componentStats[i] = aggregate.createContributingStatistics(); 111 112 // Add values from subsample 113 for (int j = 0; j < subSamples[i].length; j++) { 114 componentStats[i].addValue(subSamples[i][j]); 115 } 116 } 117 118 // Compute totalStats directly 119 for (int i = 0; i < totalSample.length; i++) { 120 totalStats.addValue(totalSample[i]); 121 } 122 123 /* 124 * Compare statistics in totalStats with aggregate. 125 * Note that guaranteed success of this comparison depends on the 126 * fact that <aggregate> gets values in exactly the same order 127 * as <totalStats>. 128 * 129 */ 130 assertEquals(totalStats.getSummary(), aggregate.getSummary()); 131 132 } 133 134 /** 135 * Test aggregate function by randomly generating a dataset of 10-100 values 136 * from [-100, 100], dividing it into 2-5 partitions, computing stats for each 137 * partition and comparing the result of aggregate(...) applied to the collection 138 * of per-partition SummaryStatistics with a single SummaryStatistics computed 139 * over the full sample. 140 * 141 * @throws Exception 142 */ 143 public void testAggregate() throws Exception { 144 145 // Generate a random sample and random partition 146 double[] totalSample = generateSample(); 147 double[][] subSamples = generatePartition(totalSample); 148 int nSamples = subSamples.length; 149 150 // Compute combined stats directly 151 SummaryStatistics totalStats = new SummaryStatistics(); 152 for (int i = 0; i < totalSample.length; i++) { 153 totalStats.addValue(totalSample[i]); 154 } 155 156 // Now compute subsample stats individually and aggregate 157 SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples]; 158 for (int i = 0; i < nSamples; i++) { 159 subSampleStats[i] = new SummaryStatistics(); 160 } 161 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>(); 162 for (int i = 0; i < nSamples; i++) { 163 for (int j = 0; j < subSamples[i].length; j++) { 164 subSampleStats[i].addValue(subSamples[i][j]); 165 } 166 aggregate.add(subSampleStats[i]); 167 } 168 169 // Compare values 170 StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate); 171 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12); 172 } 173 174 175 public void testAggregateDegenerate() throws Exception { 176 double[] totalSample = {1, 2, 3, 4, 5}; 177 double[][] subSamples = {{1}, {2}, {3}, {4}, {5}}; 178 179 // Compute combined stats directly 180 SummaryStatistics totalStats = new SummaryStatistics(); 181 for (int i = 0; i < totalSample.length; i++) { 182 totalStats.addValue(totalSample[i]); 183 } 184 185 // Now compute subsample stats individually and aggregate 186 SummaryStatistics[] subSampleStats = new SummaryStatistics[5]; 187 for (int i = 0; i < 5; i++) { 188 subSampleStats[i] = new SummaryStatistics(); 189 } 190 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>(); 191 for (int i = 0; i < 5; i++) { 192 for (int j = 0; j < subSamples[i].length; j++) { 193 subSampleStats[i].addValue(subSamples[i][j]); 194 } 195 aggregate.add(subSampleStats[i]); 196 } 197 198 // Compare values 199 StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate); 200 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12); 201 } 202 203 public void testAggregateSpecialValues() throws Exception { 204 double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5}; 205 double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}}; 206 207 // Compute combined stats directly 208 SummaryStatistics totalStats = new SummaryStatistics(); 209 for (int i = 0; i < totalSample.length; i++) { 210 totalStats.addValue(totalSample[i]); 211 } 212 213 // Now compute subsample stats individually and aggregate 214 SummaryStatistics[] subSampleStats = new SummaryStatistics[5]; 215 for (int i = 0; i < 4; i++) { 216 subSampleStats[i] = new SummaryStatistics(); 217 } 218 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>(); 219 for (int i = 0; i < 4; i++) { 220 for (int j = 0; j < subSamples[i].length; j++) { 221 subSampleStats[i].addValue(subSamples[i][j]); 222 } 223 aggregate.add(subSampleStats[i]); 224 } 225 226 // Compare values 227 StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate); 228 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12); 229 230 } 231 232 /** 233 * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up 234 * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values 235 * have to agree exactly, delta is used only for sum, mean, variance, std dev. 236 */ 237 protected static void assertEquals(StatisticalSummary expected, StatisticalSummary observed, double delta) { 238 TestUtils.assertEquals(expected.getMax(), observed.getMax(), 0); 239 TestUtils.assertEquals(expected.getMin(), observed.getMin(), 0); 240 assertEquals(expected.getN(), observed.getN()); 241 TestUtils.assertEquals(expected.getSum(), observed.getSum(), delta); 242 TestUtils.assertEquals(expected.getMean(), observed.getMean(), delta); 243 TestUtils.assertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta); 244 TestUtils.assertEquals(expected.getVariance(), observed.getVariance(), delta); 245 } 246 247 248 /** 249 * Generates a random sample of double values. 250 * Sample size is random, between 10 and 100 and values are 251 * uniformly distributed over [-100, 100]. 252 * 253 * @return array of random double values 254 */ 255 private double[] generateSample() { 256 final RandomData randomData = new RandomDataImpl(); 257 final int sampleSize = randomData.nextInt(10,100); 258 double[] out = new double[sampleSize]; 259 for (int i = 0; i < out.length; i++) { 260 out[i] = randomData.nextUniform(-100, 100); 261 } 262 return out; 263 } 264 265 /** 266 * Generates a partition of <sample> into up to 5 sequentially selected 267 * subsamples with randomly selected partition points. 268 * 269 * @param sample array to partition 270 * @return rectangular array with rows = subsamples 271 */ 272 private double[][] generatePartition(double[] sample) { 273 final int length = sample.length; 274 final double[][] out = new double[5][]; 275 final RandomData randomData = new RandomDataImpl(); 276 int cur = 0; 277 int offset = 0; 278 int sampleCount = 0; 279 for (int i = 0; i < 5; i++) { 280 if (cur == length || offset == length) { 281 break; 282 } 283 final int next = (i == 4 || cur == length - 1) ? length - 1 : randomData.nextInt(cur, length - 1); 284 final int subLength = next - cur + 1; 285 out[i] = new double[subLength]; 286 System.arraycopy(sample, offset, out[i], 0, subLength); 287 cur = next + 1; 288 sampleCount++; 289 offset += subLength; 290 } 291 if (sampleCount < 5) { 292 double[][] out2 = new double[sampleCount][]; 293 for (int j = 0; j < sampleCount; j++) { 294 final int curSize = out[j].length; 295 out2[j] = new double[curSize]; 296 System.arraycopy(out[j], 0, out2[j], 0, curSize); 297 } 298 return out2; 299 } else { 300 return out; 301 } 302 } 303 304 }