View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.stat.descriptive;
19  
20  import java.io.Serializable;
21  import java.util.Collection;
22  import java.util.Iterator;
23  
24  /**
25   * <p>
26   * An aggregator for {@code SummaryStatistics} from several data sets or
27   * data set partitions.  In its simplest usage mode, the client creates an
28   * instance via the zero-argument constructor, then uses
29   * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
30   * for each individual data set / partition.  The per-set statistics objects
31   * are used as normal, and at any time the aggregate statistics for all the
32   * contributors can be obtained from this object.
33   * </p><p>
34   * Clients with specialized requirements can use alternative constructors to
35   * control the statistics implementations and initial values used by the
36   * contributing and the internal aggregate {@code SummaryStatistics} objects.
37   * </p><p>
38   * A static {@link #aggregate(Collection)} method is also included that computes
39   * aggregate statistics directly from a Collection of SummaryStatistics instances.
40   * </p><p>
41   * When {@link #createContributingStatistics()} is used to create SummaryStatistics
42   * instances to be aggregated concurrently, the created instances' 
43   * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
44   * instance maintained by this class.  In multithreaded environments, if the functionality
45   * provided by {@link #aggregate(Collection)} is adequate, that method should be used
46   * to avoid unnecessary computation and synchronization delays.</p>
47   *
48   * @since 2.0
49   * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
50   * 
51   */
52  public class AggregateSummaryStatistics implements StatisticalSummary,
53          Serializable {
54  
55   
56      /** Serializable version identifier */
57      private static final long serialVersionUID = -8207112444016386906L;
58  
59      /**
60       * A SummaryStatistics serving as a prototype for creating SummaryStatistics
61       * contributing to this aggregate 
62       */
63      private final SummaryStatistics statisticsPrototype;
64      
65      /**
66       * The SummaryStatistics in which aggregate statistics are accumulated.
67       */
68      private final SummaryStatistics statistics;
69      
70      /**
71       * Initializes a new AggregateSummaryStatistics with default statistics
72       * implementations.
73       * 
74       */
75      public AggregateSummaryStatistics() {
76          this(new SummaryStatistics());
77      }
78      
79      /**
80       * Initializes a new AggregateSummaryStatistics with the specified statistics
81       * object as a prototype for contributing statistics and for the internal
82       * aggregate statistics.  This provides for customized statistics implementations
83       * to be used by contributing and aggregate statistics.
84       *
85       * @param prototypeStatistics a {@code SummaryStatistics} serving as a
86       *      prototype both for the internal aggregate statistics and for
87       *      contributing statistics obtained via the
88       *      {@code createContributingStatistics()} method.  Being a prototype
89       *      means that other objects are initialized by copying this object's state. 
90       *      If {@code null}, a new, default statistics object is used.  Any statistic
91       *      values in the prototype are propagated to contributing statistics
92       *      objects and (once) into these aggregate statistics.
93       * @see #createContributingStatistics()
94       */
95      public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
96          this(prototypeStatistics, (prototypeStatistics == null ? null :
97                  new SummaryStatistics(prototypeStatistics)));
98      }
99      
100     /**
101      * Initializes a new AggregateSummaryStatistics with the specified statistics
102      * object as a prototype for contributing statistics and for the internal
103      * aggregate statistics.  This provides for different statistics implementations
104      * to be used by contributing and aggregate statistics and for an initial
105      * state to be supplied for the aggregate statistics.
106      *
107      * @param prototypeStatistics a {@code SummaryStatistics} serving as a
108      *      prototype both for the internal aggregate statistics and for
109      *      contributing statistics obtained via the
110      *      {@code createContributingStatistics()} method.  Being a prototype
111      *      means that other objects are initialized by copying this object's state. 
112      *      If {@code null}, a new, default statistics object is used.  Any statistic
113      *      values in the prototype are propagated to contributing statistics
114      *      objects, but not into these aggregate statistics.
115      * @param initialStatistics a {@code SummaryStatistics} to serve as the
116      *      internal aggregate statistics object.  If {@code null}, a new, default
117      *      statistics object is used.
118      * @see #createContributingStatistics()
119      */
120     public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
121             SummaryStatistics initialStatistics) {
122         this.statisticsPrototype = ((prototypeStatistics == null) ?
123                 new SummaryStatistics() : prototypeStatistics);
124         this.statistics = ((initialStatistics == null) ?
125                 new SummaryStatistics() : initialStatistics);
126     }
127     
128     /**
129      * {@inheritDoc}.  This version returns the maximum over all the aggregated
130      * data.
131      *
132      * @see StatisticalSummary#getMax()
133      */
134     public double getMax() {
135         synchronized (statistics) {
136             return statistics.getMax();
137         }
138     }
139 
140     /**
141      * {@inheritDoc}.  This version returns the mean of all the aggregated data.
142      *
143      * @see StatisticalSummary#getMean()
144      */
145     public double getMean() {
146         synchronized (statistics) {
147             return statistics.getMean();
148         }
149     }
150 
151     /**
152      * {@inheritDoc}.  This version returns the minimum over all the aggregated
153      * data.
154      *
155      * @see StatisticalSummary#getMin()
156      */
157     public double getMin() {
158         synchronized (statistics) {
159             return statistics.getMin();
160         }
161     }
162 
163     /**
164      * {@inheritDoc}.  This version returns a count of all the aggregated data.
165      *
166      * @see StatisticalSummary#getN()
167      */
168     public long getN() {
169         synchronized (statistics) {
170             return statistics.getN();
171         }
172     }
173 
174     /**
175      * {@inheritDoc}.  This version returns the standard deviation of all the
176      * aggregated data.
177      *
178      * @see StatisticalSummary#getStandardDeviation()
179      */
180     public double getStandardDeviation() {
181         synchronized (statistics) {
182             return statistics.getStandardDeviation();
183         }
184     }
185 
186     /**
187      * {@inheritDoc}.  This version returns a sum of all the aggregated data.
188      *
189      * @see StatisticalSummary#getSum()
190      */
191     public double getSum() {
192         synchronized (statistics) {
193             return statistics.getSum();
194         }
195     }
196 
197     /**
198      * {@inheritDoc}.  This version returns the variance of all the aggregated
199      * data.
200      *
201      * @see StatisticalSummary#getVariance()
202      */
203     public double getVariance() {
204         synchronized (statistics) {
205             return statistics.getVariance();
206         }
207     }
208     
209     /**
210      * Returns the sum of the logs of all the aggregated data.
211      * 
212      * @return the sum of logs
213      * @see SummaryStatistics#getSumOfLogs()
214      */
215     public double getSumOfLogs() {
216         synchronized (statistics) {
217             return statistics.getSumOfLogs();
218         }
219     }
220     
221     /**
222      * Returns the geometric mean of all the aggregated data.
223      * 
224      * @return the geometric mean
225      * @see SummaryStatistics#getGeometricMean()
226      */
227     public double getGeometricMean() {
228         synchronized (statistics) {
229             return statistics.getGeometricMean();
230         }
231     }
232     
233     /**
234      * Returns the sum of the squares of all the aggregated data.
235      * 
236      * @return The sum of squares
237      * @see SummaryStatistics#getSumsq()
238      */
239     public double getSumsq() {
240         synchronized (statistics) {
241             return statistics.getSumsq();
242         }
243     }
244     
245     /**
246      * Returns a statistic related to the Second Central Moment.  Specifically,
247      * what is returned is the sum of squared deviations from the sample mean
248      * among the all of the aggregated data.
249      * 
250      * @return second central moment statistic
251      * @see SummaryStatistics#getSecondMoment()
252      */
253     public double getSecondMoment() {
254         synchronized (statistics) {
255             return statistics.getSecondMoment();
256         }
257     }
258     
259     /**
260      * Return a {@link StatisticalSummaryValues} instance reporting current
261      * aggregate statistics.
262      * 
263      * @return Current values of aggregate statistics
264      */
265     public StatisticalSummary getSummary() {
266         synchronized (statistics) {
267             return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 
268                     getMax(), getMin(), getSum());
269         }
270     }
271 
272     /**
273      * Creates and returns a {@code SummaryStatistics} whose data will be
274      * aggregated with those of this {@code AggregateSummaryStatistics}. 
275      *
276      * @return a {@code SummaryStatistics} whose data will be aggregated with
277      *      those of this {@code AggregateSummaryStatistics}.  The initial state
278      *      is a copy of the configured prototype statistics.
279      */
280     public SummaryStatistics createContributingStatistics() {
281         SummaryStatistics contributingStatistics
282                 = new AggregatingSummaryStatistics(statistics);
283         
284         SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
285         
286         return contributingStatistics;
287     }
288     
289     /**
290      * Computes aggregate summary statistics. This method can be used to combine statistics
291      * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
292      * should contain the same values that would have been obtained by computing a single
293      * StatisticalSummary over the combined dataset.
294      * <p>
295      * Returns null if the collection is empty or null.
296      * </p>
297      * 
298      * @param statistics collection of SummaryStatistics to aggregate
299      * @return summary statistics for the combined dataset
300      */
301     public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
302         if (statistics == null) {
303             return null;
304         }
305         Iterator<SummaryStatistics> iterator = statistics.iterator();
306         if (!iterator.hasNext()) {
307             return null;
308         }
309         SummaryStatistics current = iterator.next();
310         long n = current.getN();
311         double min = current.getMin();
312         double sum = current.getSum();
313         double max = current.getMax();
314         double m2 = current.getSecondMoment();
315         double mean = current.getMean();
316         while (iterator.hasNext()) {
317             current = iterator.next();
318             if (current.getMin() < min || Double.isNaN(min)) {
319                 min = current.getMin();
320             }
321             if (current.getMax() > max || Double.isNaN(max)) {
322                 max = current.getMax();
323             }
324             sum += current.getSum();
325             final double oldN = n;
326             final double curN = current.getN();
327             n += curN;
328             final double meanDiff = current.getMean() - mean;
329             mean = sum / n;
330             m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n; 
331         }
332         final double variance;
333         if (n == 0) {
334             variance = Double.NaN;
335         } else if (n == 1) {
336             variance = 0d;
337         } else {
338             variance = m2 / (n - 1);
339         }
340         return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
341     }
342     
343     /**
344      * A SummaryStatistics that also forwards all values added to it to a second
345      * {@code SummaryStatistics} for aggregation.
346      *
347      * @since 2.0
348      */
349     private static class AggregatingSummaryStatistics extends SummaryStatistics {
350         
351         /**
352          * The serialization version of this class
353          */
354         private static final long serialVersionUID = 1L;
355         
356         /**
357          * An additional SummaryStatistics into which values added to these
358          * statistics (and possibly others) are aggregated
359          */
360         private final SummaryStatistics aggregateStatistics;
361         
362         /**
363          * Initializes a new AggregatingSummaryStatistics with the specified
364          * aggregate statistics object
365          *
366          * @param aggregateStatistics a {@code SummaryStatistics} into which
367          *      values added to this statistics object should be aggregated
368          */
369         public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
370             this.aggregateStatistics = aggregateStatistics;
371         }
372 
373         /**
374          * {@inheritDoc}.  This version adds the provided value to the configured
375          * aggregate after adding it to these statistics.
376          *
377          * @see SummaryStatistics#addValue(double)
378          */
379         @Override
380         public void addValue(double value) {
381             super.addValue(value);
382             synchronized (aggregateStatistics) {
383                 aggregateStatistics.addValue(value);
384             }
385         }
386 
387         /**
388          * Returns true iff <code>object</code> is a
389          * <code>SummaryStatistics</code> instance and all statistics have the
390          * same values as this.
391          * @param object the object to test equality against.
392          * @return true if object equals this
393          */
394         @Override
395         public boolean equals(Object object) {
396             if (object == this) {
397                 return true;
398             }
399             if (object instanceof AggregatingSummaryStatistics == false) {
400                 return false;
401             }
402             AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
403             return (super.equals(stat) &&
404                     aggregateStatistics.equals(stat.aggregateStatistics));
405         }
406 
407         /**
408          * Returns hash code based on values of statistics
409          * @return hash code
410          */
411         @Override
412         public int hashCode() {
413             return 123 + super.hashCode() + aggregateStatistics.hashCode();
414         }
415     }
416 }