001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.descriptive;
019    
020    import java.io.Serializable;
021    import java.util.Collection;
022    import java.util.Iterator;
023    
024    /**
025     * <p>
026     * An aggregator for {@code SummaryStatistics} from several data sets or
027     * data set partitions.  In its simplest usage mode, the client creates an
028     * instance via the zero-argument constructor, then uses
029     * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
030     * for each individual data set / partition.  The per-set statistics objects
031     * are used as normal, and at any time the aggregate statistics for all the
032     * contributors can be obtained from this object.
033     * </p><p>
034     * Clients with specialized requirements can use alternative constructors to
035     * control the statistics implementations and initial values used by the
036     * contributing and the internal aggregate {@code SummaryStatistics} objects.
037     * </p><p>
038     * A static {@link #aggregate(Collection)} method is also included that computes
039     * aggregate statistics directly from a Collection of SummaryStatistics instances.
040     * </p><p>
041     * When {@link #createContributingStatistics()} is used to create SummaryStatistics
042     * instances to be aggregated concurrently, the created instances' 
043     * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
044     * instance maintained by this class.  In multithreaded environments, if the functionality
045     * provided by {@link #aggregate(Collection)} is adequate, that method should be used
046     * to avoid unecessary computation and synchronization delays.</p>
047     *
048     * @since 2.0
049     * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
050     * 
051     */
052    public class AggregateSummaryStatistics implements StatisticalSummary,
053            Serializable {
054    
055     
056        /** Serializable version identifier */
057        private static final long serialVersionUID = -8207112444016386906L;
058    
059        /**
060         * A SummaryStatistics serving as a prototype for creating SummaryStatistics
061         * contributing to this aggregate 
062         */
063        private final SummaryStatistics statisticsPrototype;
064        
065        /**
066         * The SummaryStatistics in which aggregate statistics are accumulated.
067         */
068        private final SummaryStatistics statistics;
069        
070        /**
071         * Initializes a new AggregateSummaryStatistics with default statistics
072         * implementations.
073         * 
074         */
075        public AggregateSummaryStatistics() {
076            this(new SummaryStatistics());
077        }
078        
079        /**
080         * Initializes a new AggregateSummaryStatistics with the specified statistics
081         * object as a prototype for contributing statistics and for the internal
082         * aggregate statistics.  This provides for customized statistics implementations
083         * to be used by contributing and aggregate statistics.
084         *
085         * @param prototypeStatistics a {@code SummaryStatistics} serving as a
086         *      prototype both for the internal aggregate statistics and for
087         *      contributing statistics obtained via the
088         *      {@code createContributingStatistics()} method.  Being a prototype
089         *      means that other objects are initialized by copying this object's state. 
090         *      If {@code null}, a new, default statistics object is used.  Any statistic
091         *      values in the prototype are propagated to contributing statistics
092         *      objects and (once) into these aggregate statistics.
093         * @see #createContributingStatistics()
094         */
095        public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
096            this(prototypeStatistics, (prototypeStatistics == null ? null :
097                    new SummaryStatistics(prototypeStatistics)));
098        }
099        
100        /**
101         * Initializes a new AggregateSummaryStatistics with the specified statistics
102         * object as a prototype for contributing statistics and for the internal
103         * aggregate statistics.  This provides for different statistics implementations
104         * to be used by contributing and aggregate statistics and for an initial
105         * state to be supplied for the aggregate statistics.
106         *
107         * @param prototypeStatistics a {@code SummaryStatistics} serving as a
108         *      prototype both for the internal aggregate statistics and for
109         *      contributing statistics obtained via the
110         *      {@code createContributingStatistics()} method.  Being a prototype
111         *      means that other objects are initialized by copying this object's state. 
112         *      If {@code null}, a new, default statistics object is used.  Any statistic
113         *      values in the prototype are propagated to contributing statistics
114         *      objects, but not into these aggregate statistics.
115         * @param initialStatistics a {@code SummaryStatistics} to serve as the
116         *      internal aggregate statistics object.  If {@code null}, a new, default
117         *      statistics object is used.
118         * @see #createContributingStatistics()
119         */
120        public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
121                SummaryStatistics initialStatistics) {
122            this.statisticsPrototype = ((prototypeStatistics == null) ?
123                    new SummaryStatistics() : prototypeStatistics);
124            this.statistics = ((initialStatistics == null) ?
125                    new SummaryStatistics() : initialStatistics);
126        }
127        
128        /**
129         * {@inheritDoc}.  This version returns the maximum over all the aggregated
130         * data.
131         *
132         * @see StatisticalSummary#getMax()
133         */
134        public double getMax() {
135            synchronized (statistics) {
136                return statistics.getMax();
137            }
138        }
139    
140        /**
141         * {@inheritDoc}.  This version returns the mean of all the aggregated data.
142         *
143         * @see StatisticalSummary#getMean()
144         */
145        public double getMean() {
146            synchronized (statistics) {
147                return statistics.getMean();
148            }
149        }
150    
151        /**
152         * {@inheritDoc}.  This version returns the minimum over all the aggregated
153         * data.
154         *
155         * @see StatisticalSummary#getMin()
156         */
157        public double getMin() {
158            synchronized (statistics) {
159                return statistics.getMin();
160            }
161        }
162    
163        /**
164         * {@inheritDoc}.  This version returns a count of all the aggregated data.
165         *
166         * @see StatisticalSummary#getN()
167         */
168        public long getN() {
169            synchronized (statistics) {
170                return statistics.getN();
171            }
172        }
173    
174        /**
175         * {@inheritDoc}.  This version returns the standard deviation of all the
176         * aggregated data.
177         *
178         * @see StatisticalSummary#getStandardDeviation()
179         */
180        public double getStandardDeviation() {
181            synchronized (statistics) {
182                return statistics.getStandardDeviation();
183            }
184        }
185    
186        /**
187         * {@inheritDoc}.  This version returns a sum of all the aggregated data.
188         *
189         * @see StatisticalSummary#getSum()
190         */
191        public double getSum() {
192            synchronized (statistics) {
193                return statistics.getSum();
194            }
195        }
196    
197        /**
198         * {@inheritDoc}.  This version returns the variance of all the aggregated
199         * data.
200         *
201         * @see StatisticalSummary#getVariance()
202         */
203        public double getVariance() {
204            synchronized (statistics) {
205                return statistics.getVariance();
206            }
207        }
208        
209        /**
210         * Returns the sum of the logs of all the aggregated data.
211         * 
212         * @return the sum of logs
213         * @see SummaryStatistics#getSumOfLogs()
214         */
215        public double getSumOfLogs() {
216            synchronized (statistics) {
217                return statistics.getSumOfLogs();
218            }
219        }
220        
221        /**
222         * Returns the geometric mean of all the aggregated data.
223         * 
224         * @return the geometric mean
225         * @see SummaryStatistics#getGeometricMean()
226         */
227        public double getGeometricMean() {
228            synchronized (statistics) {
229                return statistics.getGeometricMean();
230            }
231        }
232        
233        /**
234         * Returns the sum of the squares of all the aggregated data.
235         * 
236         * @return The sum of squares
237         * @see SummaryStatistics#getSumsq()
238         */
239        public double getSumsq() {
240            synchronized (statistics) {
241                return statistics.getSumsq();
242            }
243        }
244        
245        /**
246         * Returns a statistic related to the Second Central Moment.  Specifically,
247         * what is returned is the sum of squared deviations from the sample mean
248         * among the all of the aggregated data.
249         * 
250         * @return second central moment statistic
251         * @see SummaryStatistics#getSecondMoment()
252         */
253        public double getSecondMoment() {
254            synchronized (statistics) {
255                return statistics.getSecondMoment();
256            }
257        }
258        
259        /**
260         * Return a {@link StatisticalSummaryValues} instance reporting current
261         * aggregate statistics.
262         * 
263         * @return Current values of aggregate statistics
264         */
265        public StatisticalSummary getSummary() {
266            synchronized (statistics) {
267                return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 
268                        getMax(), getMin(), getSum());
269            }
270        }
271    
272        /**
273         * Creates and returns a {@code SummaryStatistics} whose data will be
274         * aggregated with those of this {@code AggregateSummaryStatistics}. 
275         *
276         * @return a {@code SummaryStatistics} whose data will be aggregated with
277         *      those of this {@code AggregateSummaryStatistics}.  The initial state
278         *      is a copy of the configured prototype statistics.
279         */
280        public SummaryStatistics createContributingStatistics() {
281            SummaryStatistics contributingStatistics
282                    = new AggregatingSummaryStatistics(statistics);
283            
284            SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
285            
286            return contributingStatistics;
287        }
288        
289        /**
290         * Computes aggregate summary statistics. This method can be used to combine statistics
291         * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
292         * should contain the same values that would have been obtained by computing a single
293         * StatisticalSummary over the combined dataset.
294         * <p>
295         * Returns null if the collection is empty or null.
296         * </p>
297         * 
298         * @param statistics collection of SummaryStatistics to aggregate
299         * @return summary statistics for the combined dataset
300         */
301        public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
302            if (statistics == null) {
303                return null;
304            }
305            Iterator<SummaryStatistics> iterator = statistics.iterator();
306            if (!iterator.hasNext()) {
307                return null;
308            }
309            SummaryStatistics current = iterator.next();
310            long n = current.getN();
311            double min = current.getMin();
312            double sum = current.getSum();
313            double max = current.getMax();
314            double m2 = current.getSecondMoment();
315            double mean = current.getMean();
316            while (iterator.hasNext()) {
317                current = iterator.next();
318                if (current.getMin() < min || Double.isNaN(min)) {
319                    min = current.getMin();
320                }
321                if (current.getMax() > max || Double.isNaN(max)) {
322                    max = current.getMax();
323                }
324                sum += current.getSum();
325                final double oldN = n;
326                final double curN = current.getN();
327                n += curN;
328                final double meanDiff = current.getMean() - mean;
329                mean = sum / n;
330                m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n; 
331            }
332            final double variance;
333            if (n == 0) {
334                variance = Double.NaN;
335            } else if (n == 1) {
336                variance = 0d;
337            } else {
338                variance = m2 / (n - 1);
339            }
340            return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
341        }
342        
343        /**
344         * A SummaryStatistics that also forwards all values added to it to a second
345         * {@code SummaryStatistics} for aggregation.
346         *
347         * @since 2.0
348         */
349        private static class AggregatingSummaryStatistics extends SummaryStatistics {
350            
351            /**
352             * The serialization version of this class
353             */
354            private static final long serialVersionUID = 1L;
355            
356            /**
357             * An additional SummaryStatistics into which values added to these
358             * statistics (and possibly others) are aggregated
359             */
360            private final SummaryStatistics aggregateStatistics;
361            
362            /**
363             * Initializes a new AggregatingSummaryStatistics with the specified
364             * aggregate statistics object
365             *
366             * @param aggregateStatistics a {@code SummaryStatistics} into which
367             *      values added to this statistics object should be aggregated
368             */
369            public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
370                this.aggregateStatistics = aggregateStatistics;
371            }
372    
373            /**
374             * {@inheritDoc}.  This version adds the provided value to the configured
375             * aggregate after adding it to these statistics.
376             *
377             * @see SummaryStatistics#addValue(double)
378             */
379            @Override
380            public void addValue(double value) {
381                super.addValue(value);
382                synchronized (aggregateStatistics) {
383                    aggregateStatistics.addValue(value);
384                }
385            }
386    
387            /**
388             * Returns true iff <code>object</code> is a
389             * <code>SummaryStatistics</code> instance and all statistics have the
390             * same values as this.
391             * @param object the object to test equality against.
392             * @return true if object equals this
393             */
394            @Override
395            public boolean equals(Object object) {
396                if (object == this) {
397                    return true;
398                }
399                if (object instanceof AggregatingSummaryStatistics == false) {
400                    return false;
401                }
402                AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
403                return (super.equals(stat) &&
404                        aggregateStatistics.equals(stat.aggregateStatistics));
405            }
406    
407            /**
408             * Returns hash code based on values of statistics
409             * @return hash code
410             */
411            @Override
412            public int hashCode() {
413                return 123 + super.hashCode() + aggregateStatistics.hashCode();
414            }
415        }
416    }