001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive.moment;
018    
019    import java.io.Serializable;
020    
021    import org.apache.commons.math.MathRuntimeException;
022    import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
023    
024    /**
025     * Computes the variance of the available values.  By default, the unbiased
026     * "sample variance" definitional formula is used: 
027     * <p>
028     * variance = sum((x_i - mean)^2) / (n - 1) </p>
029     * <p>
030     * where mean is the {@link Mean} and <code>n</code> is the number
031     * of sample observations.</p>
032     * <p>
033     * The definitional formula does not have good numerical properties, so
034     * this implementation does not compute the statistic using the definitional
035     * formula. <ul>
036     * <li> The <code>getResult</code> method computes the variance using 
037     * updating formulas based on West's algorithm, as described in
038     * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
039     * J. G. Lewis 1979, <i>Communications of the ACM</i>,
040     * vol. 22 no. 9, pp. 526-531.</a></li>
041     * <li> The <code>evaluate</code> methods leverage the fact that they have the
042     * full array of values in memory to execute a two-pass algorithm. 
043     * Specifically, these methods use the "corrected two-pass algorithm" from
044     * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
045     * American Statistician, August 1983.</li></ul>
046     * Note that adding values using <code>increment</code> or 
047     * <code>incrementAll</code> and then executing <code>getResult</code> will
048     * sometimes give a different, less accurate, result than executing 
049     * <code>evaluate</code> with the full array of values. The former approach
050     * should only be used when the full array of values is not available.</p>
051     * <p>
052     * The "population variance"  ( sum((x_i - mean)^2) / n ) can also
053     * be computed using this statistic.  The <code>isBiasCorrected</code>
054     * property determines whether the "population" or "sample" value is
055     * returned by the <code>evaluate</code> and <code>getResult</code> methods.
056     * To compute population variances, set this property to <code>false.</code>
057     * </p>
058     * <p>
059     * <strong>Note that this implementation is not synchronized.</strong> If 
060     * multiple threads access an instance of this class concurrently, and at least
061     * one of the threads invokes the <code>increment()</code> or 
062     * <code>clear()</code> method, it must be synchronized externally.</p>
063     * 
064     * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
065     */
066    public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
067    
068        /** Serializable version identifier */
069        private static final long serialVersionUID = -9111962718267217978L;  
070          
071        /** SecondMoment is used in incremental calculation of Variance*/
072        protected SecondMoment moment = null;
073    
074        /**
075         * Boolean test to determine if this Variance should also increment
076         * the second moment, this evaluates to false when this Variance is
077         * constructed with an external SecondMoment as a parameter.
078         */
079        protected boolean incMoment = true;
080        
081        /**
082         * Determines whether or not bias correction is applied when computing the
083         * value of the statisic.  True means that bias is corrected.  See 
084         * {@link Variance} for details on the formula.
085         */
086        private boolean isBiasCorrected = true;
087    
088        /**
089         * Constructs a Variance with default (true) <code>isBiasCorrected</code>
090         * property.
091         */
092        public Variance() {
093            moment = new SecondMoment();
094        }
095    
096        /**
097         * Constructs a Variance based on an external second moment.
098         * 
099         * @param m2 the SecondMoment (Third or Fourth moments work
100         * here as well.)
101         */
102        public Variance(final SecondMoment m2) {
103            incMoment = false;
104            this.moment = m2;
105        }
106        
107        /**
108         * Constructs a Variance with the specified <code>isBiasCorrected</code>
109         * property
110         * 
111         * @param isBiasCorrected  setting for bias correction - true means
112         * bias will be corrected and is equivalent to using the argumentless
113         * constructor
114         */
115        public Variance(boolean isBiasCorrected) {
116            moment = new SecondMoment();
117            this.isBiasCorrected = isBiasCorrected;
118        }
119        
120        /**
121         * Constructs a Variance with the specified <code>isBiasCorrected</code>
122         * property and the supplied external second moment.
123         * 
124         * @param isBiasCorrected  setting for bias correction - true means
125         * bias will be corrected
126         * @param m2 the SecondMoment (Third or Fourth moments work
127         * here as well.)
128         */
129        public Variance(boolean isBiasCorrected, SecondMoment m2) {
130            incMoment = false;
131            this.moment = m2;
132            this.isBiasCorrected = isBiasCorrected;      
133        }
134       
135        /**
136         * Copy constructor, creates a new {@code Variance} identical
137         * to the {@code original}
138         * 
139         * @param original the {@code Variance} instance to copy
140         */
141        public Variance(Variance original) {
142            copy(original, this);
143        }           
144        
145        /**
146         * {@inheritDoc}  
147         * <p>If all values are available, it is more accurate to use 
148         * {@link #evaluate(double[])} rather than adding values one at a time
149         * using this method and then executing {@link #getResult}, since
150         * <code>evaluate</code> leverages the fact that is has the full 
151         * list of values together to execute a two-pass algorithm.  
152         * See {@link Variance}.</p>
153         */
154        @Override
155        public void increment(final double d) {
156            if (incMoment) {
157                moment.increment(d);
158            }
159        }
160    
161        /**
162         * {@inheritDoc}
163         */
164        @Override
165        public double getResult() {
166                if (moment.n == 0) {
167                    return Double.NaN;
168                } else if (moment.n == 1) {
169                    return 0d;
170                } else {
171                    if (isBiasCorrected) {
172                        return moment.m2 / (moment.n - 1d);
173                    } else {
174                        return moment.m2 / (moment.n);
175                    }
176                }
177        }
178    
179        /**
180         * {@inheritDoc}
181         */
182        public long getN() {
183            return moment.getN();
184        }
185        
186        /**
187         * {@inheritDoc}
188         */
189        @Override
190        public void clear() {
191            if (incMoment) {
192                moment.clear();
193            }
194        }
195        
196        /**
197         * Returns the variance of the entries in the input array, or 
198         * <code>Double.NaN</code> if the array is empty.
199         * <p>
200         * See {@link Variance} for details on the computing algorithm.</p>
201         * <p>
202         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
203         * <p>
204         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
205         * <p>
206         * Does not change the internal state of the statistic.</p>
207         * 
208         * @param values the input array
209         * @return the variance of the values or Double.NaN if length = 0
210         * @throws IllegalArgumentException if the array is null
211         */
212        @Override
213        public double evaluate(final double[] values) {
214            if (values == null) {
215                throw MathRuntimeException.createIllegalArgumentException("input values array is null");
216            }
217            return evaluate(values, 0, values.length);
218        }
219    
220        /**
221         * Returns the variance of the entries in the specified portion of
222         * the input array, or <code>Double.NaN</code> if the designated subarray
223         * is empty.
224         * <p>
225         * See {@link Variance} for details on the computing algorithm.</p>
226         * <p>
227         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
228         * <p>
229         * Does not change the internal state of the statistic.</p>
230         * <p>
231         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
232         * 
233         * @param values the input array
234         * @param begin index of the first array element to include
235         * @param length the number of elements to include
236         * @return the variance of the values or Double.NaN if length = 0
237         * @throws IllegalArgumentException if the array is null or the array index
238         *  parameters are not valid
239         */
240        @Override
241        public double evaluate(final double[] values, final int begin, final int length) {
242    
243            double var = Double.NaN;
244    
245            if (test(values, begin, length)) {
246                clear();
247                if (length == 1) {
248                    var = 0.0;
249                } else if (length > 1) {
250                    Mean mean = new Mean();
251                    double m = mean.evaluate(values, begin, length);
252                    var = evaluate(values, m, begin, length);
253                }
254            }
255            return var;
256        }
257        
258        /**
259         * Returns the variance of the entries in the specified portion of
260         * the input array, using the precomputed mean value.  Returns 
261         * <code>Double.NaN</code> if the designated subarray is empty.
262         * <p>
263         * See {@link Variance} for details on the computing algorithm.</p>
264         * <p>
265         * The formula used assumes that the supplied mean value is the arithmetic
266         * mean of the sample data, not a known population parameter.  This method
267         * is supplied only to save computation when the mean has already been
268         * computed.</p>
269         * <p>
270         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
271         * <p>
272         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
273         * <p>
274         * Does not change the internal state of the statistic.</p>
275         * 
276         * @param values the input array
277         * @param mean the precomputed mean value
278         * @param begin index of the first array element to include
279         * @param length the number of elements to include
280         * @return the variance of the values or Double.NaN if length = 0
281         * @throws IllegalArgumentException if the array is null or the array index
282         *  parameters are not valid
283         */
284        public double evaluate(final double[] values, final double mean, 
285                final int begin, final int length) {
286            
287            double var = Double.NaN;
288    
289            if (test(values, begin, length)) {
290                if (length == 1) {
291                    var = 0.0;
292                } else if (length > 1) {
293                    double accum = 0.0;
294                    double dev = 0.0;
295                    double accum2 = 0.0;
296                    for (int i = begin; i < begin + length; i++) {
297                        dev = values[i] - mean;
298                        accum += dev * dev;
299                        accum2 += dev;
300                    }
301                    double len = length;            
302                    if (isBiasCorrected) {
303                        var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
304                    } else {
305                        var = (accum - (accum2 * accum2 / len)) / len;
306                    }
307                }
308            }
309            return var;
310        }
311        
312        /**
313         * Returns the variance of the entries in the input array, using the
314         * precomputed mean value.  Returns <code>Double.NaN</code> if the array
315         * is empty.
316         * <p>
317         * See {@link Variance} for details on the computing algorithm.</p>
318         * <p>
319         * If <code>isBiasCorrected</code> is <code>true</code> the formula used
320         * assumes that the supplied mean value is the arithmetic mean of the
321         * sample data, not a known population parameter.  If the mean is a known
322         * population parameter, or if the "population" version of the variance is
323         * desired, set <code>isBiasCorrected</code> to <code>false</code> before
324         * invoking this method.</p>
325         * <p>
326         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
327         * <p>
328         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
329         * <p>
330         * Does not change the internal state of the statistic.</p>
331         * 
332         * @param values the input array
333         * @param mean the precomputed mean value
334         * @return the variance of the values or Double.NaN if the array is empty
335         * @throws IllegalArgumentException if the array is null
336         */
337        public double evaluate(final double[] values, final double mean) {
338            return evaluate(values, mean, 0, values.length);
339        }
340    
341        /**
342         * @return Returns the isBiasCorrected.
343         */
344        public boolean isBiasCorrected() {
345            return isBiasCorrected;
346        }
347    
348        /**
349         * @param isBiasCorrected The isBiasCorrected to set.
350         */
351        public void setBiasCorrected(boolean isBiasCorrected) {
352            this.isBiasCorrected = isBiasCorrected;
353        }
354        
355        /**
356         * {@inheritDoc}
357         */
358        @Override
359        public Variance copy() {
360            Variance result = new Variance();
361            copy(this, result);
362            return result;
363        }
364        
365        
366        /**
367         * Copies source to dest.
368         * <p>Neither source nor dest can be null.</p>
369         * 
370         * @param source Variance to copy
371         * @param dest Variance to copy to
372         * @throws NullPointerException if either source or dest is null
373         */
374        public static void copy(Variance source, Variance dest) {
375            dest.moment = source.moment.copy();
376            dest.isBiasCorrected = source.isBiasCorrected;
377            dest.incMoment = source.incMoment;
378        }
379    
380    }