View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive.moment;
18  
19  import java.io.Serializable;
20  
21  import org.apache.commons.math.MathRuntimeException;
22  import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
23  
24  /**
25   * Computes the variance of the available values.  By default, the unbiased
26   * "sample variance" definitional formula is used: 
27   * <p>
28   * variance = sum((x_i - mean)^2) / (n - 1) </p>
29   * <p>
30   * where mean is the {@link Mean} and <code>n</code> is the number
31   * of sample observations.</p>
32   * <p>
33   * The definitional formula does not have good numerical properties, so
34   * this implementation does not compute the statistic using the definitional
35   * formula. <ul>
36   * <li> The <code>getResult</code> method computes the variance using 
37   * updating formulas based on West's algorithm, as described in
38   * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
39   * J. G. Lewis 1979, <i>Communications of the ACM</i>,
40   * vol. 22 no. 9, pp. 526-531.</a></li>
41   * <li> The <code>evaluate</code> methods leverage the fact that they have the
42   * full array of values in memory to execute a two-pass algorithm. 
43   * Specifically, these methods use the "corrected two-pass algorithm" from
44   * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
45   * American Statistician, August 1983.</li></ul>
46   * Note that adding values using <code>increment</code> or 
47   * <code>incrementAll</code> and then executing <code>getResult</code> will
48   * sometimes give a different, less accurate, result than executing 
49   * <code>evaluate</code> with the full array of values. The former approach
50   * should only be used when the full array of values is not available.</p>
51   * <p>
52   * The "population variance"  ( sum((x_i - mean)^2) / n ) can also
53   * be computed using this statistic.  The <code>isBiasCorrected</code>
54   * property determines whether the "population" or "sample" value is
55   * returned by the <code>evaluate</code> and <code>getResult</code> methods.
56   * To compute population variances, set this property to <code>false.</code>
57   * </p>
58   * <p>
59   * <strong>Note that this implementation is not synchronized.</strong> If 
60   * multiple threads access an instance of this class concurrently, and at least
61   * one of the threads invokes the <code>increment()</code> or 
62   * <code>clear()</code> method, it must be synchronized externally.</p>
63   * 
64   * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
65   */
66  public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
67  
68      /** Serializable version identifier */
69      private static final long serialVersionUID = -9111962718267217978L;  
70        
71      /** SecondMoment is used in incremental calculation of Variance*/
72      protected SecondMoment moment = null;
73  
74      /**
75       * Boolean test to determine if this Variance should also increment
76       * the second moment, this evaluates to false when this Variance is
77       * constructed with an external SecondMoment as a parameter.
78       */
79      protected boolean incMoment = true;
80      
81      /**
82       * Determines whether or not bias correction is applied when computing the
83       * value of the statisic.  True means that bias is corrected.  See 
84       * {@link Variance} for details on the formula.
85       */
86      private boolean isBiasCorrected = true;
87  
88      /**
89       * Constructs a Variance with default (true) <code>isBiasCorrected</code>
90       * property.
91       */
92      public Variance() {
93          moment = new SecondMoment();
94      }
95  
96      /**
97       * Constructs a Variance based on an external second moment.
98       * 
99       * @param m2 the SecondMoment (Third or Fourth moments work
100      * here as well.)
101      */
102     public Variance(final SecondMoment m2) {
103         incMoment = false;
104         this.moment = m2;
105     }
106     
107     /**
108      * Constructs a Variance with the specified <code>isBiasCorrected</code>
109      * property
110      * 
111      * @param isBiasCorrected  setting for bias correction - true means
112      * bias will be corrected and is equivalent to using the argumentless
113      * constructor
114      */
115     public Variance(boolean isBiasCorrected) {
116         moment = new SecondMoment();
117         this.isBiasCorrected = isBiasCorrected;
118     }
119     
120     /**
121      * Constructs a Variance with the specified <code>isBiasCorrected</code>
122      * property and the supplied external second moment.
123      * 
124      * @param isBiasCorrected  setting for bias correction - true means
125      * bias will be corrected
126      * @param m2 the SecondMoment (Third or Fourth moments work
127      * here as well.)
128      */
129     public Variance(boolean isBiasCorrected, SecondMoment m2) {
130         incMoment = false;
131         this.moment = m2;
132         this.isBiasCorrected = isBiasCorrected;      
133     }
134    
135     /**
136      * Copy constructor, creates a new {@code Variance} identical
137      * to the {@code original}
138      * 
139      * @param original the {@code Variance} instance to copy
140      */
141     public Variance(Variance original) {
142         copy(original, this);
143     }           
144     
145     /**
146      * {@inheritDoc}  
147      * <p>If all values are available, it is more accurate to use 
148      * {@link #evaluate(double[])} rather than adding values one at a time
149      * using this method and then executing {@link #getResult}, since
150      * <code>evaluate</code> leverages the fact that is has the full 
151      * list of values together to execute a two-pass algorithm.  
152      * See {@link Variance}.</p>
153      */
154     @Override
155     public void increment(final double d) {
156         if (incMoment) {
157             moment.increment(d);
158         }
159     }
160 
161     /**
162      * {@inheritDoc}
163      */
164     @Override
165     public double getResult() {
166             if (moment.n == 0) {
167                 return Double.NaN;
168             } else if (moment.n == 1) {
169                 return 0d;
170             } else {
171                 if (isBiasCorrected) {
172                     return moment.m2 / (moment.n - 1d);
173                 } else {
174                     return moment.m2 / (moment.n);
175                 }
176             }
177     }
178 
179     /**
180      * {@inheritDoc}
181      */
182     public long getN() {
183         return moment.getN();
184     }
185     
186     /**
187      * {@inheritDoc}
188      */
189     @Override
190     public void clear() {
191         if (incMoment) {
192             moment.clear();
193         }
194     }
195     
196     /**
197      * Returns the variance of the entries in the input array, or 
198      * <code>Double.NaN</code> if the array is empty.
199      * <p>
200      * See {@link Variance} for details on the computing algorithm.</p>
201      * <p>
202      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
203      * <p>
204      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
205      * <p>
206      * Does not change the internal state of the statistic.</p>
207      * 
208      * @param values the input array
209      * @return the variance of the values or Double.NaN if length = 0
210      * @throws IllegalArgumentException if the array is null
211      */
212     @Override
213     public double evaluate(final double[] values) {
214         if (values == null) {
215             throw MathRuntimeException.createIllegalArgumentException("input values array is null");
216         }
217         return evaluate(values, 0, values.length);
218     }
219 
220     /**
221      * Returns the variance of the entries in the specified portion of
222      * the input array, or <code>Double.NaN</code> if the designated subarray
223      * is empty.
224      * <p>
225      * See {@link Variance} for details on the computing algorithm.</p>
226      * <p>
227      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
228      * <p>
229      * Does not change the internal state of the statistic.</p>
230      * <p>
231      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
232      * 
233      * @param values the input array
234      * @param begin index of the first array element to include
235      * @param length the number of elements to include
236      * @return the variance of the values or Double.NaN if length = 0
237      * @throws IllegalArgumentException if the array is null or the array index
238      *  parameters are not valid
239      */
240     @Override
241     public double evaluate(final double[] values, final int begin, final int length) {
242 
243         double var = Double.NaN;
244 
245         if (test(values, begin, length)) {
246             clear();
247             if (length == 1) {
248                 var = 0.0;
249             } else if (length > 1) {
250                 Mean mean = new Mean();
251                 double m = mean.evaluate(values, begin, length);
252                 var = evaluate(values, m, begin, length);
253             }
254         }
255         return var;
256     }
257     
258     /**
259      * Returns the variance of the entries in the specified portion of
260      * the input array, using the precomputed mean value.  Returns 
261      * <code>Double.NaN</code> if the designated subarray is empty.
262      * <p>
263      * See {@link Variance} for details on the computing algorithm.</p>
264      * <p>
265      * The formula used assumes that the supplied mean value is the arithmetic
266      * mean of the sample data, not a known population parameter.  This method
267      * is supplied only to save computation when the mean has already been
268      * computed.</p>
269      * <p>
270      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
271      * <p>
272      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
273      * <p>
274      * Does not change the internal state of the statistic.</p>
275      * 
276      * @param values the input array
277      * @param mean the precomputed mean value
278      * @param begin index of the first array element to include
279      * @param length the number of elements to include
280      * @return the variance of the values or Double.NaN if length = 0
281      * @throws IllegalArgumentException if the array is null or the array index
282      *  parameters are not valid
283      */
284     public double evaluate(final double[] values, final double mean, 
285             final int begin, final int length) {
286         
287         double var = Double.NaN;
288 
289         if (test(values, begin, length)) {
290             if (length == 1) {
291                 var = 0.0;
292             } else if (length > 1) {
293                 double accum = 0.0;
294                 double dev = 0.0;
295                 double accum2 = 0.0;
296                 for (int i = begin; i < begin + length; i++) {
297                     dev = values[i] - mean;
298                     accum += dev * dev;
299                     accum2 += dev;
300                 }
301                 double len = length;            
302                 if (isBiasCorrected) {
303                     var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
304                 } else {
305                     var = (accum - (accum2 * accum2 / len)) / len;
306                 }
307             }
308         }
309         return var;
310     }
311     
312     /**
313      * Returns the variance of the entries in the input array, using the
314      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
315      * is empty.
316      * <p>
317      * See {@link Variance} for details on the computing algorithm.</p>
318      * <p>
319      * If <code>isBiasCorrected</code> is <code>true</code> the formula used
320      * assumes that the supplied mean value is the arithmetic mean of the
321      * sample data, not a known population parameter.  If the mean is a known
322      * population parameter, or if the "population" version of the variance is
323      * desired, set <code>isBiasCorrected</code> to <code>false</code> before
324      * invoking this method.</p>
325      * <p>
326      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
327      * <p>
328      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
329      * <p>
330      * Does not change the internal state of the statistic.</p>
331      * 
332      * @param values the input array
333      * @param mean the precomputed mean value
334      * @return the variance of the values or Double.NaN if the array is empty
335      * @throws IllegalArgumentException if the array is null
336      */
337     public double evaluate(final double[] values, final double mean) {
338         return evaluate(values, mean, 0, values.length);
339     }
340 
341     /**
342      * @return Returns the isBiasCorrected.
343      */
344     public boolean isBiasCorrected() {
345         return isBiasCorrected;
346     }
347 
348     /**
349      * @param isBiasCorrected The isBiasCorrected to set.
350      */
351     public void setBiasCorrected(boolean isBiasCorrected) {
352         this.isBiasCorrected = isBiasCorrected;
353     }
354     
355     /**
356      * {@inheritDoc}
357      */
358     @Override
359     public Variance copy() {
360         Variance result = new Variance();
361         copy(this, result);
362         return result;
363     }
364     
365     
366     /**
367      * Copies source to dest.
368      * <p>Neither source nor dest can be null.</p>
369      * 
370      * @param source Variance to copy
371      * @param dest Variance to copy to
372      * @throws NullPointerException if either source or dest is null
373      */
374     public static void copy(Variance source, Variance dest) {
375         dest.moment = source.moment.copy();
376         dest.isBiasCorrected = source.isBiasCorrected;
377         dest.incMoment = source.incMoment;
378     }
379 
380 }