1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.descriptive.moment;
18
19 import java.io.Serializable;
20
21 import org.apache.commons.math.MathRuntimeException;
22 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
23
24 /**
25 * Computes the variance of the available values. By default, the unbiased
26 * "sample variance" definitional formula is used:
27 * <p>
28 * variance = sum((x_i - mean)^2) / (n - 1) </p>
29 * <p>
30 * where mean is the {@link Mean} and <code>n</code> is the number
31 * of sample observations.</p>
32 * <p>
33 * The definitional formula does not have good numerical properties, so
34 * this implementation does not compute the statistic using the definitional
35 * formula. <ul>
36 * <li> The <code>getResult</code> method computes the variance using
37 * updating formulas based on West's algorithm, as described in
38 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
39 * J. G. Lewis 1979, <i>Communications of the ACM</i>,
40 * vol. 22 no. 9, pp. 526-531.</a></li>
41 * <li> The <code>evaluate</code> methods leverage the fact that they have the
42 * full array of values in memory to execute a two-pass algorithm.
43 * Specifically, these methods use the "corrected two-pass algorithm" from
44 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
45 * American Statistician, August 1983.</li></ul>
46 * Note that adding values using <code>increment</code> or
47 * <code>incrementAll</code> and then executing <code>getResult</code> will
48 * sometimes give a different, less accurate, result than executing
49 * <code>evaluate</code> with the full array of values. The former approach
50 * should only be used when the full array of values is not available.</p>
51 * <p>
52 * The "population variance" ( sum((x_i - mean)^2) / n ) can also
53 * be computed using this statistic. The <code>isBiasCorrected</code>
54 * property determines whether the "population" or "sample" value is
55 * returned by the <code>evaluate</code> and <code>getResult</code> methods.
56 * To compute population variances, set this property to <code>false.</code>
57 * </p>
58 * <p>
59 * <strong>Note that this implementation is not synchronized.</strong> If
60 * multiple threads access an instance of this class concurrently, and at least
61 * one of the threads invokes the <code>increment()</code> or
62 * <code>clear()</code> method, it must be synchronized externally.</p>
63 *
64 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
65 */
66 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
67
68 /** Serializable version identifier */
69 private static final long serialVersionUID = -9111962718267217978L;
70
71 /** SecondMoment is used in incremental calculation of Variance*/
72 protected SecondMoment moment = null;
73
74 /**
75 * Boolean test to determine if this Variance should also increment
76 * the second moment, this evaluates to false when this Variance is
77 * constructed with an external SecondMoment as a parameter.
78 */
79 protected boolean incMoment = true;
80
81 /**
82 * Determines whether or not bias correction is applied when computing the
83 * value of the statisic. True means that bias is corrected. See
84 * {@link Variance} for details on the formula.
85 */
86 private boolean isBiasCorrected = true;
87
88 /**
89 * Constructs a Variance with default (true) <code>isBiasCorrected</code>
90 * property.
91 */
92 public Variance() {
93 moment = new SecondMoment();
94 }
95
96 /**
97 * Constructs a Variance based on an external second moment.
98 *
99 * @param m2 the SecondMoment (Third or Fourth moments work
100 * here as well.)
101 */
102 public Variance(final SecondMoment m2) {
103 incMoment = false;
104 this.moment = m2;
105 }
106
107 /**
108 * Constructs a Variance with the specified <code>isBiasCorrected</code>
109 * property
110 *
111 * @param isBiasCorrected setting for bias correction - true means
112 * bias will be corrected and is equivalent to using the argumentless
113 * constructor
114 */
115 public Variance(boolean isBiasCorrected) {
116 moment = new SecondMoment();
117 this.isBiasCorrected = isBiasCorrected;
118 }
119
120 /**
121 * Constructs a Variance with the specified <code>isBiasCorrected</code>
122 * property and the supplied external second moment.
123 *
124 * @param isBiasCorrected setting for bias correction - true means
125 * bias will be corrected
126 * @param m2 the SecondMoment (Third or Fourth moments work
127 * here as well.)
128 */
129 public Variance(boolean isBiasCorrected, SecondMoment m2) {
130 incMoment = false;
131 this.moment = m2;
132 this.isBiasCorrected = isBiasCorrected;
133 }
134
135 /**
136 * Copy constructor, creates a new {@code Variance} identical
137 * to the {@code original}
138 *
139 * @param original the {@code Variance} instance to copy
140 */
141 public Variance(Variance original) {
142 copy(original, this);
143 }
144
145 /**
146 * {@inheritDoc}
147 * <p>If all values are available, it is more accurate to use
148 * {@link #evaluate(double[])} rather than adding values one at a time
149 * using this method and then executing {@link #getResult}, since
150 * <code>evaluate</code> leverages the fact that is has the full
151 * list of values together to execute a two-pass algorithm.
152 * See {@link Variance}.</p>
153 */
154 @Override
155 public void increment(final double d) {
156 if (incMoment) {
157 moment.increment(d);
158 }
159 }
160
161 /**
162 * {@inheritDoc}
163 */
164 @Override
165 public double getResult() {
166 if (moment.n == 0) {
167 return Double.NaN;
168 } else if (moment.n == 1) {
169 return 0d;
170 } else {
171 if (isBiasCorrected) {
172 return moment.m2 / (moment.n - 1d);
173 } else {
174 return moment.m2 / (moment.n);
175 }
176 }
177 }
178
179 /**
180 * {@inheritDoc}
181 */
182 public long getN() {
183 return moment.getN();
184 }
185
186 /**
187 * {@inheritDoc}
188 */
189 @Override
190 public void clear() {
191 if (incMoment) {
192 moment.clear();
193 }
194 }
195
196 /**
197 * Returns the variance of the entries in the input array, or
198 * <code>Double.NaN</code> if the array is empty.
199 * <p>
200 * See {@link Variance} for details on the computing algorithm.</p>
201 * <p>
202 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
203 * <p>
204 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
205 * <p>
206 * Does not change the internal state of the statistic.</p>
207 *
208 * @param values the input array
209 * @return the variance of the values or Double.NaN if length = 0
210 * @throws IllegalArgumentException if the array is null
211 */
212 @Override
213 public double evaluate(final double[] values) {
214 if (values == null) {
215 throw MathRuntimeException.createIllegalArgumentException("input values array is null");
216 }
217 return evaluate(values, 0, values.length);
218 }
219
220 /**
221 * Returns the variance of the entries in the specified portion of
222 * the input array, or <code>Double.NaN</code> if the designated subarray
223 * is empty.
224 * <p>
225 * See {@link Variance} for details on the computing algorithm.</p>
226 * <p>
227 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
228 * <p>
229 * Does not change the internal state of the statistic.</p>
230 * <p>
231 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
232 *
233 * @param values the input array
234 * @param begin index of the first array element to include
235 * @param length the number of elements to include
236 * @return the variance of the values or Double.NaN if length = 0
237 * @throws IllegalArgumentException if the array is null or the array index
238 * parameters are not valid
239 */
240 @Override
241 public double evaluate(final double[] values, final int begin, final int length) {
242
243 double var = Double.NaN;
244
245 if (test(values, begin, length)) {
246 clear();
247 if (length == 1) {
248 var = 0.0;
249 } else if (length > 1) {
250 Mean mean = new Mean();
251 double m = mean.evaluate(values, begin, length);
252 var = evaluate(values, m, begin, length);
253 }
254 }
255 return var;
256 }
257
258 /**
259 * Returns the variance of the entries in the specified portion of
260 * the input array, using the precomputed mean value. Returns
261 * <code>Double.NaN</code> if the designated subarray is empty.
262 * <p>
263 * See {@link Variance} for details on the computing algorithm.</p>
264 * <p>
265 * The formula used assumes that the supplied mean value is the arithmetic
266 * mean of the sample data, not a known population parameter. This method
267 * is supplied only to save computation when the mean has already been
268 * computed.</p>
269 * <p>
270 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
271 * <p>
272 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
273 * <p>
274 * Does not change the internal state of the statistic.</p>
275 *
276 * @param values the input array
277 * @param mean the precomputed mean value
278 * @param begin index of the first array element to include
279 * @param length the number of elements to include
280 * @return the variance of the values or Double.NaN if length = 0
281 * @throws IllegalArgumentException if the array is null or the array index
282 * parameters are not valid
283 */
284 public double evaluate(final double[] values, final double mean,
285 final int begin, final int length) {
286
287 double var = Double.NaN;
288
289 if (test(values, begin, length)) {
290 if (length == 1) {
291 var = 0.0;
292 } else if (length > 1) {
293 double accum = 0.0;
294 double dev = 0.0;
295 double accum2 = 0.0;
296 for (int i = begin; i < begin + length; i++) {
297 dev = values[i] - mean;
298 accum += dev * dev;
299 accum2 += dev;
300 }
301 double len = length;
302 if (isBiasCorrected) {
303 var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
304 } else {
305 var = (accum - (accum2 * accum2 / len)) / len;
306 }
307 }
308 }
309 return var;
310 }
311
312 /**
313 * Returns the variance of the entries in the input array, using the
314 * precomputed mean value. Returns <code>Double.NaN</code> if the array
315 * is empty.
316 * <p>
317 * See {@link Variance} for details on the computing algorithm.</p>
318 * <p>
319 * If <code>isBiasCorrected</code> is <code>true</code> the formula used
320 * assumes that the supplied mean value is the arithmetic mean of the
321 * sample data, not a known population parameter. If the mean is a known
322 * population parameter, or if the "population" version of the variance is
323 * desired, set <code>isBiasCorrected</code> to <code>false</code> before
324 * invoking this method.</p>
325 * <p>
326 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
327 * <p>
328 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
329 * <p>
330 * Does not change the internal state of the statistic.</p>
331 *
332 * @param values the input array
333 * @param mean the precomputed mean value
334 * @return the variance of the values or Double.NaN if the array is empty
335 * @throws IllegalArgumentException if the array is null
336 */
337 public double evaluate(final double[] values, final double mean) {
338 return evaluate(values, mean, 0, values.length);
339 }
340
341 /**
342 * @return Returns the isBiasCorrected.
343 */
344 public boolean isBiasCorrected() {
345 return isBiasCorrected;
346 }
347
348 /**
349 * @param isBiasCorrected The isBiasCorrected to set.
350 */
351 public void setBiasCorrected(boolean isBiasCorrected) {
352 this.isBiasCorrected = isBiasCorrected;
353 }
354
355 /**
356 * {@inheritDoc}
357 */
358 @Override
359 public Variance copy() {
360 Variance result = new Variance();
361 copy(this, result);
362 return result;
363 }
364
365
366 /**
367 * Copies source to dest.
368 * <p>Neither source nor dest can be null.</p>
369 *
370 * @param source Variance to copy
371 * @param dest Variance to copy to
372 * @throws NullPointerException if either source or dest is null
373 */
374 public static void copy(Variance source, Variance dest) {
375 dest.moment = source.moment.copy();
376 dest.isBiasCorrected = source.isBiasCorrected;
377 dest.incMoment = source.incMoment;
378 }
379
380 }