1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.regression;
18  
19  import java.util.Random;
20  
21  import junit.framework.Test;
22  import junit.framework.TestCase;
23  import junit.framework.TestSuite;
24  /**
 * Test cases for the SimpleRegression class.
26   *
27   * @version $Revision: 764749 $ $Date: 2009-04-14 07:51:40 -0400 (Tue, 14 Apr 2009) $
28   */
29  
30  public final class SimpleRegressionTest extends TestCase {
31  
32      /* 
33       * NIST "Norris" refernce data set from 
34       * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
35       * Strangely, order is {y,x}
36       */
37      private double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 }, 
38              {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 }, 
39              {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 }, 
40              {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 }, 
41              {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 }, 
42              {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 }, 
43              {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 }, 
44              {449.2, 448.9 }, {0.2, 0.5 }
45      };
46  
47      /* 
48       * Correlation example from 
49       * http://www.xycoon.com/correlation.htm
50       */
51      private double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 }, 
52              {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 }, 
53              {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 }, 
54              {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
55      };
56  
57      /*
58       * From Moore and Mcabe, "Introduction to the Practice of Statistics"
59       * Example 10.3 
60       */
61      private double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
62              {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
63      };
64      
65      /*
66       * Points to remove in the remove tests
67       */
68      private double[][] removeSingle = {infData[1]};
69      private double[][] removeMultiple = { infData[1], infData[2] };
70      private double removeX = infData[0][0];
71      private double removeY = infData[0][1];
72      
73              
74      /*
75       * Data with bad linear fit
76       */
77      private double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
78              {5, -1 }, {6, 12 }
79      };
80  
81      public SimpleRegressionTest(String name) {
82          super(name);
83      }
84  
85      public static Test suite() {
86          TestSuite suite = new TestSuite(SimpleRegressionTest.class);
87          suite.setName("BivariateRegression Tests");
88          return suite;
89      }
90  
91      public void testNorris() {
92          SimpleRegression regression = new SimpleRegression();
93          for (int i = 0; i < data.length; i++) {
94              regression.addData(data[i][1], data[i][0]);
95          }
96          // Tests against certified values from  
97          // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
98          assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
99          assertEquals("slope std err", 0.429796848199937E-03,
100                 regression.getSlopeStdErr(),10E-12);
101         assertEquals("number of observations", 36, regression.getN());
102         assertEquals( "intercept", -0.262323073774029,
103             regression.getIntercept(),10E-12);
104         assertEquals("std err intercept", 0.232818234301152,
105             regression.getInterceptStdErr(),10E-12);
106         assertEquals("r-square", 0.999993745883712,
107             regression.getRSquare(), 10E-12);
108         assertEquals("SSR", 4255954.13232369,
109             regression.getRegressionSumSquares(), 10E-9);
110         assertEquals("MSE", 0.782864662630069,
111             regression.getMeanSquareError(), 10E-10);
112         assertEquals("SSE", 26.6173985294224,
113             regression.getSumSquaredErrors(),10E-9);
114         // ------------  End certified data tests
115           
116         assertEquals( "predict(0)",  -0.262323073774029,
117             regression.predict(0), 10E-12);
118         assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
119             regression.predict(1), 10E-12);
120     }
121 
122     public void testCorr() {
123         SimpleRegression regression = new SimpleRegression();
124         regression.addData(corrData);
125         assertEquals("number of observations", 17, regression.getN());
126         assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
127         assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
128     }
129 
130     public void testNaNs() {
131         SimpleRegression regression = new SimpleRegression();
132         assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
133         assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
134         assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
135         assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
136         assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
137         assertTrue("e not NaN", Double.isNaN(regression.getR()));
138         assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
139         assertTrue( "RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
140         assertTrue("SSE not NaN",Double.isNaN(regression.getSumSquaredErrors()));
141         assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));
142         assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
143 
144         regression.addData(1, 2);
145         regression.addData(1, 3);
146 
147         // No x variation, so these should still blow...
148         assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
149         assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
150         assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
151         assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
152         assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
153         assertTrue("e not NaN", Double.isNaN(regression.getR()));
154         assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
155         assertTrue("RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
156         assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
157         assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
158 
159         // but SSTO should be OK
160         assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
161 
162         regression = new SimpleRegression();
163 
164         regression.addData(1, 2);
165         regression.addData(3, 3);
166 
167         // All should be OK except MSE, s(b0), s(b1) which need one more df 
168         assertTrue("interceptNaN", !Double.isNaN(regression.getIntercept()));
169         assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
170         assertTrue ("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
171         assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
172         assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
173         assertTrue("r NaN", !Double.isNaN(regression.getR()));
174         assertTrue("r-square NaN", !Double.isNaN(regression.getRSquare()));
175         assertTrue("RSS NaN", !Double.isNaN(regression.getRegressionSumSquares()));
176         assertTrue("SSE NaN", !Double.isNaN(regression.getSumSquaredErrors()));
177         assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
178         assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));
179 
180         regression.addData(1, 4);
181 
182         // MSE, MSE, s(b0), s(b1) should all be OK now
183         assertTrue("MSE NaN", !Double.isNaN(regression.getMeanSquareError()));
184         assertTrue("slope std err NaN", !Double.isNaN(regression.getSlopeStdErr()));
185         assertTrue("intercept std err NaN", !Double.isNaN(regression.getInterceptStdErr()));
186     }
187 
188     public void testClear() {
189         SimpleRegression regression = new SimpleRegression();
190         regression.addData(corrData);
191         assertEquals("number of observations", 17, regression.getN());
192         regression.clear();
193         assertEquals("number of observations", 0, regression.getN());
194         regression.addData(corrData);
195         assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
196         regression.addData(data);
197         assertEquals("number of observations", 53, regression.getN());
198     }
199 
200     public void testInference() throws Exception {
201         //----------  verified against R, version 1.8.1 -----
202         // infData
203         SimpleRegression regression = new SimpleRegression();
204         regression.addData(infData);
205         assertEquals("slope std err", 0.011448491,
206                 regression.getSlopeStdErr(), 1E-10);
207         assertEquals("std err intercept", 0.286036932,
208                 regression.getInterceptStdErr(),1E-8);
209         assertEquals("significance", 4.596e-07,
210                 regression.getSignificance(),1E-8);    
211         assertEquals("slope conf interval half-width", 0.0270713794287, 
212                 regression.getSlopeConfidenceInterval(),1E-8);
213         // infData2
214         regression = new SimpleRegression();
215         regression.addData(infData2);
216         assertEquals("slope std err", 1.07260253,
217                 regression.getSlopeStdErr(), 1E-8);
218         assertEquals("std err intercept",4.17718672,
219                 regression.getInterceptStdErr(),1E-8);
220         assertEquals("significance", 0.261829133982,
221                 regression.getSignificance(),1E-11);    
222         assertEquals("slope conf interval half-width", 2.97802204827, 
223                 regression.getSlopeConfidenceInterval(),1E-8);
224         //------------- End R-verified tests -------------------------------
225         
226         //FIXME: get a real example to test against with alpha = .01
227         assertTrue("tighter means wider",
228                 regression.getSlopeConfidenceInterval() < regression.getSlopeConfidenceInterval(0.01));
229      
230         try {
231             regression.getSlopeConfidenceInterval(1);
232             fail("expecting IllegalArgumentException for alpha = 1");
233         } catch (IllegalArgumentException ex) {
234             // ignored
235         }  
236 
237     }
238 
239     public void testPerfect() throws Exception {
240         SimpleRegression regression = new SimpleRegression();
241         int n = 100;
242         for (int i = 0; i < n; i++) {
243             regression.addData(((double) i) / (n - 1), i);
244         }
245         assertEquals(0.0, regression.getSignificance(), 1.0e-5);
246         assertTrue(regression.getSlope() > 0.0);
247         assertTrue(regression.getSumSquaredErrors() >= 0.0);
248     }
249 
250     public void testPerfectNegative() throws Exception {
251         SimpleRegression regression = new SimpleRegression();
252         int n = 100;
253         for (int i = 0; i < n; i++) {
254             regression.addData(- ((double) i) / (n - 1), i);
255         }
256    
257         assertEquals(0.0, regression.getSignificance(), 1.0e-5);
258         assertTrue(regression.getSlope() < 0.0);   
259     }
260 
261     public void testRandom() throws Exception {
262         SimpleRegression regression = new SimpleRegression();
263         Random random = new Random(1);
264         int n = 100;
265         for (int i = 0; i < n; i++) {
266             regression.addData(((double) i) / (n - 1), random.nextDouble());
267         }
268 
269         assertTrue( 0.0 < regression.getSignificance()
270                     && regression.getSignificance() < 1.0);       
271     }
272     
273     
274     // Jira MATH-85 = Bugzilla 39432
275     public void testSSENonNegative() {
276         double[] y = { 8915.102, 8919.302, 8923.502 };
277         double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
278         SimpleRegression reg = new SimpleRegression();
279         for (int i = 0; i < x.length; i++) {
280             reg.addData(x[i], y[i]);
281         }
282         assertTrue(reg.getSumSquaredErrors() >= 0.0);
283     } 
284     
285     // Test remove X,Y (single observation)
286     public void testRemoveXY() throws Exception {
287         // Create regression with inference data then remove to test
288         SimpleRegression regression = new SimpleRegression();
289         regression.addData(infData);
290         regression.removeData(removeX, removeY);
291         regression.addData(removeX, removeY);
292         // Use the inference assertions to make sure that everything worked
293         assertEquals("slope std err", 0.011448491,
294                 regression.getSlopeStdErr(), 1E-10);
295         assertEquals("std err intercept", 0.286036932,
296                 regression.getInterceptStdErr(),1E-8);
297         assertEquals("significance", 4.596e-07,
298                 regression.getSignificance(),1E-8);    
299         assertEquals("slope conf interval half-width", 0.0270713794287, 
300                 regression.getSlopeConfidenceInterval(),1E-8);
301      }
302     
303     
304     // Test remove single observation in array
305     public void testRemoveSingle() throws Exception {
306         // Create regression with inference data then remove to test
307         SimpleRegression regression = new SimpleRegression();
308         regression.addData(infData);
309         regression.removeData(removeSingle);
310         regression.addData(removeSingle);
311         // Use the inference assertions to make sure that everything worked
312         assertEquals("slope std err", 0.011448491,
313                 regression.getSlopeStdErr(), 1E-10);
314         assertEquals("std err intercept", 0.286036932,
315                 regression.getInterceptStdErr(),1E-8);
316         assertEquals("significance", 4.596e-07,
317                 regression.getSignificance(),1E-8);    
318         assertEquals("slope conf interval half-width", 0.0270713794287, 
319                 regression.getSlopeConfidenceInterval(),1E-8);
320      }
321     
322     // Test remove multiple observations
323     public void testRemoveMultiple() throws Exception {
324         // Create regression with inference data then remove to test
325         SimpleRegression regression = new SimpleRegression();
326         regression.addData(infData);
327         regression.removeData(removeMultiple);
328         regression.addData(removeMultiple);
329         // Use the inference assertions to make sure that everything worked
330         assertEquals("slope std err", 0.011448491,
331                 regression.getSlopeStdErr(), 1E-10);
332         assertEquals("std err intercept", 0.286036932,
333                 regression.getInterceptStdErr(),1E-8);
334         assertEquals("significance", 4.596e-07,
335                 regression.getSignificance(),1E-8);    
336         assertEquals("slope conf interval half-width", 0.0270713794287, 
337                 regression.getSlopeConfidenceInterval(),1E-8);
338      }
339     
340     // Remove observation when empty
341     public void testRemoveObsFromEmpty() {
342         SimpleRegression regression = new SimpleRegression();
343         regression.removeData(removeX, removeY);
344         assertEquals(regression.getN(), 0);
345     }
346     
347     // Remove single observation to empty
348     public void testRemoveObsFromSingle() {
349         SimpleRegression regression = new SimpleRegression();
350         regression.addData(removeX, removeY);
351         regression.removeData(removeX, removeY);
352         assertEquals(regression.getN(), 0);
353     }
354     
355     // Remove multiple observations to empty
356     public void testRemoveMultipleToEmpty() {
357         SimpleRegression regression = new SimpleRegression();
358         regression.addData(removeMultiple);
359         regression.removeData(removeMultiple);
360         assertEquals(regression.getN(), 0);
361     }
362     
363     // Remove multiple observations past empty (i.e. size of array > n)
364     public void testRemoveMultiplePastEmpty() {
365         SimpleRegression regression = new SimpleRegression();
366         regression.addData(removeX, removeY);
367         regression.removeData(removeMultiple);
368         assertEquals(regression.getN(), 0);
369     }
370 }