001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.regression;
018    
019    import java.util.Random;
020    
021    import junit.framework.Test;
022    import junit.framework.TestCase;
023    import junit.framework.TestSuite;
024    /**
025     * Test cases for the TestStatistic class.
026     *
027     * @version $Revision: 764749 $ $Date: 2009-04-14 07:51:40 -0400 (Tue, 14 Apr 2009) $
028     */
029    
030    public final class SimpleRegressionTest extends TestCase {
031    
032        /* 
033         * NIST "Norris" refernce data set from 
034         * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
035         * Strangely, order is {y,x}
036         */
037        private double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 }, 
038                {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 }, 
039                {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 }, 
040                {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 }, 
041                {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 }, 
042                {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 }, 
043                {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 }, 
044                {449.2, 448.9 }, {0.2, 0.5 }
045        };
046    
047        /* 
048         * Correlation example from 
049         * http://www.xycoon.com/correlation.htm
050         */
051        private double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 }, 
052                {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 }, 
053                {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 }, 
054                {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
055        };
056    
057        /*
058         * From Moore and Mcabe, "Introduction to the Practice of Statistics"
059         * Example 10.3 
060         */
061        private double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
062                {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
063        };
064        
065        /*
066         * Points to remove in the remove tests
067         */
068        private double[][] removeSingle = {infData[1]};
069        private double[][] removeMultiple = { infData[1], infData[2] };
070        private double removeX = infData[0][0];
071        private double removeY = infData[0][1];
072        
073                
074        /*
075         * Data with bad linear fit
076         */
077        private double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
078                {5, -1 }, {6, 12 }
079        };
080    
081        public SimpleRegressionTest(String name) {
082            super(name);
083        }
084    
085        public static Test suite() {
086            TestSuite suite = new TestSuite(SimpleRegressionTest.class);
087            suite.setName("BivariateRegression Tests");
088            return suite;
089        }
090    
091        public void testNorris() {
092            SimpleRegression regression = new SimpleRegression();
093            for (int i = 0; i < data.length; i++) {
094                regression.addData(data[i][1], data[i][0]);
095            }
096            // Tests against certified values from  
097            // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
098            assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
099            assertEquals("slope std err", 0.429796848199937E-03,
100                    regression.getSlopeStdErr(),10E-12);
101            assertEquals("number of observations", 36, regression.getN());
102            assertEquals( "intercept", -0.262323073774029,
103                regression.getIntercept(),10E-12);
104            assertEquals("std err intercept", 0.232818234301152,
105                regression.getInterceptStdErr(),10E-12);
106            assertEquals("r-square", 0.999993745883712,
107                regression.getRSquare(), 10E-12);
108            assertEquals("SSR", 4255954.13232369,
109                regression.getRegressionSumSquares(), 10E-9);
110            assertEquals("MSE", 0.782864662630069,
111                regression.getMeanSquareError(), 10E-10);
112            assertEquals("SSE", 26.6173985294224,
113                regression.getSumSquaredErrors(),10E-9);
114            // ------------  End certified data tests
115              
116            assertEquals( "predict(0)",  -0.262323073774029,
117                regression.predict(0), 10E-12);
118            assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
119                regression.predict(1), 10E-12);
120        }
121    
122        public void testCorr() {
123            SimpleRegression regression = new SimpleRegression();
124            regression.addData(corrData);
125            assertEquals("number of observations", 17, regression.getN());
126            assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
127            assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
128        }
129    
130        public void testNaNs() {
131            SimpleRegression regression = new SimpleRegression();
132            assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
133            assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
134            assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
135            assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
136            assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
137            assertTrue("e not NaN", Double.isNaN(regression.getR()));
138            assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
139            assertTrue( "RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
140            assertTrue("SSE not NaN",Double.isNaN(regression.getSumSquaredErrors()));
141            assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));
142            assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
143    
144            regression.addData(1, 2);
145            regression.addData(1, 3);
146    
147            // No x variation, so these should still blow...
148            assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
149            assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
150            assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
151            assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
152            assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
153            assertTrue("e not NaN", Double.isNaN(regression.getR()));
154            assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
155            assertTrue("RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
156            assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
157            assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
158    
159            // but SSTO should be OK
160            assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
161    
162            regression = new SimpleRegression();
163    
164            regression.addData(1, 2);
165            regression.addData(3, 3);
166    
167            // All should be OK except MSE, s(b0), s(b1) which need one more df 
168            assertTrue("interceptNaN", !Double.isNaN(regression.getIntercept()));
169            assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
170            assertTrue ("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
171            assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
172            assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
173            assertTrue("r NaN", !Double.isNaN(regression.getR()));
174            assertTrue("r-square NaN", !Double.isNaN(regression.getRSquare()));
175            assertTrue("RSS NaN", !Double.isNaN(regression.getRegressionSumSquares()));
176            assertTrue("SSE NaN", !Double.isNaN(regression.getSumSquaredErrors()));
177            assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
178            assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));
179    
180            regression.addData(1, 4);
181    
182            // MSE, MSE, s(b0), s(b1) should all be OK now
183            assertTrue("MSE NaN", !Double.isNaN(regression.getMeanSquareError()));
184            assertTrue("slope std err NaN", !Double.isNaN(regression.getSlopeStdErr()));
185            assertTrue("intercept std err NaN", !Double.isNaN(regression.getInterceptStdErr()));
186        }
187    
188        public void testClear() {
189            SimpleRegression regression = new SimpleRegression();
190            regression.addData(corrData);
191            assertEquals("number of observations", 17, regression.getN());
192            regression.clear();
193            assertEquals("number of observations", 0, regression.getN());
194            regression.addData(corrData);
195            assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
196            regression.addData(data);
197            assertEquals("number of observations", 53, regression.getN());
198        }
199    
200        public void testInference() throws Exception {
201            //----------  verified against R, version 1.8.1 -----
202            // infData
203            SimpleRegression regression = new SimpleRegression();
204            regression.addData(infData);
205            assertEquals("slope std err", 0.011448491,
206                    regression.getSlopeStdErr(), 1E-10);
207            assertEquals("std err intercept", 0.286036932,
208                    regression.getInterceptStdErr(),1E-8);
209            assertEquals("significance", 4.596e-07,
210                    regression.getSignificance(),1E-8);    
211            assertEquals("slope conf interval half-width", 0.0270713794287, 
212                    regression.getSlopeConfidenceInterval(),1E-8);
213            // infData2
214            regression = new SimpleRegression();
215            regression.addData(infData2);
216            assertEquals("slope std err", 1.07260253,
217                    regression.getSlopeStdErr(), 1E-8);
218            assertEquals("std err intercept",4.17718672,
219                    regression.getInterceptStdErr(),1E-8);
220            assertEquals("significance", 0.261829133982,
221                    regression.getSignificance(),1E-11);    
222            assertEquals("slope conf interval half-width", 2.97802204827, 
223                    regression.getSlopeConfidenceInterval(),1E-8);
224            //------------- End R-verified tests -------------------------------
225            
226            //FIXME: get a real example to test against with alpha = .01
227            assertTrue("tighter means wider",
228                    regression.getSlopeConfidenceInterval() < regression.getSlopeConfidenceInterval(0.01));
229         
230            try {
231                regression.getSlopeConfidenceInterval(1);
232                fail("expecting IllegalArgumentException for alpha = 1");
233            } catch (IllegalArgumentException ex) {
234                // ignored
235            }  
236    
237        }
238    
239        public void testPerfect() throws Exception {
240            SimpleRegression regression = new SimpleRegression();
241            int n = 100;
242            for (int i = 0; i < n; i++) {
243                regression.addData(((double) i) / (n - 1), i);
244            }
245            assertEquals(0.0, regression.getSignificance(), 1.0e-5);
246            assertTrue(regression.getSlope() > 0.0);
247            assertTrue(regression.getSumSquaredErrors() >= 0.0);
248        }
249    
250        public void testPerfectNegative() throws Exception {
251            SimpleRegression regression = new SimpleRegression();
252            int n = 100;
253            for (int i = 0; i < n; i++) {
254                regression.addData(- ((double) i) / (n - 1), i);
255            }
256       
257            assertEquals(0.0, regression.getSignificance(), 1.0e-5);
258            assertTrue(regression.getSlope() < 0.0);   
259        }
260    
261        public void testRandom() throws Exception {
262            SimpleRegression regression = new SimpleRegression();
263            Random random = new Random(1);
264            int n = 100;
265            for (int i = 0; i < n; i++) {
266                regression.addData(((double) i) / (n - 1), random.nextDouble());
267            }
268    
269            assertTrue( 0.0 < regression.getSignificance()
270                        && regression.getSignificance() < 1.0);       
271        }
272        
273        
274        // Jira MATH-85 = Bugzilla 39432
275        public void testSSENonNegative() {
276            double[] y = { 8915.102, 8919.302, 8923.502 };
277            double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
278            SimpleRegression reg = new SimpleRegression();
279            for (int i = 0; i < x.length; i++) {
280                reg.addData(x[i], y[i]);
281            }
282            assertTrue(reg.getSumSquaredErrors() >= 0.0);
283        } 
284        
285        // Test remove X,Y (single observation)
286        public void testRemoveXY() throws Exception {
287            // Create regression with inference data then remove to test
288            SimpleRegression regression = new SimpleRegression();
289            regression.addData(infData);
290            regression.removeData(removeX, removeY);
291            regression.addData(removeX, removeY);
292            // Use the inference assertions to make sure that everything worked
293            assertEquals("slope std err", 0.011448491,
294                    regression.getSlopeStdErr(), 1E-10);
295            assertEquals("std err intercept", 0.286036932,
296                    regression.getInterceptStdErr(),1E-8);
297            assertEquals("significance", 4.596e-07,
298                    regression.getSignificance(),1E-8);    
299            assertEquals("slope conf interval half-width", 0.0270713794287, 
300                    regression.getSlopeConfidenceInterval(),1E-8);
301         }
302        
303        
304        // Test remove single observation in array
305        public void testRemoveSingle() throws Exception {
306            // Create regression with inference data then remove to test
307            SimpleRegression regression = new SimpleRegression();
308            regression.addData(infData);
309            regression.removeData(removeSingle);
310            regression.addData(removeSingle);
311            // Use the inference assertions to make sure that everything worked
312            assertEquals("slope std err", 0.011448491,
313                    regression.getSlopeStdErr(), 1E-10);
314            assertEquals("std err intercept", 0.286036932,
315                    regression.getInterceptStdErr(),1E-8);
316            assertEquals("significance", 4.596e-07,
317                    regression.getSignificance(),1E-8);    
318            assertEquals("slope conf interval half-width", 0.0270713794287, 
319                    regression.getSlopeConfidenceInterval(),1E-8);
320         }
321        
322        // Test remove multiple observations
323        public void testRemoveMultiple() throws Exception {
324            // Create regression with inference data then remove to test
325            SimpleRegression regression = new SimpleRegression();
326            regression.addData(infData);
327            regression.removeData(removeMultiple);
328            regression.addData(removeMultiple);
329            // Use the inference assertions to make sure that everything worked
330            assertEquals("slope std err", 0.011448491,
331                    regression.getSlopeStdErr(), 1E-10);
332            assertEquals("std err intercept", 0.286036932,
333                    regression.getInterceptStdErr(),1E-8);
334            assertEquals("significance", 4.596e-07,
335                    regression.getSignificance(),1E-8);    
336            assertEquals("slope conf interval half-width", 0.0270713794287, 
337                    regression.getSlopeConfidenceInterval(),1E-8);
338         }
339        
340        // Remove observation when empty
341        public void testRemoveObsFromEmpty() {
342            SimpleRegression regression = new SimpleRegression();
343            regression.removeData(removeX, removeY);
344            assertEquals(regression.getN(), 0);
345        }
346        
347        // Remove single observation to empty
348        public void testRemoveObsFromSingle() {
349            SimpleRegression regression = new SimpleRegression();
350            regression.addData(removeX, removeY);
351            regression.removeData(removeX, removeY);
352            assertEquals(regression.getN(), 0);
353        }
354        
355        // Remove multiple observations to empty
356        public void testRemoveMultipleToEmpty() {
357            SimpleRegression regression = new SimpleRegression();
358            regression.addData(removeMultiple);
359            regression.removeData(removeMultiple);
360            assertEquals(regression.getN(), 0);
361        }
362        
363        // Remove multiple observations past empty (i.e. size of array > n)
364        public void testRemoveMultiplePastEmpty() {
365            SimpleRegression regression = new SimpleRegression();
366            regression.addData(removeX, removeY);
367            regression.removeData(removeMultiple);
368            assertEquals(regression.getN(), 0);
369        }
370    }