001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.correlation; 018 019 import org.apache.commons.math.TestUtils; 020 import org.apache.commons.math.distribution.TDistribution; 021 import org.apache.commons.math.distribution.TDistributionImpl; 022 import org.apache.commons.math.linear.RealMatrix; 023 import org.apache.commons.math.linear.BlockRealMatrix; 024 025 import junit.framework.TestCase; 026 027 public class PearsonsCorrelationTest extends TestCase { 028 029 protected final double[] longleyData = new double[] { 030 60323,83.0,234289,2356,1590,107608,1947, 031 61122,88.5,259426,2325,1456,108632,1948, 032 60171,88.2,258054,3682,1616,109773,1949, 033 61187,89.5,284599,3351,1650,110929,1950, 034 63221,96.2,328975,2099,3099,112075,1951, 035 63639,98.1,346999,1932,3594,113270,1952, 036 64989,99.0,365385,1870,3547,115094,1953, 037 63761,100.0,363112,3578,3350,116219,1954, 038 66019,101.2,397469,2904,3048,117388,1955, 039 67857,104.6,419180,2822,2857,118734,1956, 040 68169,108.4,442769,2936,2798,120445,1957, 041 66513,110.8,444546,4681,2637,121950,1958, 042 68655,112.6,482704,3813,2552,123366,1959, 043 69564,114.2,502601,3931,2514,125368,1960, 044 69331,115.7,518173,4806,2572,127852,1961, 045 70551,116.9,554894,4007,2827,130081,1962 046 }; 047 048 protected final double[] swissData = new double[] { 049 80.2,17.0,15,12,9.96, 050 83.1,45.1,6,9,84.84, 051 92.5,39.7,5,5,93.40, 052 85.8,36.5,12,7,33.77, 053 76.9,43.5,17,15,5.16, 054 76.1,35.3,9,7,90.57, 055 83.8,70.2,16,7,92.85, 056 92.4,67.8,14,8,97.16, 057 82.4,53.3,12,7,97.67, 058 82.9,45.2,16,13,91.38, 059 87.1,64.5,14,6,98.61, 060 64.1,62.0,21,12,8.52, 061 66.9,67.5,14,7,2.27, 062 68.9,60.7,19,12,4.43, 063 61.7,69.3,22,5,2.82, 064 68.3,72.6,18,2,24.20, 065 71.7,34.0,17,8,3.30, 066 55.7,19.4,26,28,12.11, 067 54.3,15.2,31,20,2.15, 068 65.1,73.0,19,9,2.84, 069 65.5,59.8,22,10,5.23, 070 65.0,55.1,14,3,4.52, 071 56.6,50.9,22,12,15.14, 072 57.4,54.1,20,6,4.20, 073 72.5,71.2,12,1,2.40, 074 74.2,58.1,14,8,5.23, 075 72.0,63.5,6,3,2.56, 076 60.5,60.8,16,10,7.72, 077 58.3,26.8,25,19,18.46, 078 65.4,49.5,15,8,6.10, 079 75.5,85.9,3,2,99.71, 080 69.3,84.9,7,6,99.68, 081 77.3,89.7,5,2,100.00, 082 70.5,78.2,12,6,98.96, 083 79.4,64.9,7,3,98.22, 084 65.0,75.9,9,9,99.06, 085 92.2,84.6,3,3,99.46, 086 79.3,63.1,13,13,96.83, 087 70.4,38.4,26,12,5.62, 088 65.7,7.7,29,11,13.79, 089 72.7,16.7,22,13,11.22, 090 64.4,17.6,35,32,16.92, 091 77.6,37.6,15,7,4.97, 092 67.6,18.7,25,7,8.65, 093 35.0,1.2,37,53,42.34, 094 44.7,46.6,16,29,50.43, 095 42.8,27.7,22,29,58.33 096 }; 097 098 099 /** 100 * Test Longley dataset against R. 101 */ 102 public void testLongly() throws Exception { 103 RealMatrix matrix = createRealMatrix(longleyData, 16, 7); 104 PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 105 RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix(); 106 double[] rData = new double[] { 107 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942, 108 0.4573073999764817, 0.960390571594376, 0.9713294591921188, 109 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966, 110 0.4647441876006747, 0.979163432977498, 0.9911491900672053, 111 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580, 112 0.4464367918926265, 0.991090069458478, 0.9952734837647849, 113 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000, 114 -0.1774206295018783, 0.686551516365312, 0.6682566045621746, 115 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783, 116 1.0000000000000000, 0.364416267189032, 0.4172451498349454, 117 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120, 118 0.3644162671890320, 1.000000000000000, 0.9939528462329257, 119 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746, 120 0.4172451498349454, 0.993952846232926, 1.0000000000000000 121 }; 122 TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15); 123 124 double[] rPvalues = new double[] { 125 4.38904690369668e-10, 126 8.36353208910623e-12, 7.8159700933611e-14, 127 0.0472894097790304, 0.01030636128354301, 0.01316878049026582, 128 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452, 129 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684, 130 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15 131 }; 132 RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7); 133 fillUpper(rPMatrix, 0d); 134 TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15); 135 } 136 137 /** 138 * Test R Swiss fertility dataset against R. 139 */ 140 public void testSwissFertility() throws Exception { 141 RealMatrix matrix = createRealMatrix(swissData, 47, 5); 142 PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 143 RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix(); 144 double[] rData = new double[] { 145 1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691, 0.4636847006517939, 146 0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398, 147 -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666, 148 -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148, 149 0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000 150 }; 151 TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15); 152 153 double[] rPvalues = new double[] { 154 0.01491720061472623, 155 9.45043734069043e-07, 9.95151527133974e-08, 156 3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08, 157 0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683 158 }; 159 RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5); 160 fillUpper(rPMatrix, 0d); 161 TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15); 162 } 163 164 /** 165 * Constant column 166 */ 167 public void testConstant() { 168 double[] noVariance = new double[] {1, 1, 1, 1}; 169 double[] values = new double[] {1, 2, 3, 4}; 170 assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values))); 171 } 172 173 174 /** 175 * Insufficient data 176 */ 177 178 public void testInsufficientData() { 179 double[] one = new double[] {1}; 180 double[] two = new double[] {2}; 181 try { 182 new PearsonsCorrelation().correlation(one, two); 183 fail("Expecting IllegalArgumentException"); 184 } catch (IllegalArgumentException ex) { 185 // Expected 186 } 187 RealMatrix matrix = new BlockRealMatrix(new double[][] {{0},{1}}); 188 try { 189 new PearsonsCorrelation(matrix); 190 fail("Expecting IllegalArgumentException"); 191 } catch (IllegalArgumentException ex) { 192 // Expected 193 } 194 } 195 196 /** 197 * Verify that direct t-tests using standard error estimates are consistent 198 * with reported p-values 199 */ 200 public void testStdErrorConsistency() throws Exception { 201 TDistribution tDistribution = new TDistributionImpl(45); 202 RealMatrix matrix = createRealMatrix(swissData, 47, 5); 203 PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 204 RealMatrix rValues = corrInstance.getCorrelationMatrix(); 205 RealMatrix pValues = corrInstance.getCorrelationPValues(); 206 RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors(); 207 for (int i = 0; i < 5; i++) { 208 for (int j = 0; j < i; j++) { 209 double t = Math.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j); 210 double p = 2 * (1 - tDistribution.cumulativeProbability(t)); 211 assertEquals(p, pValues.getEntry(i, j), 10E-15); 212 } 213 } 214 } 215 216 /** 217 * Verify that creating correlation from covariance gives same results as 218 * direct computation from the original matrix 219 */ 220 public void testCovarianceConsistency() throws Exception { 221 RealMatrix matrix = createRealMatrix(longleyData, 16, 7); 222 PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 223 Covariance covInstance = new Covariance(matrix); 224 PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance); 225 TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(), 226 corrFromCovInstance.getCorrelationMatrix(), 10E-15); 227 TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(), 228 corrFromCovInstance.getCorrelationPValues(), 10E-15); 229 TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(), 230 corrFromCovInstance.getCorrelationStandardErrors(), 10E-15); 231 232 PearsonsCorrelation corrFromCovInstance2 = 233 new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16); 234 TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(), 235 corrFromCovInstance2.getCorrelationMatrix(), 10E-15); 236 TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(), 237 corrFromCovInstance2.getCorrelationPValues(), 10E-15); 238 TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(), 239 corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15); 240 } 241 242 243 public void testConsistency() { 244 RealMatrix matrix = createRealMatrix(longleyData, 16, 7); 245 PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 246 double[][] data = matrix.getData(); 247 double[] x = matrix.getColumn(0); 248 double[] y = matrix.getColumn(1); 249 assertEquals(new PearsonsCorrelation().correlation(x, y), 250 corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE); 251 TestUtils.assertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(), 252 new PearsonsCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE); 253 } 254 255 protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) { 256 double[][] matrixData = new double[nRows][nCols]; 257 int ptr = 0; 258 for (int i = 0; i < nRows; i++) { 259 System.arraycopy(data, ptr, matrixData[i], 0, nCols); 260 ptr += nCols; 261 } 262 return new BlockRealMatrix(matrixData); 263 } 264 265 protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) { 266 int ptr = 0; 267 RealMatrix result = new BlockRealMatrix(dimension, dimension); 268 for (int i = 1; i < dimension; i++) { 269 for (int j = 0; j < i; j++) { 270 result.setEntry(i, j, data[ptr]); 271 ptr++; 272 } 273 } 274 return result; 275 } 276 277 protected void fillUpper(RealMatrix matrix, double diagonalValue) { 278 int dimension = matrix.getColumnDimension(); 279 for (int i = 0; i < dimension; i++) { 280 matrix.setEntry(i, i, diagonalValue); 281 for (int j = i+1; j < dimension; j++) { 282 matrix.setEntry(i, j, matrix.getEntry(j, i)); 283 } 284 } 285 } 286 }