Skip to content

Commit 4107cb5

Browse files
committed
Added support for Spearman's rank correlation.
JIRA: MATH-136 Thanks to John Gant git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@778085 13f79535-47bb-0310-9956-ffa450edef68
1 parent daa2078 commit 4107cb5

5 files changed

Lines changed: 345 additions & 13 deletions

File tree

pom.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@
126126
<contributor>
127127
<name>Ted Dunning</name>
128128
</contributor>
129+
<contributor>
130+
<name>John Gant</name>
131+
</contributor>
129132
<contributor>
130133
<name>Ken Geis</name>
131134
</contributor>
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.math.stat.correlation;
19+
20+
import org.apache.commons.math.MathRuntimeException;
21+
import org.apache.commons.math.linear.DenseRealMatrix;
22+
import org.apache.commons.math.linear.RealMatrix;
23+
import org.apache.commons.math.stat.ranking.NaturalRanking;
24+
import org.apache.commons.math.stat.ranking.RankingAlgorithm;
25+
26+
/**
27+
* <p>Spearman's rank correlation. This implementation performs a rank
28+
* transformation on the input data and then computes {@link PearsonsCorrelation}
29+
* on the ranked data.</p>
30+
*
31+
* <p>By default, ranks are computed using {@link NaturalRanking} with default
32+
* strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
33+
* The ranking algorithm can be set using a constructor argument.</p>
34+
*
35+
* @since 2.0
36+
* @version $Revision:$ $Date:$
37+
*/
38+
39+
public class SpearmansCorrelation {
40+
41+
/** Input data */
42+
private final RealMatrix data;
43+
44+
/** Ranking algorithm */
45+
private final RankingAlgorithm rankingAlgorithm;
46+
47+
/** Rank correlation */
48+
private final PearsonsCorrelation rankCorrelation;
49+
50+
/**
51+
* Create a SpearmansCorrelation with the given input data matrix
52+
* and ranking algorithm.
53+
*
54+
* @param dataMatrix matrix of data with columns representing
55+
* variables to correlate
56+
* @param rankingAlgorithm ranking algorithm
57+
*/
58+
public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
59+
this.data = dataMatrix.copy();
60+
this.rankingAlgorithm = rankingAlgorithm;
61+
rankTransform(data);
62+
rankCorrelation = new PearsonsCorrelation(data);
63+
}
64+
65+
/**
66+
* Create a SpearmansCorrelation from the given data matrix.
67+
*
68+
* @param dataMatrix matrix of data with columns representing
69+
* variables to correlate
70+
*/
71+
public SpearmansCorrelation(final RealMatrix dataMatrix) {
72+
this(dataMatrix, new NaturalRanking());
73+
}
74+
75+
/**
76+
* Create a SpearmansCorrelation without data.
77+
*/
78+
public SpearmansCorrelation() {
79+
data = null;
80+
this.rankingAlgorithm = new NaturalRanking();
81+
rankCorrelation = null;
82+
}
83+
84+
/**
85+
* Calculate the Spearman Rank Correlation Matrix.
86+
*
87+
* @return Spearman Rank Correlation Matrix
88+
*/
89+
public RealMatrix getCorrelationMatrix() {
90+
return rankCorrelation.getCorrelationMatrix();
91+
}
92+
93+
/**
94+
* Returns a {@link PearsonsCorrelation} instance constructed from the
95+
* ranked input data. That is,
96+
* <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
97+
* is equivalent to
98+
* <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
99+
* <code>rankTransform(matrix)</code> is the result of applying the
100+
* configured <code>RankingAlgorithm</code> to each of the columns of
101+
* <code>matrix.</code>
102+
*
103+
* @return PearsonsCorrelation among ranked column data
104+
*/
105+
public PearsonsCorrelation getRankCorrelation() {
106+
return rankCorrelation;
107+
}
108+
109+
/**
110+
* Computes the Spearman's rank correlation matrix for the columns of the
111+
* input matrix.
112+
*
113+
* @param matrix matrix with columns representing variables to correlate
114+
* @return correlation matrix
115+
*/
116+
public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
117+
RealMatrix matrixCopy = matrix.copy();
118+
rankTransform(matrixCopy);
119+
return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
120+
}
121+
122+
/**
123+
* Computes the Spearman's rank correlation matrix for the columns of the
124+
* input rectangular array. The columns of the array represent values
125+
* of variables to be correlated.
126+
*
127+
* @param data matrix with columns representing variables to correlate
128+
* @return correlation matrix
129+
*/
130+
public RealMatrix computeCorrelationMatrix(double[][] data) {
131+
return computeCorrelationMatrix(new DenseRealMatrix(data));
132+
}
133+
134+
/**
135+
* Computes the Spearman's rank correlation coefficient between the two arrays.
136+
*
137+
* </p>Throws IllegalArgumentException if the arrays do not have the same length
138+
* or their common length is less than 2</p>
139+
*
140+
* @param xArray first data array
141+
* @param yArray second data array
142+
* @return Returns Spearman's rank correlation coefficient for the two arrays
143+
* @throws IllegalArgumentException if the arrays lengths do not match or
144+
* there is insufficient data
145+
*/
146+
public double correlation(final double[] xArray, final double[] yArray)
147+
throws IllegalArgumentException {
148+
if (xArray.length == yArray.length && xArray.length > 1) {
149+
return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
150+
rankingAlgorithm.rank(yArray));
151+
}
152+
else {
153+
throw MathRuntimeException.createIllegalArgumentException(
154+
"invalid array dimensions. xArray has size {0}; yArray has {1} elements",
155+
xArray.length, yArray.length);
156+
}
157+
}
158+
159+
/**
160+
* Applies rank transform to each of the columns of <code>matrix</code>
161+
* using the current <code>rankingAlgorithm</code>
162+
*
163+
* @param matrix matrix to transform
164+
*/
165+
private void rankTransform(RealMatrix matrix) {
166+
for (int i = 0; i < matrix.getColumnDimension(); i++) {
167+
matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
168+
}
169+
}
170+
}

src/site/xdoc/changes.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ The <action> type attribute can be add,update,fix,remove.
3939
</properties>
4040
<body>
4141
<release version="2.0" date="TBD" description="TBD">
42+
<action dev="psteitz" type="add" issue="MATH-136" due=to="John Gant">
43+
Added Spearman's rank correlation (SpearmansCorrelation).
44+
</action>
4245
<action dev="psteitz" type="add">
4346
Added support for rank transformations.
4447
</action>

src/test/R/correlationTestCases

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,22 @@ tol <- 1E-15 # error tolerance for tests
2929
source("testFunctions") # utility test functions
3030
options(digits=16) # override number of digits displayed
3131

32-
# function to verify correlation computations
33-
verifyCorrelation <- function(matrix, expectedCorrelation, name) {
32+
# Verify Pearson's correlation
33+
verifyPearsonsCorrelation <- function(matrix, expectedCorrelation, name) {
3434
correlation <- cor(matrix)
35-
output <- c("Correlation matrix test dataset = ", name)
36-
if (assertEquals(expectedCorrelation, correlation,tol,"Correlations")) {
35+
output <- c("Pearson's Correlation matrix test dataset = ", name)
36+
if (assertEquals(expectedCorrelation, correlation,tol,"Pearson's Correlations")) {
37+
displayPadded(output, SUCCEEDED, WIDTH)
38+
} else {
39+
displayPadded(output, FAILED, WIDTH)
40+
}
41+
}
42+
43+
# Verify Spearman's correlation
44+
verifySpearmansCorrelation <- function(matrix, expectedCorrelation, name) {
45+
correlation <- cor(matrix, method="spearman")
46+
output <- c("Spearman's Correlation matrix test dataset = ", name)
47+
if (assertEquals(expectedCorrelation, correlation,tol,"Spearman's Correlations")) {
3748
displayPadded(output, SUCCEEDED, WIDTH)
3849
} else {
3950
displayPadded(output, FAILED, WIDTH)
@@ -94,6 +105,7 @@ longley <- matrix(c(60323,83.0,234289,2356,1590,107608,1947,
94105
70551,116.9,554894,4007,2827,130081,1962),
95106
nrow = 16, ncol = 7, byrow = TRUE)
96107

108+
# Pearson's
97109
expectedCorrelation <- matrix(c(
98110
1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
99111
0.4573073999764817, 0.960390571594376, 0.9713294591921188,
@@ -110,7 +122,7 @@ expectedCorrelation <- matrix(c(
110122
0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
111123
0.4172451498349454, 0.993952846232926, 1.0000000000000000),
112124
nrow = 7, ncol = 7, byrow = TRUE)
113-
verifyCorrelation(longley, expectedCorrelation, "longley")
125+
verifyPearsonsCorrelation(longley, expectedCorrelation, "longley")
114126

115127
expectedPValues <- c(
116128
4.38904690369668e-10,
@@ -121,6 +133,19 @@ expectedCorrelation <- matrix(c(
121133
3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15)
122134
verifyPValues(longley, expectedPValues, "longley")
123135

136+
# Spearman's
137+
expectedCorrelation <- matrix(c(
138+
1, 0.982352941176471, 0.985294117647059, 0.564705882352941, 0.2264705882352941, 0.976470588235294,
139+
0.976470588235294, 0.982352941176471, 1, 0.997058823529412, 0.664705882352941, 0.2205882352941176,
140+
0.997058823529412, 0.997058823529412, 0.985294117647059, 0.997058823529412, 1, 0.638235294117647,
141+
0.2235294117647059, 0.9941176470588236, 0.9941176470588236, 0.564705882352941, 0.664705882352941,
142+
0.638235294117647, 1, -0.3411764705882353, 0.685294117647059, 0.685294117647059, 0.2264705882352941,
143+
0.2205882352941176, 0.2235294117647059, -0.3411764705882353, 1, 0.2264705882352941, 0.2264705882352941,
144+
0.976470588235294, 0.997058823529412, 0.9941176470588236, 0.685294117647059, 0.2264705882352941, 1, 1,
145+
0.976470588235294, 0.997058823529412, 0.9941176470588236, 0.685294117647059, 0.2264705882352941, 1, 1),
146+
nrow = 7, ncol = 7, byrow = TRUE)
147+
verifySpearmansCorrelation(longley, expectedCorrelation, "longley")
148+
124149
# Swiss Fertility
125150

126151
fertility <- matrix(c(80.2,17.0,15,12,9.96,
@@ -171,15 +196,14 @@ expectedCorrelation <- matrix(c(
171196
44.7,46.6,16,29,50.43,
172197
42.8,27.7,22,29,58.33),
173198
nrow = 47, ncol = 5, byrow = TRUE)
174-
175-
expectedCorrelation <- matrix(c(
176-
1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691, 0.4636847006517939,
177-
0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398,
178-
-0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666,
179-
-0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148,
180-
0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000),
199+
expectedCorrelation <- matrix(c(
200+
1, 0.3530791836199747, -0.6458827064572875, -0.663788857035069, 0.463684700651794,
201+
0.3530791836199747, 1, -0.6865422086171366, -0.63952251894832, 0.4010950530487398,
202+
-0.6458827064572875, -0.6865422086171366, 1, 0.698415296288483, -0.572741806064167,
203+
-0.663788857035069, -0.63952251894832, 0.698415296288483, 1, -0.1538589170909148,
204+
0.463684700651794, 0.4010950530487398, -0.572741806064167, -0.1538589170909148, 1),
181205
nrow = 5, ncol = 5, byrow = TRUE)
182-
verifyCorrelation(fertility, expectedCorrelation, "swiss fertility")
206+
verifyPearsonsCorrelation(fertility, expectedCorrelation, "swiss fertility")
183207

184208
expectedPValues <- c(
185209
0.01491720061472623,
@@ -188,4 +212,14 @@ expectedPValues <- c(
188212
0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683)
189213
verifyPValues(fertility, expectedPValues, "swiss fertility")
190214

215+
# Spearman's
216+
expectedCorrelation <- matrix(c(
217+
1, 0.2426642769364176, -0.660902996352354, -0.443257690360988, 0.4136455623012432,
218+
0.2426642769364176, 1, -0.598859938748963, -0.650463814145816, 0.2886878090882852,
219+
-0.660902996352354, -0.598859938748963, 1, 0.674603831406147, -0.4750575257171745,
220+
-0.443257690360988, -0.650463814145816, 0.674603831406147, 1, -0.1444163088302244,
221+
0.4136455623012432, 0.2886878090882852, -0.4750575257171745, -0.1444163088302244, 1),
222+
nrow = 5, ncol = 5, byrow = TRUE)
223+
verifySpearmansCorrelation(fertility, expectedCorrelation, "swiss fertility")
224+
191225
displayDashes(WIDTH)

0 commit comments

Comments
 (0)