001 /* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2005, by Object Refinery Limited and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 025 * in the United States and other countries.] 026 * 027 * --------------- 028 * Statistics.java 029 * --------------- 030 * (C) Copyright 2000-2005, by Matthew Wright and Contributors. 031 * 032 * Original Author: Matthew Wright; 033 * Contributor(s): David Gilbert (for Object Refinery Limited); 034 * 035 * $Id: Statistics.java,v 1.5.2.1 2005/10/25 21:34:46 mungady Exp $ 036 * 037 * Changes (from 08-Nov-2001) 038 * -------------------------- 039 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG); 040 * Moved from JFreeChart to package com.jrefinery.data.* in 041 * JCommon class library (DG); 042 * 24-Jun-2002 : Removed unnecessary local variable (DG); 043 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG); 044 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG); 045 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG); 046 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 047 * release (DG); 048 * 049 */ 050 051 package org.jfree.data.statistics; 052 053 import java.util.ArrayList; 054 import java.util.Collection; 055 import java.util.Collections; 056 import java.util.Iterator; 057 import java.util.List; 058 059 /** 060 * A utility class that provides some simple statistical functions. 061 */ 062 public abstract class Statistics { 063 064 /** 065 * Returns the mean of an array of numbers. 066 * 067 * @param values the values (<code>null</code> permitted, returns 068 * <code>Double.NaN</code>). 069 * 070 * @return The mean. 071 */ 072 public static double calculateMean(Number[] values) { 073 double result = Double.NaN; 074 if (values != null && values.length > 0) { 075 double sum = 0.0; 076 int counter = 0; 077 for (; counter < values.length; counter++) { 078 sum = sum + values[counter].doubleValue(); 079 } 080 result = (sum / counter); 081 } 082 return result; 083 } 084 085 /** 086 * Returns the mean of a collection of <code>Number</code> objects. 087 * 088 * @param values the values (<code>null</code> permitted, returns 089 * <code>Double.NaN</code>). 090 * 091 * @return The mean. 092 */ 093 public static double calculateMean(Collection values) { 094 095 double result = Double.NaN; 096 int count = 0; 097 double total = 0.0; 098 Iterator iterator = values.iterator(); 099 while (iterator.hasNext()) { 100 Object object = iterator.next(); 101 if (object != null && object instanceof Number) { 102 Number number = (Number) object; 103 total = total + number.doubleValue(); 104 count = count + 1; 105 } 106 } 107 if (count > 0) { 108 result = total / count; 109 } 110 return result; 111 112 } 113 114 /** 115 * Calculates the median for a list of values (<code>Number</code> objects). 116 * The list of values will be sorted first. 117 * 118 * @param values the values. 119 * 120 * @return The median. 121 */ 122 public static double calculateMedian(List values) { 123 return calculateMedian(values, true); 124 } 125 126 /** 127 * Calculates the median for a list of values (<code>Number</code> objects) 128 * that are assumed to be in ascending order. 129 * 130 * @param values the values. 131 * @param copyAndSort a flag that controls whether the list of values is 132 * copied and sorted. 133 * 134 * @return The median. 135 */ 136 public static double calculateMedian(List values, boolean copyAndSort) { 137 138 double result = Double.NaN; 139 if (values != null) { 140 if (copyAndSort) { 141 int itemCount = values.size(); 142 List copy = new ArrayList(itemCount); 143 for (int i = 0; i < itemCount; i++) { 144 copy.add(i, values.get(i)); 145 } 146 Collections.sort(copy); 147 values = copy; 148 } 149 int count = values.size(); 150 if (count > 0) { 151 if (count % 2 == 1) { 152 if (count > 1) { 153 Number value = (Number) values.get((count - 1) / 2); 154 result = value.doubleValue(); 155 } 156 else { 157 Number value = (Number) values.get(0); 158 result = value.doubleValue(); 159 } 160 } 161 else { 162 Number value1 = (Number) values.get(count / 2 - 1); 163 Number value2 = (Number) values.get(count / 2); 164 result = (value1.doubleValue() + value2.doubleValue()) 165 / 2.0; 166 } 167 } 168 } 169 return result; 170 } 171 172 /** 173 * Calculates the median for a sublist within a list of values 174 * (<code>Number</code> objects). 175 * 176 * @param values the values (in any order). 177 * @param start the start index. 178 * @param end the end index. 179 * 180 * @return The median. 181 */ 182 public static double calculateMedian(List values, int start, int end) { 183 return calculateMedian(values, start, end, true); 184 } 185 186 /** 187 * Calculates the median for a sublist within a list of values 188 * (<code>Number</code> objects). The entire list will be sorted if the 189 * <code>ascending</code< argument is <code>false</code>. 190 * 191 * @param values the values. 192 * @param start the start index. 193 * @param end the end index. 194 * @param copyAndSort a flag that that controls whether the list of values 195 * is copied and sorted. 196 * 197 * @return The median. 198 */ 199 public static double calculateMedian(List values, int start, int end, 200 boolean copyAndSort) { 201 202 double result = Double.NaN; 203 if (copyAndSort) { 204 List working = new ArrayList(end - start + 1); 205 for (int i = start; i <= end; i++) { 206 working.add(values.get(i)); 207 } 208 Collections.sort(working); 209 result = calculateMedian(working, false); 210 } 211 else { 212 int count = end - start + 1; 213 if (count > 0) { 214 if (count % 2 == 1) { 215 if (count > 1) { 216 Number value 217 = (Number) values.get(start + (count - 1) / 2); 218 result = value.doubleValue(); 219 } 220 else { 221 Number value = (Number) values.get(start); 222 result = value.doubleValue(); 223 } 224 } 225 else { 226 Number value1 = (Number) values.get(start + count / 2 - 1); 227 Number value2 = (Number) values.get(start + count / 2); 228 result 229 = (value1.doubleValue() + value2.doubleValue()) / 2.0; 230 } 231 } 232 } 233 return result; 234 235 } 236 237 /** 238 * Returns the standard deviation of a set of numbers. 239 * 240 * @param data the data. 241 * 242 * @return The standard deviation of a set of numbers. 243 */ 244 public static double getStdDev(Number[] data) { 245 double avg = calculateMean(data); 246 double sum = 0.0; 247 248 for (int counter = 0; counter < data.length; counter++) { 249 double diff = data[counter].doubleValue() - avg; 250 sum = sum + diff * diff; 251 } 252 return Math.sqrt(sum / (data.length - 1)); 253 } 254 255 /** 256 * Fits a straight line to a set of (x, y) data, returning the slope and 257 * intercept. 258 * 259 * @param xData the x-data. 260 * @param yData the y-data. 261 * 262 * @return A double array with the intercept in [0] and the slope in [1]. 263 */ 264 public static double[] getLinearFit(Number[] xData, Number[] yData) { 265 266 // check arguments... 267 if (xData.length != yData.length) { 268 throw new IllegalArgumentException( 269 "Statistics.getLinearFit(): array lengths must be equal."); 270 } 271 272 double[] result = new double[2]; 273 // slope 274 result[1] = getSlope(xData, yData); 275 // intercept 276 result[0] = calculateMean(yData) - result[1] * calculateMean(xData); 277 278 return result; 279 280 } 281 282 /** 283 * Finds the slope of a regression line using least squares. 284 * 285 * @param xData an array of Numbers (the x values). 286 * @param yData an array of Numbers (the y values). 287 * 288 * @return The slope. 289 */ 290 public static double getSlope(Number[] xData, Number[] yData) { 291 292 // check arguments... 293 if (xData.length != yData.length) { 294 throw new IllegalArgumentException("Array lengths must be equal."); 295 } 296 297 // ********* stat function for linear slope ******** 298 // y = a + bx 299 // a = ybar - b * xbar 300 // sum(x * y) - (sum (x) * sum(y)) / n 301 // b = ------------------------------------ 302 // sum (x^2) - (sum(x)^2 / n 303 // ************************************************* 304 305 // sum of x, x^2, x * y, y 306 double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0; 307 int counter; 308 for (counter = 0; counter < xData.length; counter++) { 309 sx = sx + xData[counter].doubleValue(); 310 sxx = sxx + Math.pow(xData[counter].doubleValue(), 2); 311 sxy = sxy + yData[counter].doubleValue() 312 * xData[counter].doubleValue(); 313 sy = sy + yData[counter].doubleValue(); 314 } 315 return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter); 316 317 } 318 319 /** 320 * Calculates the correlation between two datasets. Both arrays should 321 * contain the same number of items. Null values are treated as zero. 322 * <P> 323 * Information about the correlation calculation was obtained from: 324 * 325 * http://trochim.human.cornell.edu/kb/statcorr.htm 326 * 327 * @param data1 the first dataset. 328 * @param data2 the second dataset. 329 * 330 * @return The correlation. 331 */ 332 public static double getCorrelation(Number[] data1, Number[] data2) { 333 if (data1 == null) { 334 throw new IllegalArgumentException("Null 'data1' argument."); 335 } 336 if (data2 == null) { 337 throw new IllegalArgumentException("Null 'data2' argument."); 338 } 339 if (data1.length != data2.length) { 340 throw new IllegalArgumentException( 341 "'data1' and 'data2' arrays must have same length." 342 ); 343 } 344 int n = data1.length; 345 double sumX = 0.0; 346 double sumY = 0.0; 347 double sumX2 = 0.0; 348 double sumY2 = 0.0; 349 double sumXY = 0.0; 350 for (int i = 0; i < n; i++) { 351 double x = 0.0; 352 if (data1[i] != null) { 353 x = data1[i].doubleValue(); 354 } 355 double y = 0.0; 356 if (data2[i] != null) { 357 y = data2[i].doubleValue(); 358 } 359 sumX = sumX + x; 360 sumY = sumY + y; 361 sumXY = sumXY + (x * y); 362 sumX2 = sumX2 + (x * x); 363 sumY2 = sumY2 + (y * y); 364 } 365 return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 366 * (n * sumY2 - sumY * sumY), 0.5); 367 } 368 369 /** 370 * Returns a data set for a moving average on the data set passed in. 371 * 372 * @param xData an array of the x data. 373 * @param yData an array of the y data. 374 * @param period the number of data points to average 375 * 376 * @return A double[][] the length of the data set in the first dimension, 377 * with two doubles for x and y in the second dimension 378 */ 379 public static double[][] getMovingAverage(Number[] xData, 380 Number[] yData, 381 int period) { 382 383 // check arguments... 384 if (xData.length != yData.length) { 385 throw new IllegalArgumentException("Array lengths must be equal."); 386 } 387 388 if (period > xData.length) { 389 throw new IllegalArgumentException( 390 "Period can't be longer than dataset." 391 ); 392 } 393 394 double[][] result = new double[xData.length - period][2]; 395 for (int i = 0; i < result.length; i++) { 396 result[i][0] = xData[i + period].doubleValue(); 397 // holds the moving average sum 398 double sum = 0.0; 399 for (int j = 0; j < period; j++) { 400 sum += yData[i + j].doubleValue(); 401 } 402 sum = sum / period; 403 result[i][1] = sum; 404 } 405 return result; 406 407 } 408 409 }