// StatisticsSolver.java
package ac.essex.ooechs.imaging.commons;
import org.jfree.data.xy.XYSeries;
import java.util.Hashtable;
import java.util.Arrays;
/**
* Statistics Solver
* A quick implementation of a few statistical techniques. Instantiate the StatisticsSolver
* and then call the addData(float) method to insert as many values as required.
* You can then call some of the statistical methods on the values, such as getStandardDeviation()
* or getMean().
* @version 1.1 07-Nov-2006 Added support for float data.
* @version 1.2 22-Jan-2007 Removed arraylist and replaced with custom primitive array for better performance.
*/
public class StatisticsSolver {

    /** Statistic selector for {@link #getTotal()}. */
    public static final int TOTAL = 1;
    /** Statistic selector for {@link #getMin()}. */
    public static final int MIN = 2;
    /** Statistic selector for {@link #getMax()}. */
    public static final int MAX = 3;
    /** Statistic selector for {@link #getRange()}. */
    public static final int RANGE = 4;
    /** Statistic selector for {@link #getMean()}. */
    public static final int MEAN = 5;
    /** Statistic selector for {@link #getMode()}. */
    public static final int MODE = 6;
    /** Statistic selector for {@link #getVariance()}. */
    public static final int VARIANCE = 7;
    /** Statistic selector for {@link #getStandardDeviation()}. */
    public static final int STANDARD_DEVIATION = 8;

    /** Backing store; only the first {@code numElements} slots hold live data. */
    private float[] v;

    /** Index of the next free slot in {@code v}. */
    private int cursor;

    /** Cached sum of all values; -1 means "not computed yet". */
    public float total = -1;

    /** Number of values currently stored. */
    protected int numElements;

    /**
     * Largest value added so far. Starts at the most negative finite float so that
     * any real value replaces it (Float.MIN_VALUE is the smallest POSITIVE float and
     * would hide negative maxima).
     */
    protected float highest = -Float.MAX_VALUE;

    /** Smallest value added so far. */
    protected float lowest = Float.MAX_VALUE;

    /** Cached mean; -1 means "not computed yet". */
    public float mean = -1;

    /** Cached sample variance; -1 means "not computed yet". */
    public float variance = -1;

    /**
     * Allows you to get the statistic by a numeric type instead of calling the right method.
     * Useful if you want to choose which method during runtime.
     *
     * @param type one of the selector constants declared on this class ({@link #TOTAL} ... {@link #STANDARD_DEVIATION})
     * @return the value of the requested statistic
     * @throws IllegalArgumentException if {@code type} is not one of the selector constants
     */
    public float getStatistic(int type) {
        switch (type) {
            case TOTAL:
                return getTotal();
            case MIN:
                return getMin();
            case MAX:
                return getMax();
            case RANGE:
                return getRange();
            case MEAN:
                return getMean();
            case MODE:
                return getMode();
            case VARIANCE:
                return getVariance();
            case STANDARD_DEVIATION:
                return getStandardDeviation();
            default:
                throw new IllegalArgumentException("Invalid statistic type: " + type);
        }
    }

    /**
     * Returns the name of each type, if you want it as a java variable.
     *
     * @param type one of the selector constants declared on this class
     * @return the qualified constant name, e.g. {@code "StatisticsSolver.MEAN"}
     * @throws IllegalArgumentException if {@code type} is not one of the selector constants
     */
    public static String getStatisticName(int type) {
        switch (type) {
            case TOTAL:
                return "StatisticsSolver.TOTAL";
            case MIN:
                return "StatisticsSolver.MIN";
            case MAX:
                return "StatisticsSolver.MAX";
            case RANGE:
                return "StatisticsSolver.RANGE";
            case MEAN:
                return "StatisticsSolver.MEAN";
            case MODE:
                return "StatisticsSolver.MODE";
            case VARIANCE:
                return "StatisticsSolver.VARIANCE";
            case STANDARD_DEVIATION:
                return "StatisticsSolver.STANDARD_DEVIATION";
            default:
                throw new IllegalArgumentException("Unknown statistic type: " + type);
        }
    }

    /**
     * Small manual smoke test: repeatedly fills the solver, prints range, mean and
     * median, and clears it again.
     */
    public static void main(String[] args) {
        StatisticsSolver s = new StatisticsSolver();
        for (int i = 0; i < 10; i++) {
            s.addData(4);
            s.addData(2);
            s.addData(1);
            s.addData(3);
            s.addData(5);
            System.out.println(s.getRange());
            System.out.println(s.getMean());
            System.out.println(s.getMedian());
            s.clear();
            s.addData(5);
            s.addData(5);
            s.addData(5);
            s.addData(5);
            s.addData(5);
            System.out.println(s.getRange());
            System.out.println(s.getMean());
            System.out.println(s.getMedian());
            s.clear();
        }
    }

    /**
     * Initialises the solver, initially with space for 20 items of data.
     */
    public StatisticsSolver() {
        this(20);
    }

    /**
     * Initialises the solver with room for {@code size} values before the backing
     * array has to grow.
     *
     * @param size expected number of values; negative values are treated as 0
     */
    public StatisticsSolver(int size) {
        // One spare slot: addData writes first and grows afterwards, so the array
        // must never be empty.
        v = new float[Math.max(size, 0) + 1];
        cursor = 0;
    }

    /**
     * Returns how many items of data are in the statistics solver
     */
    public int size() {
        return numElements;
    }

    /**
     * Gets the statistics solver as a series so it can be plotted by JFreeChart.
     * The x value of each point is the insertion index, the y value is the stored value.
     *
     * @param name the name given to the created series
     * @return a new series containing exactly the values currently stored
     */
    public XYSeries getXYSeries(String name) {
        XYSeries series = new XYSeries(name);
        // Only the first numElements slots are live; the rest of the array is spare
        // capacity (or stale data left behind by clear()).
        for (int i = 0; i < numElements; i++) {
            series.add(i, v[i]);
        }
        return series;
    }

    /**
     * Returns how much two distributions overlap, as a fraction of this
     * distribution's positive mass: 1.0 (100%) means that this distribution is
     * entirely inside the other distribution and 0.0 (0%) indicates that they share
     * no points in common at all. Only entries greater than zero contribute.
     *
     * @param other the distribution to compare against; must hold the same number of entries
     * @return the shared mass divided by this distribution's mass, or 0 if this
     *         distribution has no positive entries
     * @throws IllegalArgumentException if the two solvers hold a different number of entries
     */
    public float getDistributionOverlapWith(StatisticsSolver other) {
        if (other.numElements != this.numElements) {
            throw new IllegalArgumentException("Distribution Overlap failed: Both statistics objects need the same number of entries.");
        }
        float ownMass = 0;
        float sharedMass = 0;
        for (int i = 0; i < numElements; i++) {
            float mine = v[i];
            if (mine > 0) {
                ownMass += mine;
                float theirs = other.v[i];
                if (theirs > 0) {
                    sharedMass += Math.min(mine, theirs);
                }
            }
        }
        // Nothing to overlap with: avoid 0/0.
        if (ownMass == 0) {
            return 0;
        }
        return sharedMass / ownMass;
    }

    /**
     * Builds a histogram of the stored values. Bin {@code i} (for {@code i} in
     * {@code 0..steps}) counts the values in the closed interval
     * {@code [min + i*w, min + (i+1)*w]} where {@code w = (max - min) / steps}, so the
     * result always holds {@code steps + 1} counts and the last bin starts at
     * {@code max}. Because both interval ends are inclusive, a value lying exactly on
     * a bin edge is counted in both neighbouring bins.
     *
     * @param min   lower edge of the first bin
     * @param max   lower edge of the last bin
     * @param steps number of bin widths between {@code min} and {@code max}; must be positive
     * @return a new solver whose values are the bin counts; empty if {@code max < min}
     *         or either bound is NaN
     * @throws IllegalArgumentException if {@code steps} is not positive
     */
    public StatisticsSolver getDistribution(float min, float max, int steps) {
        if (steps <= 0) {
            throw new IllegalArgumentException("steps must be positive: " + steps);
        }
        StatisticsSolver distribution = new StatisticsSolver(steps + 1);
        if (!(min <= max)) {
            return distribution;
        }
        float step = (max - min) / steps;
        // Integer loop counter: a float accumulator never advances when step == 0
        // and drifts past max by rounding, making the bin count unpredictable.
        for (int i = 0; i <= steps; i++) {
            float lower = min + i * step;
            distribution.addData((float) countDataInRange(lower, lower + step));
        }
        return distribution;
    }

    /**
     * Counts the stored values that lie in the closed interval {@code [min, max]}.
     *
     * @param min inclusive lower bound
     * @param max inclusive upper bound
     * @return the number of stored values {@code x} with {@code min <= x <= max}
     */
    public int countDataInRange(float min, float max) {
        int count = 0;
        // Only inspect live data; spare slots would otherwise be counted as zeros.
        for (int i = 0; i < numElements; i++) {
            float value = v[i];
            if (value <= max && value >= min) {
                count++;
            }
        }
        return count;
    }

    /**
     * Adds a set of value to the statistics solver.
     *
     * @param data the values to add, in order
     */
    public void addData(float[] data) {
        for (float value : data) {
            addData(value);
        }
    }

    /**
     * Adds a value after narrowing it to {@code float}.
     *
     * @param data the number to save
     * @deprecated
     */
    @Deprecated
    public void addData(double data) {
        addData((float) data);
    }

    /**
     * Add a value to the statistics solver to be used in any calculations
     *
     * @param data The number to save
     */
    public void addData(float data) {
        v[cursor] = data;
        cursor++;
        // Grow as soon as the array is full so the next write always has a slot.
        if (cursor == v.length) {
            float[] grown = new float[v.length * 2];
            System.arraycopy(v, 0, grown, 0, v.length);
            v = grown;
        }
        if (data > highest) {
            highest = data;
        }
        if (data < lowest) {
            lowest = data;
        }
        numElements++;
        // Invalidate the cached statistics.
        variance = -1;
        mean = -1;
        total = -1;
    }

    /**
     * Calculates the correlation between this set of data and the data
     * in another statistics solver. Values are paired by insertion index.
     *
     * @param other the solver to correlate with; must hold the same number of entries
     * @return the correlation coefficient; NaN when it is undefined (for example when
     *         either data set is empty or has zero spread)
     * @throws IllegalArgumentException if the two solvers hold a different number of entries
     */
    public double getCorrelationWith(StatisticsSolver other) {
        if (other.numElements != this.numElements) {
            throw new IllegalArgumentException("Correlation failed: Both statistics objects need the same number of entries.");
        }
        // Accumulate in double: sums of squares quickly exceed float precision.
        double sumXY = 0;
        double sumX = 0;
        double sumY = 0;
        double sumXSquared = 0;
        double sumYSquared = 0;
        double n = numElements;
        for (int i = 0; i < numElements; i++) {
            double x = v[i];
            double y = other.v[i];
            sumXY += x * y;
            sumX += x;
            sumY += y;
            sumXSquared += x * x;
            sumYSquared += y * y;
        }
        double numerator = sumXY - ((sumX * sumY) / n);
        double denominator = Math.sqrt((sumXSquared - ((sumX * sumX) / n)) * (sumYSquared - ((sumY * sumY) / n)));
        return numerator / denominator;
    }

    /**
     * @return Returns the sum of all the values stored in the StatisticsSolver.
     */
    public float getTotal() {
        if (total == -1) {
            float sum = 0;
            for (int i = 0; i < numElements; i++) {
                sum += v[i];
            }
            total = sum;
        }
        return total;
    }

    /**
     * @return Gets the highest value, or {@code -Float.MAX_VALUE} if no data has been added.
     */
    public float getMax() {
        return highest;
    }

    /**
     * @return Gets the lowest value in the solver, or {@code Float.MAX_VALUE} if no data has been added.
     */
    public float getMin() {
        return lowest;
    }

    /**
     * @return the difference between the highest and lowest value, or 0 if no data has been added.
     */
    public float getRange() {
        if (numElements == 0) {
            return 0;
        }
        return highest - lowest;
    }

    /**
     * @return Returns the average of all the values stored in the StatisticsSolver,
     *         or 0 if no data has been added.
     * @deprecated Use the FastStatistics object instead.
     */
    @Deprecated
    public float getMean() {
        if (mean == -1) {
            if (numElements == 0) {
                mean = 0;
            } else {
                mean = getTotal() / numElements;
            }
        }
        return mean;
    }

    /**
     * @return the median of the stored values (the average of the two middle values
     *         when the count is even), or 0 if no data has been added.
     */
    public float getMedian() {
        if (numElements == 0) {
            return 0;
        }
        // Sort a copy so the insertion order of the stored data is preserved.
        float[] sorted = new float[numElements];
        System.arraycopy(v, 0, sorted, 0, numElements);
        Arrays.sort(sorted);
        int mid = numElements / 2;
        if (numElements % 2 == 0) {
            return (sorted[mid] + sorted[mid - 1]) / 2;
        }
        return sorted[mid];
    }

    /**
     * Calculates the sample variance (divides by n - 1).
     *
     * @return Returns the variance between all the values stored in the StatisticsSolver,
     *         or 0 when fewer than two values are stored.
     * @deprecated Use the FastStatistics object instead.
     */
    @Deprecated
    public float getVariance() {
        if (variance == -1) {
            if (numElements < 2) {
                // n - 1 would be zero or negative; report "no spread" instead of NaN / -0.
                variance = 0;
            } else {
                float average = getMean();
                float sumOfSquares = 0;
                for (int i = 0; i < numElements; i++) {
                    float deviation = v[i] - average;
                    sumOfSquares += deviation * deviation;
                }
                variance = sumOfSquares / (numElements - 1);
            }
        }
        return variance;
    }

    /**
     * @return Returns the Standard Deviation of the values, which is the Square Root
     * of the Variance.
     * @deprecated Use the FastStatistics object instead.
     */
    @Deprecated
    public float getStandardDeviation() {
        return (float) Math.sqrt(getVariance());
    }

    /**
     * @return Gets the most popular unique value in the set. On a tie the value that
     *         reached the winning count first is returned. Returns -1 if no data has
     *         been added.
     */
    public float getMode() {
        Hashtable<Float, Integer> counts = new Hashtable<Float, Integer>(255);
        float mode = -1;
        int bestCount = 0;
        for (int i = 0; i < numElements; i++) {
            Integer seen = counts.get(v[i]);
            // A first occurrence counts as 1, so a single value (or all-unique data)
            // still yields a mode.
            int count = (seen == null) ? 1 : seen + 1;
            counts.put(v[i], count);
            if (count > bestCount) {
                mode = v[i];
                bestCount = count;
            }
        }
        return mode;
    }

    /**
     * @return Counts the number of unique values that have been added to the solver.
     */
    public int countUnique() {
        Hashtable<Float, Integer> seen = new Hashtable<Float, Integer>(255);
        for (int i = 0; i < numElements; i++) {
            if (seen.get(v[i]) == null) {
                seen.put(v[i], 1);
            }
        }
        return seen.size();
    }

    /**
     * Clears all values so StatisticsSolver can be used again from scratch.
     * The backing array keeps its capacity; its old contents are simply ignored.
     */
    public void clear() {
        highest = -Float.MAX_VALUE;
        lowest = Float.MAX_VALUE;
        variance = -1;
        mean = -1;
        total = -1;
        numElements = 0;
        cursor = 0;
    }
}