/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.stat.descriptive;

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;

import org.apache.commons.math3.exception.NullArgumentException;

/**
 * <p>
 * An aggregator for {@code SummaryStatistics} from several data sets or
 * data set partitions.  In its simplest usage mode, the client creates an
 * instance via the zero-argument constructor, then uses
 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
 * for each individual data set / partition.  The per-set statistics objects
 * are used as normal, and at any time the aggregate statistics for all the
 * contributors can be obtained from this object.
 * </p><p>
 * Clients with specialized requirements can use alternative constructors to
 * control the statistics implementations and initial values used by the
 * contributing and the internal aggregate {@code SummaryStatistics} objects.
 * </p><p>
 * A static {@link #aggregate(Collection)} method is also included that computes
 * aggregate statistics directly from a Collection of SummaryStatistics instances.
 * </p><p>
 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
 * instances to be aggregated concurrently, the created instances'
 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
 * instance maintained by this class.  In multithreaded environments, if the functionality
 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
 * to avoid unnecessary computation and synchronization delays.</p>
 *
 * @since 2.0
 * @version $Id: AggregateSummaryStatistics.java 1416643 2012-12-03 19:37:14Z tn $
 *
 */
public class AggregateSummaryStatistics implements StatisticalSummary,
        Serializable {


    /** Serializable version identifier */
    private static final long serialVersionUID = -8207112444016386906L;

    /**
     * A SummaryStatistics serving as a prototype for creating SummaryStatistics
     * contributing to this aggregate
     */
    private final SummaryStatistics statisticsPrototype;

    /**
     * The SummaryStatistics in which aggregate statistics are accumulated.
     * All reads and writes performed by this class are guarded by this
     * object's monitor.
     */
    private final SummaryStatistics statistics;

    /**
     * Initializes a new AggregateSummaryStatistics with default statistics
     * implementations.
     *
     */
    public AggregateSummaryStatistics() {
        // No try-catch or throws NAE because arg is guaranteed non-null
        this(new SummaryStatistics());
    }

    /**
     * Initializes a new AggregateSummaryStatistics with the specified statistics
     * object as a prototype for contributing statistics and for the internal
     * aggregate statistics.  This provides for customized statistics implementations
     * to be used by contributing and aggregate statistics.
     *
     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     *      prototype both for the internal aggregate statistics and for
     *      contributing statistics obtained via the
     *      {@code createContributingStatistics()} method.  Being a prototype
     *      means that other objects are initialized by copying this object's state.
     *      If {@code null}, a new, default statistics object is used.  Any statistic
     *      values in the prototype are propagated to contributing statistics
     *      objects and (once) into these aggregate statistics.
     * @throws NullArgumentException declared for backward compatibility of the
     *      signature; a {@code null} prototype does not trigger it, because
     *      {@code null} is handled by substituting default statistics before
     *      any copy is attempted
     * @see #createContributingStatistics()
     */
    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
        // The copy constructor is only invoked for a non-null prototype; a null
        // prototype is forwarded as (null, null) and replaced by defaults below.
        this(prototypeStatistics,
             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
    }

    /**
     * Initializes a new AggregateSummaryStatistics with the specified statistics
     * object as a prototype for contributing statistics and the specified
     * object as the internal aggregate statistics.  This provides for different
     * statistics implementations to be used by contributing and aggregate
     * statistics and for an initial state to be supplied for the aggregate
     * statistics.
     *
     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     *      prototype for contributing statistics obtained via the
     *      {@code createContributingStatistics()} method.  Being a prototype
     *      means that other objects are initialized by copying this object's state.
     *      If {@code null}, a new, default statistics object is used.  Any statistic
     *      values in the prototype are propagated to contributing statistics
     *      objects, but not into these aggregate statistics.
     * @param initialStatistics a {@code SummaryStatistics} to serve as the
     *      internal aggregate statistics object.  It is used directly (not
     *      copied).  If {@code null}, a new, default statistics object is used.
     * @see #createContributingStatistics()
     */
    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
                                      SummaryStatistics initialStatistics) {
        this.statisticsPrototype =
            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
        this.statistics =
            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
    }

    /**
     * {@inheritDoc}.  This version returns the maximum over all the aggregated
     * data.
     *
     * @see StatisticalSummary#getMax()
     */
    public double getMax() {
        synchronized (statistics) {
            return statistics.getMax();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
     *
     * @see StatisticalSummary#getMean()
     */
    public double getMean() {
        synchronized (statistics) {
            return statistics.getMean();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the minimum over all the aggregated
     * data.
     *
     * @see StatisticalSummary#getMin()
     */
    public double getMin() {
        synchronized (statistics) {
            return statistics.getMin();
        }
    }

    /**
     * {@inheritDoc}.  This version returns a count of all the aggregated data.
     *
     * @see StatisticalSummary#getN()
     */
    public long getN() {
        synchronized (statistics) {
            return statistics.getN();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the standard deviation of all the
     * aggregated data.
     *
     * @see StatisticalSummary#getStandardDeviation()
     */
    public double getStandardDeviation() {
        synchronized (statistics) {
            return statistics.getStandardDeviation();
        }
    }

    /**
     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
     *
     * @see StatisticalSummary#getSum()
     */
    public double getSum() {
        synchronized (statistics) {
            return statistics.getSum();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the variance of all the aggregated
     * data.
     *
     * @see StatisticalSummary#getVariance()
     */
    public double getVariance() {
        synchronized (statistics) {
            return statistics.getVariance();
        }
    }

    /**
     * Returns the sum of the logs of all the aggregated data.
     *
     * @return the sum of logs
     * @see SummaryStatistics#getSumOfLogs()
     */
    public double getSumOfLogs() {
        synchronized (statistics) {
            return statistics.getSumOfLogs();
        }
    }

    /**
     * Returns the geometric mean of all the aggregated data.
     *
     * @return the geometric mean
     * @see SummaryStatistics#getGeometricMean()
     */
    public double getGeometricMean() {
        synchronized (statistics) {
            return statistics.getGeometricMean();
        }
    }

    /**
     * Returns the sum of the squares of all the aggregated data.
     *
     * @return The sum of squares
     * @see SummaryStatistics#getSumsq()
     */
    public double getSumsq() {
        synchronized (statistics) {
            return statistics.getSumsq();
        }
    }

    /**
     * Returns a statistic related to the Second Central Moment.  Specifically,
     * what is returned is the sum of squared deviations from the sample mean
     * among all of the aggregated data.
     *
     * @return second central moment statistic
     * @see SummaryStatistics#getSecondMoment()
     */
    public double getSecondMoment() {
        synchronized (statistics) {
            return statistics.getSecondMoment();
        }
    }

    /**
     * Return a {@link StatisticalSummaryValues} instance reporting current
     * aggregate statistics.
     *
     * @return Current values of aggregate statistics
     */
    public StatisticalSummary getSummary() {
        // Hold the monitor across all six reads so the values form one
        // consistent snapshot; the nested getters re-enter the same monitor.
        synchronized (statistics) {
            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
                    getMax(), getMin(), getSum());
        }
    }

    /**
     * Creates and returns a {@code SummaryStatistics} whose data will be
     * aggregated with those of this {@code AggregateSummaryStatistics}.
     *
     * @return a {@code SummaryStatistics} whose data will be aggregated with
     *      those of this {@code AggregateSummaryStatistics}.  The initial state
     *      is a copy of the configured prototype statistics.
     */
    public SummaryStatistics createContributingStatistics() {
        SummaryStatistics contributingStatistics
                = new AggregatingSummaryStatistics(statistics);

        // No try - catch or advertising NAE because neither argument will ever be null
        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);

        return contributingStatistics;
    }

    /**
     * Computes aggregate summary statistics. This method can be used to combine statistics
     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
     * should contain the same values that would have been obtained by computing a single
     * StatisticalSummary over the combined dataset.
     * <p>
     * Contributors reporting {@code getN() == 0} hold no data and are skipped, so
     * that undefined (e.g. NaN) moments of an empty contributor cannot invalidate
     * the variance of the combined dataset.
     * </p>
     * <p>
     * Returns null if the collection is empty or null.
     * </p>
     *
     * @param statistics collection of SummaryStatistics to aggregate
     * @return summary statistics for the combined dataset
     */
    public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
        if (statistics == null) {
            return null;
        }
        Iterator<SummaryStatistics> iterator = statistics.iterator();
        if (!iterator.hasNext()) {
            return null;
        }
        SummaryStatistics current = iterator.next();
        long n = current.getN();
        double min = current.getMin();
        double sum = current.getSum();
        double max = current.getMax();
        double m2 = current.getSecondMoment();
        double mean = current.getMean();
        while (iterator.hasNext()) {
            current = iterator.next();
            final long curN = current.getN();
            if (curN == 0) {
                // No data in this contributor: merging its moments would only
                // risk propagating undefined values into the aggregate.
                continue;
            }
            if (n == 0) {
                // Nothing accumulated so far (every earlier contributor was empty):
                // adopt this contributor's statistics instead of merging them
                // with the undefined moments of an empty aggregate.
                n = curN;
                min = current.getMin();
                max = current.getMax();
                sum = current.getSum();
                mean = current.getMean();
                m2 = current.getSecondMoment();
                continue;
            }
            if (current.getMin() < min || Double.isNaN(min)) {
                min = current.getMin();
            }
            if (current.getMax() > max || Double.isNaN(max)) {
                max = current.getMax();
            }
            sum += current.getSum();
            // Counts are widened to double in the product below, which avoids
            // long overflow in oldN * curN.
            final double oldN = n;
            n += curN;
            final double meanDiff = current.getMean() - mean;
            mean = sum / n;
            // Pairwise merge of sums of squared deviations:
            // M2 = M2_a + M2_b + delta^2 * n_a * n_b / (n_a + n_b)
            m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
        }
        final double variance;
        if (n == 0) {
            variance = Double.NaN;
        } else if (n == 1) {
            variance = 0d;
        } else {
            variance = m2 / (n - 1);
        }
        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
    }

    /**
     * A SummaryStatistics that also forwards all values added to it to a second
     * {@code SummaryStatistics} for aggregation.
     *
     * @since 2.0
     */
    private static class AggregatingSummaryStatistics extends SummaryStatistics {

        /**
         * The serialization version of this class
         */
        private static final long serialVersionUID = 1L;

        /**
         * An additional SummaryStatistics into which values added to these
         * statistics (and possibly others) are aggregated
         */
        private final SummaryStatistics aggregateStatistics;

        /**
         * Initializes a new AggregatingSummaryStatistics with the specified
         * aggregate statistics object
         *
         * @param aggregateStatistics a {@code SummaryStatistics} into which
         *      values added to this statistics object should be aggregated
         */
        public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
            this.aggregateStatistics = aggregateStatistics;
        }

        /**
         * {@inheritDoc}.  This version adds the provided value to the configured
         * aggregate after adding it to these statistics.
         *
         * @see SummaryStatistics#addValue(double)
         */
        @Override
        public void addValue(double value) {
            super.addValue(value);
            // The aggregate may be shared by several contributors, so updates
            // to it are serialized on its monitor.
            synchronized (aggregateStatistics) {
                aggregateStatistics.addValue(value);
            }
        }

        /**
         * Returns true iff <code>object</code> is an
         * <code>AggregatingSummaryStatistics</code> instance, all statistics
         * have the same values as this, and both instances' aggregate
         * statistics are equal.
         * @param object the object to test equality against.
         * @return true if object equals this
         */
        @Override
        public boolean equals(Object object) {
            if (object == this) {
                return true;
            }
            if (!(object instanceof AggregatingSummaryStatistics)) {
                return false;
            }
            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
            return super.equals(stat) &&
                   aggregateStatistics.equals(stat.aggregateStatistics);
        }

        /**
         * Returns hash code based on values of statistics
         * @return hash code
         */
        @Override
        public int hashCode() {
            return 123 + super.hashCode() + aggregateStatistics.hashCode();
        }
    }
}