001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.analysis.interpolation;
018    
019    import java.io.Serializable;
020    import java.util.Arrays;
021    
022    import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction;
023    import org.apache.commons.math3.exception.NotPositiveException;
024    import org.apache.commons.math3.exception.OutOfRangeException;
025    import org.apache.commons.math3.exception.DimensionMismatchException;
026    import org.apache.commons.math3.exception.NoDataException;
027    import org.apache.commons.math3.exception.NumberIsTooSmallException;
028    import org.apache.commons.math3.exception.NonMonotonicSequenceException;
029    import org.apache.commons.math3.exception.NotFiniteNumberException;
030    import org.apache.commons.math3.exception.util.LocalizedFormats;
031    import org.apache.commons.math3.util.FastMath;
032    import org.apache.commons.math3.util.MathUtils;
033    import org.apache.commons.math3.util.MathArrays;
034    
035    /**
036     * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression">
037     * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of
038     * real univariate functions.
039     * <p/>
040     * For reference, see
041     * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf">
042     * William S. Cleveland - Robust Locally Weighted Regression and Smoothing
043     * Scatterplots</a>
044     * <p/>
045     * This class implements both the loess method and serves as an interpolation
046     * adapter to it, allowing one to build a spline on the obtained loess fit.
047     *
048     * @version $Id: LoessInterpolator.java 1379904 2012-09-01 23:54:52Z erans $
049     * @since 2.0
050     */
051    public class LoessInterpolator
052        implements UnivariateInterpolator, Serializable {
053        /** Default value of the bandwidth parameter. */
054        public static final double DEFAULT_BANDWIDTH = 0.3;
055        /** Default value of the number of robustness iterations. */
056        public static final int DEFAULT_ROBUSTNESS_ITERS = 2;
057        /**
058         * Default value for accuracy.
059         * @since 2.1
060         */
061        public static final double DEFAULT_ACCURACY = 1e-12;
062        /** serializable version identifier. */
063        private static final long serialVersionUID = 5204927143605193821L;
064        /**
065         * The bandwidth parameter: when computing the loess fit at
066         * a particular point, this fraction of source points closest
067         * to the current point is taken into account for computing
068         * a least-squares regression.
069         * <p/>
070         * A sensible value is usually 0.25 to 0.5.
071         */
072        private final double bandwidth;
073        /**
074         * The number of robustness iterations parameter: this many
075         * robustness iterations are done.
076         * <p/>
077         * A sensible value is usually 0 (just the initial fit without any
078         * robustness iterations) to 4.
079         */
080        private final int robustnessIters;
081        /**
082         * If the median residual at a certain robustness iteration
083         * is less than this amount, no more iterations are done.
084         */
085        private final double accuracy;
086    
087        /**
088         * Constructs a new {@link LoessInterpolator}
089         * with a bandwidth of {@link #DEFAULT_BANDWIDTH},
090         * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations
091         * and an accuracy of {#link #DEFAULT_ACCURACY}.
092         * See {@link #LoessInterpolator(double, int, double)} for an explanation of
093         * the parameters.
094         */
095        public LoessInterpolator() {
096            this.bandwidth = DEFAULT_BANDWIDTH;
097            this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
098            this.accuracy = DEFAULT_ACCURACY;
099        }
100    
101        /**
102         * Construct a new {@link LoessInterpolator}
103         * with given bandwidth and number of robustness iterations.
104         * <p>
105         * Calling this constructor is equivalent to calling {link {@link
106         * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth,
107         * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)}
108         * </p>
109         *
110         * @param bandwidth  when computing the loess fit at
111         * a particular point, this fraction of source points closest
112         * to the current point is taken into account for computing
113         * a least-squares regression.</br>
114         * A sensible value is usually 0.25 to 0.5, the default value is
115         * {@link #DEFAULT_BANDWIDTH}.
116         * @param robustnessIters This many robustness iterations are done.</br>
117         * A sensible value is usually 0 (just the initial fit without any
118         * robustness iterations) to 4, the default value is
119         * {@link #DEFAULT_ROBUSTNESS_ITERS}.
120    
121         * @see #LoessInterpolator(double, int, double)
122         */
123        public LoessInterpolator(double bandwidth, int robustnessIters) {
124            this(bandwidth, robustnessIters, DEFAULT_ACCURACY);
125        }
126    
127        /**
128         * Construct a new {@link LoessInterpolator}
129         * with given bandwidth, number of robustness iterations and accuracy.
130         *
131         * @param bandwidth  when computing the loess fit at
132         * a particular point, this fraction of source points closest
133         * to the current point is taken into account for computing
134         * a least-squares regression.</br>
135         * A sensible value is usually 0.25 to 0.5, the default value is
136         * {@link #DEFAULT_BANDWIDTH}.
137         * @param robustnessIters This many robustness iterations are done.</br>
138         * A sensible value is usually 0 (just the initial fit without any
139         * robustness iterations) to 4, the default value is
140         * {@link #DEFAULT_ROBUSTNESS_ITERS}.
141         * @param accuracy If the median residual at a certain robustness iteration
142         * is less than this amount, no more iterations are done.
143         * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1].
144         * @throws NotPositiveException if {@code robustnessIters} is negative.
145         * @see #LoessInterpolator(double, int)
146         * @since 2.1
147         */
148        public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy)
149            throws OutOfRangeException,
150                   NotPositiveException {
151            if (bandwidth < 0 ||
152                bandwidth > 1) {
153                throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1);
154            }
155            this.bandwidth = bandwidth;
156            if (robustnessIters < 0) {
157                throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters);
158            }
159            this.robustnessIters = robustnessIters;
160            this.accuracy = accuracy;
161        }
162    
163        /**
164         * Compute an interpolating function by performing a loess fit
165         * on the data at the original abscissae and then building a cubic spline
166         * with a
167         * {@link org.apache.commons.math3.analysis.interpolation.SplineInterpolator}
168         * on the resulting fit.
169         *
170         * @param xval the arguments for the interpolation points
171         * @param yval the values for the interpolation points
172         * @return A cubic spline built upon a loess fit to the data at the original abscissae
173         * @throws NonMonotonicSequenceException if {@code xval} not sorted in
174         * strictly increasing order.
175         * @throws DimensionMismatchException if {@code xval} and {@code yval} have
176         * different sizes.
177         * @throws NoDataException if {@code xval} or {@code yval} has zero size.
178         * @throws NotFiniteNumberException if any of the arguments and values are
179         * not finite real numbers.
180         * @throws NumberIsTooSmallException if the bandwidth is too small to
181         * accomodate the size of the input data (i.e. the bandwidth must be
182         * larger than 2/n).
183         */
184        public final PolynomialSplineFunction interpolate(final double[] xval,
185                                                          final double[] yval)
186            throws NonMonotonicSequenceException,
187                   DimensionMismatchException,
188                   NoDataException,
189                   NotFiniteNumberException,
190                   NumberIsTooSmallException {
191            return new SplineInterpolator().interpolate(xval, smooth(xval, yval));
192        }
193    
194        /**
195         * Compute a weighted loess fit on the data at the original abscissae.
196         *
197         * @param xval Arguments for the interpolation points.
198         * @param yval Values for the interpolation points.
199         * @param weights point weights: coefficients by which the robustness weight
200         * of a point is multiplied.
201         * @return the values of the loess fit at corresponding original abscissae.
202         * @throws NonMonotonicSequenceException if {@code xval} not sorted in
203         * strictly increasing order.
204         * @throws DimensionMismatchException if {@code xval} and {@code yval} have
205         * different sizes.
206         * @throws NoDataException if {@code xval} or {@code yval} has zero size.
207         * @throws NotFiniteNumberException if any of the arguments and values are
208         not finite real numbers.
209         * @throws NumberIsTooSmallException if the bandwidth is too small to
210         * accomodate the size of the input data (i.e. the bandwidth must be
211         * larger than 2/n).
212         * @since 2.1
213         */
214        public final double[] smooth(final double[] xval, final double[] yval,
215                                     final double[] weights)
216            throws NonMonotonicSequenceException,
217                   DimensionMismatchException,
218                   NoDataException,
219                   NotFiniteNumberException,
220                   NumberIsTooSmallException {
221            if (xval.length != yval.length) {
222                throw new DimensionMismatchException(xval.length, yval.length);
223            }
224    
225            final int n = xval.length;
226    
227            if (n == 0) {
228                throw new NoDataException();
229            }
230    
231            checkAllFiniteReal(xval);
232            checkAllFiniteReal(yval);
233            checkAllFiniteReal(weights);
234    
235            MathArrays.checkOrder(xval);
236    
237            if (n == 1) {
238                return new double[]{yval[0]};
239            }
240    
241            if (n == 2) {
242                return new double[]{yval[0], yval[1]};
243            }
244    
245            int bandwidthInPoints = (int) (bandwidth * n);
246    
247            if (bandwidthInPoints < 2) {
248                throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH,
249                                                    bandwidthInPoints, 2, true);
250            }
251    
252            final double[] res = new double[n];
253    
254            final double[] residuals = new double[n];
255            final double[] sortedResiduals = new double[n];
256    
257            final double[] robustnessWeights = new double[n];
258    
259            // Do an initial fit and 'robustnessIters' robustness iterations.
260            // This is equivalent to doing 'robustnessIters+1' robustness iterations
261            // starting with all robustness weights set to 1.
262            Arrays.fill(robustnessWeights, 1);
263    
264            for (int iter = 0; iter <= robustnessIters; ++iter) {
265                final int[] bandwidthInterval = {0, bandwidthInPoints - 1};
266                // At each x, compute a local weighted linear regression
267                for (int i = 0; i < n; ++i) {
268                    final double x = xval[i];
269    
270                    // Find out the interval of source points on which
271                    // a regression is to be made.
272                    if (i > 0) {
273                        updateBandwidthInterval(xval, weights, i, bandwidthInterval);
274                    }
275    
276                    final int ileft = bandwidthInterval[0];
277                    final int iright = bandwidthInterval[1];
278    
279                    // Compute the point of the bandwidth interval that is
280                    // farthest from x
281                    final int edge;
282                    if (xval[i] - xval[ileft] > xval[iright] - xval[i]) {
283                        edge = ileft;
284                    } else {
285                        edge = iright;
286                    }
287    
288                    // Compute a least-squares linear fit weighted by
289                    // the product of robustness weights and the tricube
290                    // weight function.
291                    // See http://en.wikipedia.org/wiki/Linear_regression
292                    // (section "Univariate linear case")
293                    // and http://en.wikipedia.org/wiki/Weighted_least_squares
294                    // (section "Weighted least squares")
295                    double sumWeights = 0;
296                    double sumX = 0;
297                    double sumXSquared = 0;
298                    double sumY = 0;
299                    double sumXY = 0;
300                    double denom = FastMath.abs(1.0 / (xval[edge] - x));
301                    for (int k = ileft; k <= iright; ++k) {
302                        final double xk   = xval[k];
303                        final double yk   = yval[k];
304                        final double dist = (k < i) ? x - xk : xk - x;
305                        final double w    = tricube(dist * denom) * robustnessWeights[k] * weights[k];
306                        final double xkw  = xk * w;
307                        sumWeights += w;
308                        sumX += xkw;
309                        sumXSquared += xk * xkw;
310                        sumY += yk * w;
311                        sumXY += yk * xkw;
312                    }
313    
314                    final double meanX = sumX / sumWeights;
315                    final double meanY = sumY / sumWeights;
316                    final double meanXY = sumXY / sumWeights;
317                    final double meanXSquared = sumXSquared / sumWeights;
318    
319                    final double beta;
320                    if (FastMath.sqrt(FastMath.abs(meanXSquared - meanX * meanX)) < accuracy) {
321                        beta = 0;
322                    } else {
323                        beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
324                    }
325    
326                    final double alpha = meanY - beta * meanX;
327    
328                    res[i] = beta * x + alpha;
329                    residuals[i] = FastMath.abs(yval[i] - res[i]);
330                }
331    
332                // No need to recompute the robustness weights at the last
333                // iteration, they won't be needed anymore
334                if (iter == robustnessIters) {
335                    break;
336                }
337    
338                // Recompute the robustness weights.
339    
340                // Find the median residual.
341                // An arraycopy and a sort are completely tractable here,
342                // because the preceding loop is a lot more expensive
343                System.arraycopy(residuals, 0, sortedResiduals, 0, n);
344                Arrays.sort(sortedResiduals);
345                final double medianResidual = sortedResiduals[n / 2];
346    
347                if (FastMath.abs(medianResidual) < accuracy) {
348                    break;
349                }
350    
351                for (int i = 0; i < n; ++i) {
352                    final double arg = residuals[i] / (6 * medianResidual);
353                    if (arg >= 1) {
354                        robustnessWeights[i] = 0;
355                    } else {
356                        final double w = 1 - arg * arg;
357                        robustnessWeights[i] = w * w;
358                    }
359                }
360            }
361    
362            return res;
363        }
364    
365        /**
366         * Compute a loess fit on the data at the original abscissae.
367         *
368         * @param xval the arguments for the interpolation points
369         * @param yval the values for the interpolation points
370         * @return values of the loess fit at corresponding original abscissae
371         * @throws NonMonotonicSequenceException if {@code xval} not sorted in
372         * strictly increasing order.
373         * @throws DimensionMismatchException if {@code xval} and {@code yval} have
374         * different sizes.
375         * @throws NoDataException if {@code xval} or {@code yval} has zero size.
376         * @throws NotFiniteNumberException if any of the arguments and values are
377         * not finite real numbers.
378         * @throws NumberIsTooSmallException if the bandwidth is too small to
379         * accomodate the size of the input data (i.e. the bandwidth must be
380         * larger than 2/n).
381         */
382        public final double[] smooth(final double[] xval, final double[] yval)
383            throws NonMonotonicSequenceException,
384                   DimensionMismatchException,
385                   NoDataException,
386                   NotFiniteNumberException,
387                   NumberIsTooSmallException {
388            if (xval.length != yval.length) {
389                throw new DimensionMismatchException(xval.length, yval.length);
390            }
391    
392            final double[] unitWeights = new double[xval.length];
393            Arrays.fill(unitWeights, 1.0);
394    
395            return smooth(xval, yval, unitWeights);
396        }
397    
398        /**
399         * Given an index interval into xval that embraces a certain number of
400         * points closest to {@code xval[i-1]}, update the interval so that it
401         * embraces the same number of points closest to {@code xval[i]},
402         * ignoring zero weights.
403         *
404         * @param xval Arguments array.
405         * @param weights Weights array.
406         * @param i Index around which the new interval should be computed.
407         * @param bandwidthInterval a two-element array {left, right} such that:
408         * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])}
409         * and
410         * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}.
411         * The array will be updated.
412         */
413        private static void updateBandwidthInterval(final double[] xval, final double[] weights,
414                                                    final int i,
415                                                    final int[] bandwidthInterval) {
416            final int left = bandwidthInterval[0];
417            final int right = bandwidthInterval[1];
418    
419            // The right edge should be adjusted if the next point to the right
420            // is closer to xval[i] than the leftmost point of the current interval
421            int nextRight = nextNonzero(weights, right);
422            if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) {
423                int nextLeft = nextNonzero(weights, bandwidthInterval[0]);
424                bandwidthInterval[0] = nextLeft;
425                bandwidthInterval[1] = nextRight;
426            }
427        }
428    
429        /**
430         * Return the smallest index {@code j} such that
431         * {@code j > i && (j == weights.length || weights[j] != 0)}.
432         *
433         * @param weights Weights array.
434         * @param i Index from which to start search.
435         * @return the smallest compliant index.
436         */
437        private static int nextNonzero(final double[] weights, final int i) {
438            int j = i + 1;
439            while(j < weights.length && weights[j] == 0) {
440                ++j;
441            }
442            return j;
443        }
444    
445        /**
446         * Compute the
447         * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a>
448         * weight function
449         *
450         * @param x Argument.
451         * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| &lt; 1, 0 otherwise.
452         */
453        private static double tricube(final double x) {
454            final double absX = FastMath.abs(x);
455            if (absX >= 1.0) {
456                return 0.0;
457            }
458            final double tmp = 1 - absX * absX * absX;
459            return tmp * tmp * tmp;
460        }
461    
462        /**
463         * Check that all elements of an array are finite real numbers.
464         *
465         * @param values Values array.
466         * @throws org.apache.commons.math3.exception.NotFiniteNumberException
467         * if one of the values is not a finite real number.
468         */
469        private static void checkAllFiniteReal(final double[] values) {
470            for (int i = 0; i < values.length; i++) {
471                MathUtils.checkFinite(values[i]);
472            }
473        }
474    }