001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    
024    /**
025     * Implementation for the {@link ZipfDistribution}.
026     * 
027     * @version $Revision: 762087 $ $Date: 2009-04-05 10:20:18 -0400 (Sun, 05 Apr 2009) $
028     */
029    public class ZipfDistributionImpl extends AbstractIntegerDistribution 
030        implements ZipfDistribution, Serializable {
031    
032        /** Serializable version identifier. */
033        private static final long serialVersionUID = -140627372283420404L;
034    
035        /** Number of elements. */
036        private int numberOfElements;
037    
038        /** Exponent parameter of the distribution. */
039        private double exponent;
040    
041        /**
042         * Create a new Zipf distribution with the given number of elements and 
043         * exponent. Both values must be positive; otherwise an 
044         * <code>IllegalArgumentException</code> is thrown.
045         * 
046         * @param numberOfElements the number of elements
047         * @param exponent the exponent
048         * @exception IllegalArgumentException if n &le; 0 or s &le; 0.0
049         */
050        public ZipfDistributionImpl(final int numberOfElements, final double exponent)
051            throws IllegalArgumentException {
052            setNumberOfElements(numberOfElements);
053            setExponent(exponent);
054        }
055    
056        /**
057         * Get the number of elements (e.g. corpus size) for the distribution.
058         * 
059         * @return the number of elements
060         */
061        public int getNumberOfElements() {
062            return numberOfElements;
063        }
064    
065        /**
066         * Set the number of elements (e.g. corpus size) for the distribution.
067         * The parameter value must be positive; otherwise an 
068         * <code>IllegalArgumentException</code> is thrown.
069         * 
070         * @param n the number of elements
071         * @exception IllegalArgumentException if n &le; 0
072         */
073        public void setNumberOfElements(final int n)
074            throws IllegalArgumentException {
075            if (n <= 0) {
076                throw MathRuntimeException.createIllegalArgumentException(
077                        "invalid number of elements {0} (must be positive)",
078                        n);
079            }
080            this.numberOfElements = n;
081        }
082        
083        /**
084         * Get the exponent characterising the distribution.
085         * 
086         * @return the exponent
087         */
088        public double getExponent() {
089            return exponent;
090        }
091    
092        /**
093         * Set the exponent characterising the distribution.
094         * The parameter value must be positive; otherwise an 
095         * <code>IllegalArgumentException</code> is thrown.
096         * 
097         * @param s the exponent
098         * @exception IllegalArgumentException if s &le; 0.0
099         */
100        public void setExponent(final double s)
101            throws IllegalArgumentException {
102            if (s <= 0.0) {
103                throw MathRuntimeException.createIllegalArgumentException(
104                        "invalid exponent {0} (must be positive)",
105                        s);
106            }
107            this.exponent = s;
108        }
109    
110        /**
111         * The probability mass function P(X = x) for a Zipf distribution.
112         * 
113         * @param x the value at which the probability density function is evaluated.
114         * @return the value of the probability mass function at x
115         */
116        public double probability(final int x) {
117            if (x <= 0 || x > getNumberOfElements()) {
118                return 0.0;
119            }
120    
121            return (1.0 / Math.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
122    
123        }
124        
125        /**
126         * The probability distribution function P(X <= x) for a Zipf distribution.
127         * 
128         * @param x the value at which the PDF is evaluated.
129         * @return Zipf distribution function evaluated at x
130         */
131        @Override
132        public double cumulativeProbability(final int x) {
133            if (x <= 0) {
134                return 0.0;
135            } else if (x >= getNumberOfElements()) {
136                return 1.0;
137            }
138    
139            return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
140    
141        }
142    
143        /**
144         * Access the domain value lower bound, based on <code>p</code>, used to
145         * bracket a PDF root.
146         * 
147         * @param p the desired probability for the critical value
148         * @return domain value lower bound, i.e.
149         *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code> 
150         */
151        @Override
152        protected int getDomainLowerBound(final double p) {
153            return 0;
154        }
155    
156        /**
157         * Access the domain value upper bound, based on <code>p</code>, used to
158         * bracket a PDF root.
159         * 
160         * @param p the desired probability for the critical value
161         * @return domain value upper bound, i.e.
162         *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code> 
163         */
164        @Override
165        protected int getDomainUpperBound(final double p) {
166            return numberOfElements;
167        }
168    
169    
170        /**
171         * Calculates the Nth generalized harmonic number. See 
172         * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic 
173         * Series</a>.
174         * 
175         * @param n the term in the series to calculate (must be &ge; 1)
176         * @param m the exponent; special case m == 1.0 is the harmonic series
177         * @return the nth generalized harmonic number
178         */
179        private double generalizedHarmonic(final int n, final double m) {
180            double value = 0;
181            for (int k = n; k > 0; --k) {
182                value += 1.0 / Math.pow(k, m);
183            }
184            return value;
185        }
186    
187    }