1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.math.distribution; 19 20 import java.io.Serializable; 21 22 import org.apache.commons.math.MathRuntimeException; 23 24 /** 25 * Implementation for the {@link ZipfDistribution}. 26 * 27 * @version $Revision: 762087 $ $Date: 2009-04-05 10:20:18 -0400 (Sun, 05 Apr 2009) $ 28 */ 29 public class ZipfDistributionImpl extends AbstractIntegerDistribution 30 implements ZipfDistribution, Serializable { 31 32 /** Serializable version identifier. */ 33 private static final long serialVersionUID = -140627372283420404L; 34 35 /** Number of elements. */ 36 private int numberOfElements; 37 38 /** Exponent parameter of the distribution. */ 39 private double exponent; 40 41 /** 42 * Create a new Zipf distribution with the given number of elements and 43 * exponent. Both values must be positive; otherwise an 44 * <code>IllegalArgumentException</code> is thrown. 45 * 46 * @param numberOfElements the number of elements 47 * @param exponent the exponent 48 * @exception IllegalArgumentException if n ≤ 0 or s ≤ 0.0 49 */ 50 public ZipfDistributionImpl(final int numberOfElements, final double exponent) 51 throws IllegalArgumentException { 52 setNumberOfElements(numberOfElements); 53 setExponent(exponent); 54 } 55 56 /** 57 * Get the number of elements (e.g. corpus size) for the distribution. 58 * 59 * @return the number of elements 60 */ 61 public int getNumberOfElements() { 62 return numberOfElements; 63 } 64 65 /** 66 * Set the number of elements (e.g. corpus size) for the distribution. 67 * The parameter value must be positive; otherwise an 68 * <code>IllegalArgumentException</code> is thrown. 69 * 70 * @param n the number of elements 71 * @exception IllegalArgumentException if n ≤ 0 72 */ 73 public void setNumberOfElements(final int n) 74 throws IllegalArgumentException { 75 if (n <= 0) { 76 throw MathRuntimeException.createIllegalArgumentException( 77 "invalid number of elements {0} (must be positive)", 78 n); 79 } 80 this.numberOfElements = n; 81 } 82 83 /** 84 * Get the exponent characterising the distribution. 85 * 86 * @return the exponent 87 */ 88 public double getExponent() { 89 return exponent; 90 } 91 92 /** 93 * Set the exponent characterising the distribution. 94 * The parameter value must be positive; otherwise an 95 * <code>IllegalArgumentException</code> is thrown. 96 * 97 * @param s the exponent 98 * @exception IllegalArgumentException if s ≤ 0.0 99 */ 100 public void setExponent(final double s) 101 throws IllegalArgumentException { 102 if (s <= 0.0) { 103 throw MathRuntimeException.createIllegalArgumentException( 104 "invalid exponent {0} (must be positive)", 105 s); 106 } 107 this.exponent = s; 108 } 109 110 /** 111 * The probability mass function P(X = x) for a Zipf distribution. 112 * 113 * @param x the value at which the probability density function is evaluated. 114 * @return the value of the probability mass function at x 115 */ 116 public double probability(final int x) { 117 if (x <= 0 || x > getNumberOfElements()) { 118 return 0.0; 119 } 120 121 return (1.0 / Math.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); 122 123 } 124 125 /** 126 * The probability distribution function P(X <= x) for a Zipf distribution. 127 * 128 * @param x the value at which the PDF is evaluated. 129 * @return Zipf distribution function evaluated at x 130 */ 131 @Override 132 public double cumulativeProbability(final int x) { 133 if (x <= 0) { 134 return 0.0; 135 } else if (x >= getNumberOfElements()) { 136 return 1.0; 137 } 138 139 return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); 140 141 } 142 143 /** 144 * Access the domain value lower bound, based on <code>p</code>, used to 145 * bracket a PDF root. 146 * 147 * @param p the desired probability for the critical value 148 * @return domain value lower bound, i.e. 149 * P(X < <i>lower bound</i>) < <code>p</code> 150 */ 151 @Override 152 protected int getDomainLowerBound(final double p) { 153 return 0; 154 } 155 156 /** 157 * Access the domain value upper bound, based on <code>p</code>, used to 158 * bracket a PDF root. 159 * 160 * @param p the desired probability for the critical value 161 * @return domain value upper bound, i.e. 162 * P(X < <i>upper bound</i>) > <code>p</code> 163 */ 164 @Override 165 protected int getDomainUpperBound(final double p) { 166 return numberOfElements; 167 } 168 169 170 /** 171 * Calculates the Nth generalized harmonic number. See 172 * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic 173 * Series</a>. 174 * 175 * @param n the term in the series to calculate (must be ≥ 1) 176 * @param m the exponent; special case m == 1.0 is the harmonic series 177 * @return the nth generalized harmonic number 178 */ 179 private double generalizedHarmonic(final int n, final double m) { 180 double value = 0; 181 for (int k = n; k > 0; --k) { 182 value += 1.0 / Math.pow(k, m); 183 } 184 return value; 185 } 186 187 }