001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.validator.routines;
018    
019    import java.io.Serializable;
020    import java.util.Arrays;
021    import java.util.List;
022    
023    /**
024     * <p><b>Domain name</b> validation routines.</p>
025     *
026     * <p>
027     * This validator provides methods for validating Internet domain names
028     * and top-level domains.
029     * </p>
030     *
031     * <p>Domain names are evaluated according
032     * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
033     * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
034     * section 2.1. No accommodation is provided for the specialized needs of
035     * other applications; if the domain name has been URL-encoded, for example,
036     * validation will fail even though the equivalent plaintext version of the
037     * same name would have passed.
038     * </p>
039     *
040     * <p>
041     * Validation is also provided for top-level domains (TLDs) as defined and
042     * maintained by the Internet Assigned Numbers Authority (IANA):
043     * </p>
044     *
045     *   <ul>
046     *     <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
047     *         (<code>.arpa</code>, etc.)</li>
048     *     <li>{@link #isValidGenericTld} - validates generic TLDs
049     *         (<code>.com, .org</code>, etc.)</li>
050     *     <li>{@link #isValidCountryCodeTld} - validates country code TLDs
051     *         (<code>.us, .uk, .cn</code>, etc.)</li>
052     *   </ul>
053     *
054     * <p>
055     * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
056     * methods to ensure that a given domain name matches a specific IP; see
057     * {@link java.net.InetAddress} for that functionality.)
058     * </p>
059     *
060     * @version $Revision: 600231 $ $Date: 2007-12-02 04:39:09 +0100 (So, 02. Dez 2007) $
061     * @since Validator 1.4
062     */
063    public class DomainValidator implements Serializable {
064    
065        // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
066        private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
067        private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
068        private static final String DOMAIN_NAME_REGEX =
069                "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$";
070    
071        /**
072         * Singleton instance of this validator.
073         */
074        private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator();
075    
076        /**
077         * RegexValidator for matching domains.
078         */
079        private final RegexValidator domainRegex =
080                new RegexValidator(DOMAIN_NAME_REGEX);
081    
082        /**
083         * Returns the singleton instance of this validator.
084         * @return the singleton instance of this validator
085         */
086        public static DomainValidator getInstance() {
087            return DOMAIN_VALIDATOR;
088        }
089    
090        /** Private constructor. */
091        private DomainValidator() {}
092    
093        /**
094         * Returns true if the specified <code>String</code> parses
095         * as a valid domain name with a recognized top-level domain.
096         * The parsing is case-sensitive.
097         * @param domain the parameter to check for domain name syntax
098         * @return true if the parameter is a valid domain name
099         */
100        public boolean isValid(String domain) {
101            String[] groups = domainRegex.match(domain);
102            if (groups != null && groups.length > 0) {
103                return isValidTld(groups[0]);
104            } else {
105                return false;
106            }
107        }
108    
109        /**
110         * Returns true if the specified <code>String</code> matches any
111         * IANA-defined top-level domain. Leading dots are ignored if present.
112         * The search is case-sensitive.
113         * @param tld the parameter to check for TLD status
114         * @return true if the parameter is a TLD
115         */
116        public boolean isValidTld(String tld) {
117            return isValidInfrastructureTld(tld)
118                    || isValidGenericTld(tld)
119                    || isValidCountryCodeTld(tld);
120        }
121    
122        /**
123         * Returns true if the specified <code>String</code> matches any
124         * IANA-defined infrastructure top-level domain. Leading dots are
125         * ignored if present. The search is case-sensitive.
126         * @param iTld the parameter to check for infrastructure TLD status
127         * @return true if the parameter is an infrastructure TLD
128         */
129        public boolean isValidInfrastructureTld(String iTld) {
130            return INFRASTRUCTURE_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase()));
131        }
132    
133        /**
134         * Returns true if the specified <code>String</code> matches any
135         * IANA-defined generic top-level domain. Leading dots are ignored
136         * if present. The search is case-sensitive.
137         * @param gTld the parameter to check for generic TLD status
138         * @return true if the parameter is a generic TLD
139         */
140        public boolean isValidGenericTld(String gTld) {
141            return GENERIC_TLD_LIST.contains(chompLeadingDot(gTld.toLowerCase()));
142        }
143    
144        /**
145         * Returns true if the specified <code>String</code> matches any
146         * IANA-defined country code top-level domain. Leading dots are
147         * ignored if present. The search is case-sensitive.
148         * @param ccTld the parameter to check for country code TLD status
149         * @return true if the parameter is a country code TLD
150         */
151        public boolean isValidCountryCodeTld(String ccTld) {
152            return COUNTRY_CODE_TLD_LIST.contains(chompLeadingDot(ccTld.toLowerCase()));
153        }
154    
155        private String chompLeadingDot(String str) {
156            if (str.startsWith(".")) {
157                return str.substring(1);
158            } else {
159                return str;
160            }
161        }
162    
163        // ---------------------------------------------
164        // ----- TLDs defined by IANA
165        // ----- Authoritative and comprehensive list at:
166        // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
167    
168        private static final String[] INFRASTRUCTURE_TLDS = new String[] {
169            "arpa",               // internet infrastructure
170            "root"                // diagnostic marker for non-truncated root zone
171        };
172    
173        private static final String[] GENERIC_TLDS = new String[] {
174            "aero",               // air transport industry
175            "asia",               // Pan-Asia/Asia Pacific
176            "biz",                // businesses
177            "cat",                // Catalan linguistic/cultural community
178            "com",                // commercial enterprises
179            "coop",               // cooperative associations
180            "info",               // informational sites
181            "jobs",               // Human Resource managers
182            "mobi",               // mobile products and services
183            "museum",             // museums, surprisingly enough
184            "name",               // individuals' sites
185            "net",                // internet support infrastructure/business
186            "org",                // noncommercial organizations
187            "pro",                // credentialed professionals and entities
188            "tel",                // contact data for businesses and individuals
189            "travel",             // entities in the travel industry
190            "gov",                // United States Government
191            "edu",                // accredited postsecondary US education entities
192            "mil",                // United States Military
193            "int"                 // organizations established by international treaty
194        };
195    
196        private static final String[] COUNTRY_CODE_TLDS = new String[] {
197            "ac",                 // Ascension Island
198            "ad",                 // Andorra
199            "ae",                 // United Arab Emirates
200            "af",                 // Afghanistan
201            "ag",                 // Antigua and Barbuda
202            "ai",                 // Anguilla
203            "al",                 // Albania
204            "am",                 // Armenia
205            "an",                 // Netherlands Antilles  
206            "ao",                 // Angola
207            "aq",                 // Antarctica
208            "ar",                 // Argentina
209            "as",                 // American Samoa
210            "at",                 // Austria
211            "au",                 // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
212            "aw",                 // Aruba
213            "ax",                 // ??land
214            "az",                 // Azerbaijan
215            "ba",                 // Bosnia and Herzegovina
216            "bb",                 // Barbados
217            "bd",                 // Bangladesh
218            "be",                 // Belgium
219            "bf",                 // Burkina Faso
220            "bg",                 // Bulgaria
221            "bh",                 // Bahrain
222            "bi",                 // Burundi
223            "bj",                 // Benin
224            "bm",                 // Bermuda
225            "bn",                 // Brunei Darussalam
226            "bo",                 // Bolivia
227            "br",                 // Brazil
228            "bs",                 // Bahamas
229            "bt",                 // Bhutan
230            "bv",                 // Bouvet Island
231            "bw",                 // Botswana
232            "by",                 // Belarus
233            "bz",                 // Belize
234            "ca",                 // Canada
235            "cc",                 // Cocos (Keeling) Islands
236            "cd",                 // Democratic Republic of the Congo (formerly Zaire)
237            "cf",                 // Central African Republic
238            "cg",                 // Republic of the Congo
239            "ch",                 // Switzerland
240            "ci",                 // C??te d'Ivoire
241            "ck",                 // Cook Islands
242            "cl",                 // Chile
243            "cm",                 // Cameroon
244            "cn",                 // China, mainland
245            "co",                 // Colombia
246            "cr",                 // Costa Rica
247            "cu",                 // Cuba
248            "cv",                 // Cape Verde
249            "cx",                 // Christmas Island
250            "cy",                 // Cyprus
251            "cz",                 // Czech Republic
252            "de",                 // Germany
253            "dj",                 // Djibouti
254            "dk",                 // Denmark
255            "dm",                 // Dominica
256            "do",                 // Dominican Republic
257            "dz",                 // Algeria
258            "ec",                 // Ecuador
259            "ee",                 // Estonia
260            "eg",                 // Egypt
261            "er",                 // Eritrea
262            "es",                 // Spain
263            "et",                 // Ethiopia
264            "eu",                 // European Union
265            "fi",                 // Finland
266            "fj",                 // Fiji
267            "fk",                 // Falkland Islands
268            "fm",                 // Federated States of Micronesia
269            "fo",                 // Faroe Islands
270            "fr",                 // France
271            "ga",                 // Gabon
272            "gb",                 // Great Britain (United Kingdom)
273            "gd",                 // Grenada
274            "ge",                 // Georgia
275            "gf",                 // French Guiana
276            "gg",                 // Guernsey
277            "gh",                 // Ghana
278            "gi",                 // Gibraltar
279            "gl",                 // Greenland
280            "gm",                 // The Gambia
281            "gn",                 // Guinea
282            "gp",                 // Guadeloupe
283            "gq",                 // Equatorial Guinea
284            "gr",                 // Greece
285            "gs",                 // South Georgia and the South Sandwich Islands
286            "gt",                 // Guatemala
287            "gu",                 // Guam
288            "gw",                 // Guinea-Bissau
289            "gy",                 // Guyana
290            "hk",                 // Hong Kong
291            "hm",                 // Heard Island and McDonald Islands
292            "hn",                 // Honduras
293            "hr",                 // Croatia (Hrvatska)
294            "ht",                 // Haiti
295            "hu",                 // Hungary
296            "id",                 // Indonesia
297            "ie",                 // Ireland (??ire)
298            "il",                 // Israel
299            "im",                 // Isle of Man
300            "in",                 // India
301            "io",                 // British Indian Ocean Territory
302            "iq",                 // Iraq
303            "ir",                 // Iran
304            "is",                 // Iceland
305            "it",                 // Italy
306            "je",                 // Jersey
307            "jm",                 // Jamaica
308            "jo",                 // Jordan
309            "jp",                 // Japan
310            "ke",                 // Kenya
311            "kg",                 // Kyrgyzstan
312            "kh",                 // Cambodia (Khmer)
313            "ki",                 // Kiribati
314            "km",                 // Comoros
315            "kn",                 // Saint Kitts and Nevis
316            "kp",                 // North Korea
317            "kr",                 // South Korea
318            "kw",                 // Kuwait
319            "ky",                 // Cayman Islands
320            "kz",                 // Kazakhstan
321            "la",                 // Laos (currently being marketed as the official domain for Los Angeles)
322            "lb",                 // Lebanon
323            "lc",                 // Saint Lucia
324            "li",                 // Liechtenstein
325            "lk",                 // Sri Lanka
326            "lr",                 // Liberia
327            "ls",                 // Lesotho
328            "lt",                 // Lithuania
329            "lu",                 // Luxembourg
330            "lv",                 // Latvia
331            "ly",                 // Libya
332            "ma",                 // Morocco
333            "mc",                 // Monaco
334            "md",                 // Moldova
335            "me",                 // Montenegro
336            "mg",                 // Madagascar
337            "mh",                 // Marshall Islands
338            "mk",                 // Republic of Macedonia
339            "ml",                 // Mali
340            "mm",                 // Myanmar
341            "mn",                 // Mongolia
342            "mo",                 // Macau
343            "mp",                 // Northern Mariana Islands
344            "mq",                 // Martinique
345            "mr",                 // Mauritania
346            "ms",                 // Montserrat
347            "mt",                 // Malta
348            "mu",                 // Mauritius
349            "mv",                 // Maldives
350            "mw",                 // Malawi
351            "mx",                 // Mexico
352            "my",                 // Malaysia
353            "mz",                 // Mozambique
354            "na",                 // Namibia
355            "nc",                 // New Caledonia
356            "ne",                 // Niger
357            "nf",                 // Norfolk Island
358            "ng",                 // Nigeria
359            "ni",                 // Nicaragua
360            "nl",                 // Netherlands
361            "no",                 // Norway
362            "np",                 // Nepal
363            "nr",                 // Nauru
364            "nu",                 // Niue
365            "nz",                 // New Zealand
366            "om",                 // Oman
367            "pa",                 // Panama
368            "pe",                 // Peru
369            "pf",                 // French Polynesia With Clipperton Island
370            "pg",                 // Papua New Guinea
371            "ph",                 // Philippines
372            "pk",                 // Pakistan
373            "pl",                 // Poland
374            "pm",                 // Saint-Pierre and Miquelon
375            "pn",                 // Pitcairn Islands
376            "pr",                 // Puerto Rico
377            "ps",                 // Palestinian territories (PA-controlled West Bank and Gaza Strip)
378            "pt",                 // Portugal
379            "pw",                 // Palau
380            "py",                 // Paraguay
381            "qa",                 // Qatar
382            "re",                 // R??union
383            "ro",                 // Romania
384            "rs",                 // Serbia
385            "ru",                 // Russia
386            "rw",                 // Rwanda
387            "sa",                 // Saudi Arabia
388            "sb",                 // Solomon Islands
389            "sc",                 // Seychelles
390            "sd",                 // Sudan
391            "se",                 // Sweden
392            "sg",                 // Singapore
393            "sh",                 // Saint Helena
394            "si",                 // Slovenia
395            "sj",                 // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
396            "sk",                 // Slovakia
397            "sl",                 // Sierra Leone
398            "sm",                 // San Marino
399            "sn",                 // Senegal
400            "so",                 // Somalia
401            "sr",                 // Suriname
402            "st",                 // S??o Tom?? and Pr??ncipe
403            "su",                 // Soviet Union (deprecated)
404            "sv",                 // El Salvador
405            "sy",                 // Syria
406            "sz",                 // Swaziland
407            "tc",                 // Turks and Caicos Islands
408            "td",                 // Chad
409            "tf",                 // French Southern and Antarctic Lands
410            "tg",                 // Togo
411            "th",                 // Thailand
412            "tj",                 // Tajikistan
413            "tk",                 // Tokelau
414            "tl",                 // East Timor (deprecated old code)
415            "tm",                 // Turkmenistan
416            "tn",                 // Tunisia
417            "to",                 // Tonga
418            "tp",                 // East Timor
419            "tr",                 // Turkey
420            "tt",                 // Trinidad and Tobago
421            "tv",                 // Tuvalu
422            "tw",                 // Taiwan, Republic of China
423            "tz",                 // Tanzania
424            "ua",                 // Ukraine
425            "ug",                 // Uganda
426            "uk",                 // United Kingdom
427            "um",                 // United States Minor Outlying Islands
428            "us",                 // United States of America
429            "uy",                 // Uruguay
430            "uz",                 // Uzbekistan
431            "va",                 // Vatican City State
432            "vc",                 // Saint Vincent and the Grenadines
433            "ve",                 // Venezuela
434            "vg",                 // British Virgin Islands
435            "vi",                 // U.S. Virgin Islands
436            "vn",                 // Vietnam
437            "vu",                 // Vanuatu
438            "wf",                 // Wallis and Futuna
439            "ws",                 // Samoa (formerly Western Samoa)
440            "ye",                 // Yemen
441            "yt",                 // Mayotte
442            "yu",                 // Serbia and Montenegro (originally Yugoslavia)
443            "za",                 // South Africa
444            "zm",                 // Zambia
445            "zw",                 // Zimbabwe
446        };
447    
448        private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS);
449        private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS);
450        private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS);
451    }