001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.validator.routines; 018 019 import java.io.Serializable; 020 import java.util.Arrays; 021 import java.util.List; 022 023 /** 024 * <p><b>Domain name</b> validation routines.</p> 025 * 026 * <p> 027 * This validator provides methods for validating Internet domain names 028 * and top-level domains. 029 * </p> 030 * 031 * <p>Domain names are evaluated according 032 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>, 033 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>, 034 * section 2.1. No accomodation is provided for the specialized needs of 035 * other applications; if the domain name has been URL-encoded, for example, 036 * validation will fail even though the equivalent plaintext version of the 037 * same name would have passed. 038 * </p> 039 * 040 * <p> 041 * Validation is also provided for top-level domains (TLDs) as defined and 042 * maintained by the Internet Assigned Numbers Authority (IANA): 043 * </p> 044 * 045 * <ul> 046 * <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs 047 * (<code>.arpa</code>, etc.)</li> 048 * <li>{@link #isValidGenericTld} - validates generic TLDs 049 * (<code>.com, .org</code>, etc.)</li> 050 * <li>{@link #isValidCountryCodeTld} - validates country code TLDs 051 * (<code>.us, .uk, .cn</code>, etc.)</li> 052 * </ul> 053 * 054 * <p> 055 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or 056 * methods to ensure that a given domain name matches a specific IP; see 057 * {@link java.net.InetAddress} for that functionality.) 058 * </p> 059 * 060 * @version $Revision: 600231 $ $Date: 2007-12-02 04:39:09 +0100 (So, 02. Dez 2007) $ 061 * @since Validator 1.4 062 */ 063 public class DomainValidator implements Serializable { 064 065 // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123) 066 private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*"; 067 private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}"; 068 private static final String DOMAIN_NAME_REGEX = 069 "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$"; 070 071 /** 072 * Singleton instance of this validator. 073 */ 074 private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(); 075 076 /** 077 * RegexValidator for matching domains. 078 */ 079 private final RegexValidator domainRegex = 080 new RegexValidator(DOMAIN_NAME_REGEX); 081 082 /** 083 * Returns the singleton instance of this validator. 084 * @return the singleton instance of this validator 085 */ 086 public static DomainValidator getInstance() { 087 return DOMAIN_VALIDATOR; 088 } 089 090 /** Private constructor. */ 091 private DomainValidator() {} 092 093 /** 094 * Returns true if the specified <code>String</code> parses 095 * as a valid domain name with a recognized top-level domain. 096 * The parsing is case-sensitive. 097 * @param domain the parameter to check for domain name syntax 098 * @return true if the parameter is a valid domain name 099 */ 100 public boolean isValid(String domain) { 101 String[] groups = domainRegex.match(domain); 102 if (groups != null && groups.length > 0) { 103 return isValidTld(groups[0]); 104 } else { 105 return false; 106 } 107 } 108 109 /** 110 * Returns true if the specified <code>String</code> matches any 111 * IANA-defined top-level domain. Leading dots are ignored if present. 112 * The search is case-sensitive. 113 * @param tld the parameter to check for TLD status 114 * @return true if the parameter is a TLD 115 */ 116 public boolean isValidTld(String tld) { 117 return isValidInfrastructureTld(tld) 118 || isValidGenericTld(tld) 119 || isValidCountryCodeTld(tld); 120 } 121 122 /** 123 * Returns true if the specified <code>String</code> matches any 124 * IANA-defined infrastructure top-level domain. Leading dots are 125 * ignored if present. The search is case-sensitive. 126 * @param iTld the parameter to check for infrastructure TLD status 127 * @return true if the parameter is an infrastructure TLD 128 */ 129 public boolean isValidInfrastructureTld(String iTld) { 130 return INFRASTRUCTURE_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase())); 131 } 132 133 /** 134 * Returns true if the specified <code>String</code> matches any 135 * IANA-defined generic top-level domain. Leading dots are ignored 136 * if present. The search is case-sensitive. 137 * @param gTld the parameter to check for generic TLD status 138 * @return true if the parameter is a generic TLD 139 */ 140 public boolean isValidGenericTld(String gTld) { 141 return GENERIC_TLD_LIST.contains(chompLeadingDot(gTld.toLowerCase())); 142 } 143 144 /** 145 * Returns true if the specified <code>String</code> matches any 146 * IANA-defined country code top-level domain. Leading dots are 147 * ignored if present. The search is case-sensitive. 148 * @param ccTld the parameter to check for country code TLD status 149 * @return true if the parameter is a country code TLD 150 */ 151 public boolean isValidCountryCodeTld(String ccTld) { 152 return COUNTRY_CODE_TLD_LIST.contains(chompLeadingDot(ccTld.toLowerCase())); 153 } 154 155 private String chompLeadingDot(String str) { 156 if (str.startsWith(".")) { 157 return str.substring(1); 158 } else { 159 return str; 160 } 161 } 162 163 // --------------------------------------------- 164 // ----- TLDs defined by IANA 165 // ----- Authoritative and comprehensive list at: 166 // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt 167 168 private static final String[] INFRASTRUCTURE_TLDS = new String[] { 169 "arpa", // internet infrastructure 170 "root" // diagnostic marker for non-truncated root zone 171 }; 172 173 private static final String[] GENERIC_TLDS = new String[] { 174 "aero", // air transport industry 175 "asia", // Pan-Asia/Asia Pacific 176 "biz", // businesses 177 "cat", // Catalan linguistic/cultural community 178 "com", // commercial enterprises 179 "coop", // cooperative associations 180 "info", // informational sites 181 "jobs", // Human Resource managers 182 "mobi", // mobile products and services 183 "museum", // museums, surprisingly enough 184 "name", // individuals' sites 185 "net", // internet support infrastructure/business 186 "org", // noncommercial organizations 187 "pro", // credentialed professionals and entities 188 "tel", // contact data for businesses and individuals 189 "travel", // entities in the travel industry 190 "gov", // United States Government 191 "edu", // accredited postsecondary US education entities 192 "mil", // United States Military 193 "int" // organizations established by international treaty 194 }; 195 196 private static final String[] COUNTRY_CODE_TLDS = new String[] { 197 "ac", // Ascension Island 198 "ad", // Andorra 199 "ae", // United Arab Emirates 200 "af", // Afghanistan 201 "ag", // Antigua and Barbuda 202 "ai", // Anguilla 203 "al", // Albania 204 "am", // Armenia 205 "an", // Netherlands Antilles 206 "ao", // Angola 207 "aq", // Antarctica 208 "ar", // Argentina 209 "as", // American Samoa 210 "at", // Austria 211 "au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands) 212 "aw", // Aruba 213 "ax", // ??land 214 "az", // Azerbaijan 215 "ba", // Bosnia and Herzegovina 216 "bb", // Barbados 217 "bd", // Bangladesh 218 "be", // Belgium 219 "bf", // Burkina Faso 220 "bg", // Bulgaria 221 "bh", // Bahrain 222 "bi", // Burundi 223 "bj", // Benin 224 "bm", // Bermuda 225 "bn", // Brunei Darussalam 226 "bo", // Bolivia 227 "br", // Brazil 228 "bs", // Bahamas 229 "bt", // Bhutan 230 "bv", // Bouvet Island 231 "bw", // Botswana 232 "by", // Belarus 233 "bz", // Belize 234 "ca", // Canada 235 "cc", // Cocos (Keeling) Islands 236 "cd", // Democratic Republic of the Congo (formerly Zaire) 237 "cf", // Central African Republic 238 "cg", // Republic of the Congo 239 "ch", // Switzerland 240 "ci", // C??te d'Ivoire 241 "ck", // Cook Islands 242 "cl", // Chile 243 "cm", // Cameroon 244 "cn", // China, mainland 245 "co", // Colombia 246 "cr", // Costa Rica 247 "cu", // Cuba 248 "cv", // Cape Verde 249 "cx", // Christmas Island 250 "cy", // Cyprus 251 "cz", // Czech Republic 252 "de", // Germany 253 "dj", // Djibouti 254 "dk", // Denmark 255 "dm", // Dominica 256 "do", // Dominican Republic 257 "dz", // Algeria 258 "ec", // Ecuador 259 "ee", // Estonia 260 "eg", // Egypt 261 "er", // Eritrea 262 "es", // Spain 263 "et", // Ethiopia 264 "eu", // European Union 265 "fi", // Finland 266 "fj", // Fiji 267 "fk", // Falkland Islands 268 "fm", // Federated States of Micronesia 269 "fo", // Faroe Islands 270 "fr", // France 271 "ga", // Gabon 272 "gb", // Great Britain (United Kingdom) 273 "gd", // Grenada 274 "ge", // Georgia 275 "gf", // French Guiana 276 "gg", // Guernsey 277 "gh", // Ghana 278 "gi", // Gibraltar 279 "gl", // Greenland 280 "gm", // The Gambia 281 "gn", // Guinea 282 "gp", // Guadeloupe 283 "gq", // Equatorial Guinea 284 "gr", // Greece 285 "gs", // South Georgia and the South Sandwich Islands 286 "gt", // Guatemala 287 "gu", // Guam 288 "gw", // Guinea-Bissau 289 "gy", // Guyana 290 "hk", // Hong Kong 291 "hm", // Heard Island and McDonald Islands 292 "hn", // Honduras 293 "hr", // Croatia (Hrvatska) 294 "ht", // Haiti 295 "hu", // Hungary 296 "id", // Indonesia 297 "ie", // Ireland (??ire) 298 "il", // Israel 299 "im", // Isle of Man 300 "in", // India 301 "io", // British Indian Ocean Territory 302 "iq", // Iraq 303 "ir", // Iran 304 "is", // Iceland 305 "it", // Italy 306 "je", // Jersey 307 "jm", // Jamaica 308 "jo", // Jordan 309 "jp", // Japan 310 "ke", // Kenya 311 "kg", // Kyrgyzstan 312 "kh", // Cambodia (Khmer) 313 "ki", // Kiribati 314 "km", // Comoros 315 "kn", // Saint Kitts and Nevis 316 "kp", // North Korea 317 "kr", // South Korea 318 "kw", // Kuwait 319 "ky", // Cayman Islands 320 "kz", // Kazakhstan 321 "la", // Laos (currently being marketed as the official domain for Los Angeles) 322 "lb", // Lebanon 323 "lc", // Saint Lucia 324 "li", // Liechtenstein 325 "lk", // Sri Lanka 326 "lr", // Liberia 327 "ls", // Lesotho 328 "lt", // Lithuania 329 "lu", // Luxembourg 330 "lv", // Latvia 331 "ly", // Libya 332 "ma", // Morocco 333 "mc", // Monaco 334 "md", // Moldova 335 "me", // Montenegro 336 "mg", // Madagascar 337 "mh", // Marshall Islands 338 "mk", // Republic of Macedonia 339 "ml", // Mali 340 "mm", // Myanmar 341 "mn", // Mongolia 342 "mo", // Macau 343 "mp", // Northern Mariana Islands 344 "mq", // Martinique 345 "mr", // Mauritania 346 "ms", // Montserrat 347 "mt", // Malta 348 "mu", // Mauritius 349 "mv", // Maldives 350 "mw", // Malawi 351 "mx", // Mexico 352 "my", // Malaysia 353 "mz", // Mozambique 354 "na", // Namibia 355 "nc", // New Caledonia 356 "ne", // Niger 357 "nf", // Norfolk Island 358 "ng", // Nigeria 359 "ni", // Nicaragua 360 "nl", // Netherlands 361 "no", // Norway 362 "np", // Nepal 363 "nr", // Nauru 364 "nu", // Niue 365 "nz", // New Zealand 366 "om", // Oman 367 "pa", // Panama 368 "pe", // Peru 369 "pf", // French Polynesia With Clipperton Island 370 "pg", // Papua New Guinea 371 "ph", // Philippines 372 "pk", // Pakistan 373 "pl", // Poland 374 "pm", // Saint-Pierre and Miquelon 375 "pn", // Pitcairn Islands 376 "pr", // Puerto Rico 377 "ps", // Palestinian territories (PA-controlled West Bank and Gaza Strip) 378 "pt", // Portugal 379 "pw", // Palau 380 "py", // Paraguay 381 "qa", // Qatar 382 "re", // R??union 383 "ro", // Romania 384 "rs", // Serbia 385 "ru", // Russia 386 "rw", // Rwanda 387 "sa", // Saudi Arabia 388 "sb", // Solomon Islands 389 "sc", // Seychelles 390 "sd", // Sudan 391 "se", // Sweden 392 "sg", // Singapore 393 "sh", // Saint Helena 394 "si", // Slovenia 395 "sj", // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no) 396 "sk", // Slovakia 397 "sl", // Sierra Leone 398 "sm", // San Marino 399 "sn", // Senegal 400 "so", // Somalia 401 "sr", // Suriname 402 "st", // S??o Tom?? and Pr??ncipe 403 "su", // Soviet Union (deprecated) 404 "sv", // El Salvador 405 "sy", // Syria 406 "sz", // Swaziland 407 "tc", // Turks and Caicos Islands 408 "td", // Chad 409 "tf", // French Southern and Antarctic Lands 410 "tg", // Togo 411 "th", // Thailand 412 "tj", // Tajikistan 413 "tk", // Tokelau 414 "tl", // East Timor (deprecated old code) 415 "tm", // Turkmenistan 416 "tn", // Tunisia 417 "to", // Tonga 418 "tp", // East Timor 419 "tr", // Turkey 420 "tt", // Trinidad and Tobago 421 "tv", // Tuvalu 422 "tw", // Taiwan, Republic of China 423 "tz", // Tanzania 424 "ua", // Ukraine 425 "ug", // Uganda 426 "uk", // United Kingdom 427 "um", // United States Minor Outlying Islands 428 "us", // United States of America 429 "uy", // Uruguay 430 "uz", // Uzbekistan 431 "va", // Vatican City State 432 "vc", // Saint Vincent and the Grenadines 433 "ve", // Venezuela 434 "vg", // British Virgin Islands 435 "vi", // U.S. Virgin Islands 436 "vn", // Vietnam 437 "vu", // Vanuatu 438 "wf", // Wallis and Futuna 439 "ws", // Samoa (formerly Western Samoa) 440 "ye", // Yemen 441 "yt", // Mayotte 442 "yu", // Serbia and Montenegro (originally Yugoslavia) 443 "za", // South Africa 444 "zm", // Zambia 445 "zw", // Zimbabwe 446 }; 447 448 private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS); 449 private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS); 450 private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS); 451 }