| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| org.apache.commons.validator.UrlValidator |
|
|
| 1 | /* |
|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
|
| 3 | * contributor license agreements. See the NOTICE file distributed with |
|
| 4 | * this work for additional information regarding copyright ownership. |
|
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
|
| 6 | * (the "License"); you may not use this file except in compliance with |
|
| 7 | * the License. You may obtain a copy of the License at |
|
| 8 | * |
|
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 10 | * |
|
| 11 | * Unless required by applicable law or agreed to in writing, software |
|
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 14 | * See the License for the specific language governing permissions and |
|
| 15 | * limitations under the License. |
|
| 16 | */ |
|
| 17 | package org.apache.commons.validator; |
|
| 18 | ||
| 19 | import java.io.Serializable; |
|
| 20 | import java.util.Arrays; |
|
| 21 | import java.util.HashSet; |
|
| 22 | import java.util.Set; |
|
| 23 | ||
| 24 | import org.apache.commons.validator.util.Flags; |
|
| 25 | import org.apache.oro.text.perl.Perl5Util; |
|
| 26 | ||
| 27 | /** |
|
| 28 | * <p>Validates URLs.</p> |
|
| 29 | * Behavior of validation is modified by passing in options: |
|
| 30 | * <li>ALLOW_2_SLASHES - [FALSE] Allows double '/' characters in the path |
|
| 31 | * component.</li> |
|
| 32 | * <li>NO_FRAGMENTS - [FALSE] By default fragments are allowed; if this option is |
|
| 33 | * included then fragments are flagged as illegal.</li> |
|
| 34 | * <li>ALLOW_ALL_SCHEMES - [FALSE] By default only http, https, and ftp are |
|
| 35 | * considered valid schemes. Enabling this option will let any scheme pass validation.</li> |
|
| 36 | * |
|
| 37 | * <p>Originally based on a PHP script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02, |
|
| 38 | * http://javascript.internet.com. However, this validation now bears little resemblance |
|
| 39 | * to the php original.</p> |
|
| 40 | * <pre> |
|
| 41 | * Example of usage: |
|
| 42 | * Construct a UrlValidator with valid schemes of "http", and "https". |
|
| 43 | * |
|
| 44 | * String[] schemes = {"http","https"}; |
|
| 45 | * UrlValidator urlValidator = new UrlValidator(schemes); |
|
| 46 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
| 47 | * System.out.println("url is valid"); |
|
| 48 | * } else { |
|
| 49 | * System.out.println("url is invalid"); |
|
| 50 | * } |
|
| 51 | * |
|
| 52 | * prints "url is invalid" |
|
| 53 | * If instead the default constructor is used. |
|
| 54 | * |
|
| 55 | * UrlValidator urlValidator = new UrlValidator(); |
|
| 56 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
| 57 | * System.out.println("url is valid"); |
|
| 58 | * } else { |
|
| 59 | * System.out.println("url is invalid"); |
|
| 60 | * } |
|
| 61 | * |
|
| 62 | * prints out "url is valid" |
|
| 63 | * </pre> |
|
| 64 | * |
|
| 65 | * @see |
|
| 66 | * <a href='http://www.ietf.org/rfc/rfc2396.txt' > |
|
| 67 | * Uniform Resource Identifiers (URI): Generic Syntax |
|
| 68 | * </a> |
|
| 69 | * |
|
| 70 | * @version $Revision: 478334 $ $Date: 2006-11-22 21:31:54 +0000 (Wed, 22 Nov 2006) $ |
|
| 71 | * @since Validator 1.1 |
|
| 72 | */ |
|
| 73 | public class UrlValidator implements Serializable { |
|
| 74 | ||
| 75 | /** |
|
| 76 | * Allows all validly formatted schemes to pass validation instead of |
|
| 77 | * supplying a set of valid schemes. |
|
| 78 | */ |
|
| 79 | public static final int ALLOW_ALL_SCHEMES = 1 << 0; |
|
| 80 | ||
| 81 | /** |
|
| 82 | * Allow two slashes in the path component of the URL. |
|
| 83 | */ |
|
| 84 | public static final int ALLOW_2_SLASHES = 1 << 1; |
|
| 85 | ||
| 86 | /** |
|
| 87 | * Enabling this option disallows any URL fragments. |
|
| 88 | */ |
|
| 89 | public static final int NO_FRAGMENTS = 1 << 2; |
|
| 90 | ||
| 91 | private static final String ALPHA_CHARS = "a-zA-Z"; |
|
| 92 | ||
| 93 | private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d"; |
|
| 94 | ||
| 95 | private static final String SPECIAL_CHARS = ";/@&=,.?:+$"; |
|
| 96 | ||
| 97 | private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]"; |
|
| 98 | ||
| 99 | private static final String SCHEME_CHARS = ALPHA_CHARS; |
|
| 100 | ||
| 101 | // Drop numeric, and "+-." for now |
|
| 102 | private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\."; |
|
| 103 | ||
| 104 | private static final String ATOM = VALID_CHARS + '+'; |
|
| 105 | ||
| 106 | /** |
|
| 107 | * This expression derived/taken from the BNF for URI (RFC2396). |
|
| 108 | */ |
|
| 109 | private static final String URL_PATTERN = |
|
| 110 | "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/"; |
|
| 111 | // 12 3 4 5 6 7 8 9 |
|
| 112 | ||
| 113 | /** |
|
| 114 | * Scheme/Protocol (i.e. http:, ftp:, file:, etc). |
|
| 115 | */ |
|
| 116 | private static final int PARSE_URL_SCHEME = 2; |
|
| 117 | ||
| 118 | /** |
|
| 119 | * Includes hostname/ip and port number. |
|
| 120 | */ |
|
| 121 | private static final int PARSE_URL_AUTHORITY = 4; |
|
| 122 | ||
| 123 | private static final int PARSE_URL_PATH = 5; |
|
| 124 | ||
| 125 | private static final int PARSE_URL_QUERY = 7; |
|
| 126 | ||
| 127 | private static final int PARSE_URL_FRAGMENT = 9; |
|
| 128 | ||
| 129 | /** |
|
| 130 | * Protocol (ie. http:, ftp:,https:). |
|
| 131 | */ |
|
| 132 | private static final String SCHEME_PATTERN = "/^[" + SCHEME_CHARS + "]/"; |
|
| 133 | ||
| 134 | private static final String AUTHORITY_PATTERN = |
|
| 135 | "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/"; |
|
| 136 | // 1 2 3 4 |
|
| 137 | ||
| 138 | private static final int PARSE_AUTHORITY_HOST_IP = 1; |
|
| 139 | ||
| 140 | private static final int PARSE_AUTHORITY_PORT = 2; |
|
| 141 | ||
| 142 | /** |
|
| 143 | * Should always be empty. |
|
| 144 | */ |
|
| 145 | private static final int PARSE_AUTHORITY_EXTRA = 3; |
|
| 146 | ||
| 147 | private static final String PATH_PATTERN = "/^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$/"; |
|
| 148 | ||
| 149 | private static final String QUERY_PATTERN = "/^(.*)$/"; |
|
| 150 | ||
| 151 | private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/"; |
|
| 152 | ||
| 153 | private static final String IP_V4_DOMAIN_PATTERN = |
|
| 154 | "/^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$/"; |
|
| 155 | ||
| 156 | private static final String DOMAIN_PATTERN = |
|
| 157 | "/^" + ATOM + "(\\." + ATOM + ")*$/"; |
|
| 158 | ||
| 159 | private static final String PORT_PATTERN = "/^:(\\d{1,5})$/"; |
|
| 160 | ||
| 161 | private static final String ATOM_PATTERN = "/(" + ATOM + ")/"; |
|
| 162 | ||
| 163 | private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/"; |
|
| 164 | ||
| 165 | /** |
|
| 166 | * Holds the set of current validation options. |
|
| 167 | */ |
|
| 168 | 20 | private Flags options = null; |
| 169 | ||
| 170 | /** |
|
| 171 | * The set of schemes that are allowed to be in a URL. |
|
| 172 | */ |
|
| 173 | 20 | private Set allowedSchemes = new HashSet(); |
| 174 | ||
| 175 | /** |
|
| 176 | * If no schemes are provided, default to this set. |
|
| 177 | */ |
|
| 178 | 20 | protected String[] defaultSchemes = {"http", "https", "ftp"}; |
| 179 | ||
| 180 | /** |
|
| 181 | * Create a UrlValidator with default properties. |
|
| 182 | */ |
|
| 183 | public UrlValidator() { |
|
| 184 | 15 | this(null); |
| 185 | 15 | } |
| 186 | ||
| 187 | /** |
|
| 188 | * Behavior of validation is modified by passing in several string options: |
|
| 189 | * @param schemes Pass in one or more url schemes to consider valid, passing in |
|
| 190 | * a null will default to "http,https,ftp" being valid. |
|
| 191 | * If a non-null schemes is specified then all valid schemes must |
|
| 192 | * be specified. Setting the ALLOW_ALL_SCHEMES option will |
|
| 193 | * ignore the contents of schemes. |
|
| 194 | */ |
|
| 195 | public UrlValidator(String[] schemes) { |
|
| 196 | 16 | this(schemes, 0); |
| 197 | 16 | } |
| 198 | ||
| 199 | /** |
|
| 200 | * Initialize a UrlValidator with the given validation options. |
|
| 201 | * @param options The options should be set using the public constants declared in |
|
| 202 | * this class. To set multiple options you simply add them together. For example, |
|
| 203 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
| 204 | */ |
|
| 205 | public UrlValidator(int options) { |
|
| 206 | 0 | this(null, options); |
| 207 | 0 | } |
| 208 | ||
| 209 | /** |
|
| 210 | * Behavior of validation is modified by passing in options: |
|
| 211 | * @param schemes The set of valid schemes. |
|
| 212 | * @param options The options should be set using the public constants declared in |
|
| 213 | * this class. To set multiple options you simply add them together. For example, |
|
| 214 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
| 215 | */ |
|
| 216 | 20 | public UrlValidator(String[] schemes, int options) { |
| 217 | 20 | this.options = new Flags(options); |
| 218 | ||
| 219 | 20 | if (this.options.isOn(ALLOW_ALL_SCHEMES)) { |
| 220 | 2 | return; |
| 221 | } |
|
| 222 | ||
| 223 | 18 | if (schemes == null) { |
| 224 | 15 | schemes = this.defaultSchemes; |
| 225 | } |
|
| 226 | ||
| 227 | 18 | this.allowedSchemes.addAll(Arrays.asList(schemes)); |
| 228 | 18 | } |
| 229 | ||
| 230 | /** |
|
| 231 | * <p>Checks if a field has a valid url address.</p> |
|
| 232 | * |
|
| 233 | * @param value The value validation is being performed on. A <code>null</code> |
|
| 234 | * value is considered invalid. |
|
| 235 | * @return true if the url is valid. |
|
| 236 | */ |
|
| 237 | public boolean isValid(String value) { |
|
| 238 | 75606 | if (value == null) { |
| 239 | 0 | return false; |
| 240 | } |
|
| 241 | ||
| 242 | 75606 | Perl5Util matchUrlPat = new Perl5Util(); |
| 243 | 75606 | Perl5Util matchAsciiPat = new Perl5Util(); |
| 244 | ||
| 245 | 75606 | if (!matchAsciiPat.match(LEGAL_ASCII_PATTERN, value)) { |
| 246 | 0 | return false; |
| 247 | } |
|
| 248 | ||
| 249 | // Check the whole url address structure |
|
| 250 | 75606 | if (!matchUrlPat.match(URL_PATTERN, value)) { |
| 251 | 0 | return false; |
| 252 | } |
|
| 253 | ||
| 254 | 75606 | if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) { |
| 255 | 28350 | return false; |
| 256 | } |
|
| 257 | ||
| 258 | 47256 | if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) { |
| 259 | 39375 | return false; |
| 260 | } |
|
| 261 | ||
| 262 | 7881 | if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) { |
| 263 | 2520 | return false; |
| 264 | } |
|
| 265 | ||
| 266 | 5361 | if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) { |
| 267 | 0 | return false; |
| 268 | } |
|
| 269 | ||
| 270 | 5361 | if (!isValidFragment(matchUrlPat.group(PARSE_URL_FRAGMENT))) { |
| 271 | 630 | return false; |
| 272 | } |
|
| 273 | ||
| 274 | 4731 | return true; |
| 275 | } |
|
| 276 | ||
| 277 | /** |
|
| 278 | * Validate scheme. If schemes[] was initialized to a non null, |
|
| 279 | * then only those schemes are allowed. Note this is slightly different |
|
| 280 | * than for the constructor. |
|
| 281 | * @param scheme The scheme to validate. A <code>null</code> value is considered |
|
| 282 | * invalid. |
|
| 283 | * @return true if valid. |
|
| 284 | */ |
|
| 285 | protected boolean isValidScheme(String scheme) { |
|
| 286 | 75610 | if (scheme == null) { |
| 287 | 18900 | return false; |
| 288 | } |
|
| 289 | ||
| 290 | 56710 | Perl5Util schemeMatcher = new Perl5Util(); |
| 291 | 56710 | if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) { |
| 292 | 9450 | return false; |
| 293 | } |
|
| 294 | ||
| 295 | 47260 | if (this.options.isOff(ALLOW_ALL_SCHEMES)) { |
| 296 | ||
| 297 | 6 | if (!this.allowedSchemes.contains(scheme)) { |
| 298 | 3 | return false; |
| 299 | } |
|
| 300 | } |
|
| 301 | ||
| 302 | 47257 | return true; |
| 303 | } |
|
| 304 | ||
| 305 | /** |
|
| 306 | * Returns true if the authority is properly formatted. An authority is the combination |
|
| 307 | * of hostname and port. A <code>null</code> authority value is considered invalid. |
|
| 308 | * @param authority Authority value to validate. |
|
| 309 | * @return true if authority (hostname and port) is valid. |
|
| 310 | */ |
|
| 311 | protected boolean isValidAuthority(String authority) { |
|
| 312 | 47256 | if (authority == null) { |
| 313 | 18831 | return false; |
| 314 | } |
|
| 315 | ||
| 316 | 28425 | Perl5Util authorityMatcher = new Perl5Util(); |
| 317 | 28425 | Perl5Util matchIPV4Pat = new Perl5Util(); |
| 318 | ||
| 319 | 28425 | if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) { |
| 320 | 0 | return false; |
| 321 | } |
|
| 322 | ||
| 323 | 28425 | boolean ipV4Address = false; |
| 324 | 28425 | boolean hostname = false; |
| 325 | // check if authority is IP address or hostname |
|
| 326 | 28425 | String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP); |
| 327 | 28425 | ipV4Address = matchIPV4Pat.match(IP_V4_DOMAIN_PATTERN, hostIP); |
| 328 | ||
| 329 | 28425 | if (ipV4Address) { |
| 330 | // this is an IP address so check components |
|
| 331 | 17325 | for (int i = 1; i <= 4; i++) { |
| 332 | 14175 | String ipSegment = matchIPV4Pat.group(i); |
| 333 | 14175 | if (ipSegment == null || ipSegment.length() <= 0) { |
| 334 | 0 | return false; |
| 335 | } |
|
| 336 | ||
| 337 | try { |
|
| 338 | 14175 | if (Integer.parseInt(ipSegment) > 255) { |
| 339 | 1575 | return false; |
| 340 | } |
|
| 341 | 12600 | } catch(NumberFormatException e) { |
| 342 | 0 | return false; |
| 343 | } |
|
| 344 | ||
| 345 | } |
|
| 346 | } else { |
|
| 347 | // Domain is a hostname |
|
| 348 | 23700 | Perl5Util domainMatcher = new Perl5Util(); |
| 349 | 23700 | hostname = domainMatcher.match(DOMAIN_PATTERN, hostIP); |
| 350 | } |
|
| 351 | ||
| 352 | //rightmost hostname will never start with a digit. |
|
| 353 | 26850 | if (hostname) { |
| 354 | // LOW-TECH FIX FOR VALIDATOR-202 |
|
| 355 | // TODO: Rewrite to use ArrayList and .add semantics: see VALIDATOR-203 |
|
| 356 | 15789 | char[] chars = hostIP.toCharArray(); |
| 357 | 15789 | int size = 1; |
| 358 | 118890 | for(int i=0; i<chars.length; i++) { |
| 359 | 103101 | if(chars[i] == '.') { |
| 360 | 22119 | size++; |
| 361 | } |
|
| 362 | } |
|
| 363 | 15789 | String[] domainSegment = new String[size]; |
| 364 | 15789 | boolean match = true; |
| 365 | 15789 | int segmentCount = 0; |
| 366 | 15789 | int segmentLength = 0; |
| 367 | 15789 | Perl5Util atomMatcher = new Perl5Util(); |
| 368 | ||
| 369 | 85275 | while (match) { |
| 370 | 53697 | match = atomMatcher.match(ATOM_PATTERN, hostIP); |
| 371 | 53697 | if (match) { |
| 372 | 37908 | domainSegment[segmentCount] = atomMatcher.group(1); |
| 373 | 37908 | segmentLength = domainSegment[segmentCount].length() + 1; |
| 374 | 37908 | hostIP = |
| 375 | (segmentLength >= hostIP.length()) |
|
| 376 | ? "" |
|
| 377 | : hostIP.substring(segmentLength); |
|
| 378 | ||
| 379 | 37908 | segmentCount++; |
| 380 | } |
|
| 381 | } |
|
| 382 | 15789 | String topLevel = domainSegment[segmentCount - 1]; |
| 383 | 15789 | if (topLevel.length() < 2 || topLevel.length() > 4) { |
| 384 | 4749 | return false; |
| 385 | } |
|
| 386 | ||
| 387 | // First letter of top level must be an alpha character |
|
| 388 | 11040 | Perl5Util alphaMatcher = new Perl5Util(); |
| 389 | 11040 | if (!alphaMatcher.match(ALPHA_PATTERN, topLevel.substring(0, 1))) { |
| 390 | 1575 | return false; |
| 391 | } |
|
| 392 | ||
| 393 | // Make sure there's a host name preceding the top-level domain. |
|
| 394 | 9465 | if (segmentCount < 2) { |
| 395 | 1584 | return false; |
| 396 | } |
|
| 397 | } |
|
| 398 | ||
| 399 | 18942 | if (!hostname && !ipV4Address) { |
| 400 | 7911 | return false; |
| 401 | } |
|
| 402 | ||
| 403 | 11031 | String port = authorityMatcher.group(PARSE_AUTHORITY_PORT); |
| 404 | 11031 | if (port != null) { |
| 405 | 9450 | Perl5Util portMatcher = new Perl5Util(); |
| 406 | 9450 | if (!portMatcher.match(PORT_PATTERN, port)) { |
| 407 | 1575 | return false; |
| 408 | } |
|
| 409 | } |
|
| 410 | ||
| 411 | 9456 | String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA); |
| 412 | 9456 | if (!GenericValidator.isBlankOrNull(extra)) { |
| 413 | 1575 | return false; |
| 414 | } |
|
| 415 | ||
| 416 | 7881 | return true; |
| 417 | } |
|
| 418 | ||
| 419 | /** |
|
| 420 | * Returns true if the path is valid. A <code>null</code> value is considered invalid. |
|
| 421 | * @param path Path value to validate. |
|
| 422 | * @return true if path is valid. |
|
| 423 | */ |
|
| 424 | protected boolean isValidPath(String path) { |
|
| 425 | 7881 | if (path == null) { |
| 426 | 0 | return false; |
| 427 | } |
|
| 428 | ||
| 429 | 7881 | Perl5Util pathMatcher = new Perl5Util(); |
| 430 | ||
| 431 | 7881 | if (!pathMatcher.match(PATH_PATTERN, path)) { |
| 432 | 0 | return false; |
| 433 | } |
|
| 434 | ||
| 435 | 7881 | int slash2Count = countToken("//", path); |
| 436 | 7881 | if (this.options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) { |
| 437 | 630 | return false; |
| 438 | } |
|
| 439 | ||
| 440 | 7251 | int slashCount = countToken("/", path); |
| 441 | 7251 | int dot2Count = countToken("..", path); |
| 442 | 7251 | if (dot2Count > 0) { |
| 443 | 1890 | if ((slashCount - slash2Count - 1) <= dot2Count) { |
| 444 | 1890 | return false; |
| 445 | } |
|
| 446 | } |
|
| 447 | ||
| 448 | 5361 | return true; |
| 449 | } |
|
| 450 | ||
| 451 | /** |
|
| 452 | * Returns true if the query is null or it's a properly formatted query string. |
|
| 453 | * @param query Query value to validate. |
|
| 454 | * @return true if query is valid. |
|
| 455 | */ |
|
| 456 | protected boolean isValidQuery(String query) { |
|
| 457 | 5361 | if (query == null) { |
| 458 | 2211 | return true; |
| 459 | } |
|
| 460 | ||
| 461 | 3150 | Perl5Util queryMatcher = new Perl5Util(); |
| 462 | 3150 | return queryMatcher.match(QUERY_PATTERN, query); |
| 463 | } |
|
| 464 | ||
| 465 | /** |
|
| 466 | * Returns true if the given fragment is null or fragments are allowed. |
|
| 467 | * @param fragment Fragment value to validate. |
|
| 468 | * @return true if fragment is valid. |
|
| 469 | */ |
|
| 470 | protected boolean isValidFragment(String fragment) { |
|
| 471 | 5361 | if (fragment == null) { |
| 472 | 4731 | return true; |
| 473 | } |
|
| 474 | ||
| 475 | 630 | return this.options.isOff(NO_FRAGMENTS); |
| 476 | } |
|
| 477 | ||
| 478 | /** |
|
| 479 | * Returns the number of times the token appears in the target. |
|
| 480 | * @param token Token value to be counted. |
|
| 481 | * @param target Target value to count tokens in. |
|
| 482 | * @return the number of tokens. |
|
| 483 | */ |
|
| 484 | protected int countToken(String token, String target) { |
|
| 485 | 22383 | int tokenIndex = 0; |
| 486 | 22383 | int count = 0; |
| 487 | 81014 | while (tokenIndex != -1) { |
| 488 | 36248 | tokenIndex = target.indexOf(token, tokenIndex); |
| 489 | 36248 | if (tokenIndex > -1) { |
| 490 | 13865 | tokenIndex++; |
| 491 | 13865 | count++; |
| 492 | } |
|
| 493 | } |
|
| 494 | 22383 | return count; |
| 495 | } |
|
| 496 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |