| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| org.apache.commons.validator.UrlValidator |
|
|
| 1 | /* |
|
| 2 | * $Id: UrlValidator.java 327148 2005-10-21 10:50:20Z niallp $ |
|
| 3 | * $Rev: 327148 $ |
|
| 4 | * $Date: 2005-10-21 11:50:20 +0100 (Fri, 21 Oct 2005) $ |
|
| 5 | * |
|
| 6 | * ==================================================================== |
|
| 7 | * Copyright 2001-2005 The Apache Software Foundation |
|
| 8 | * |
|
| 9 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 10 | * you may not use this file except in compliance with the License. |
|
| 11 | * You may obtain a copy of the License at |
|
| 12 | * |
|
| 13 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 14 | * |
|
| 15 | * Unless required by applicable law or agreed to in writing, software |
|
| 16 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 18 | * See the License for the specific language governing permissions and |
|
| 19 | * limitations under the License. |
|
| 20 | */ |
|
| 21 | ||
| 22 | package org.apache.commons.validator; |
|
| 23 | ||
| 24 | import java.io.Serializable; |
|
| 25 | import java.util.Arrays; |
|
| 26 | import java.util.HashSet; |
|
| 27 | import java.util.Set; |
|
| 28 | ||
| 29 | import org.apache.commons.validator.util.Flags; |
|
| 30 | import org.apache.oro.text.perl.Perl5Util; |
|
| 31 | ||
| 32 | /** |
|
| 33 | * <p>Validates URLs.</p> |
|
| 34 | * Behavior of validation is modified by passing in options: |
|
| 35 | * <li>ALLOW_2_SLASHES - [FALSE] Allows double '/' characters in the path |
|
| 36 | * component.</li> |
|
| 37 | * <li>NO_FRAGMENTS - [FALSE] By default fragments are allowed, if this option is |
|
| 38 | * included then fragments are flagged as illegal.</li> |
|
| 39 | * <li>ALLOW_ALL_SCHEMES - [FALSE] By default only http, https, and ftp are |
|
| 40 | * considered valid schemes. Enabling this option will let any scheme pass validation.</li> |
|
| 41 | * |
|
| 42 | * <p>Originally based on a PHP script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02, |
|
| 43 | * http://javascript.internet.com. However, this validation now bears little resemblance |
|
| 44 | * to the php original.</p> |
|
| 45 | * <pre> |
|
| 46 | * Example of usage: |
|
| 47 | * Construct a UrlValidator with valid schemes of "http", and "https". |
|
| 48 | * |
|
| 49 | * String[] schemes = {"http","https"}; |
|
| 50 | * UrlValidator urlValidator = new UrlValidator(schemes); |
|
| 51 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
| 52 | * System.out.println("url is valid"); |
|
| 53 | * } else { |
|
| 54 | * System.out.println("url is invalid"); |
|
| 55 | * } |
|
| 56 | * |
|
| 57 | * prints "url is invalid" |
|
| 58 | * If instead the default constructor is used. |
|
| 59 | * |
|
| 60 | * UrlValidator urlValidator = new UrlValidator(); |
|
| 61 | * if (urlValidator.isValid("ftp://foo.bar.com/")) { |
|
| 62 | * System.out.println("url is valid"); |
|
| 63 | * } else { |
|
| 64 | * System.out.println("url is invalid"); |
|
| 65 | * } |
|
| 66 | * |
|
| 67 | * prints out "url is valid" |
|
| 68 | * </pre> |
|
| 69 | * |
|
| 70 | * @see |
|
| 71 | * <a href='http://www.ietf.org/rfc/rfc2396.txt' > |
|
| 72 | * Uniform Resource Identifiers (URI): Generic Syntax |
|
| 73 | * </a> |
|
| 74 | * |
|
| 75 | * @since Validator 1.1 |
|
| 76 | */ |
|
| 77 | public class UrlValidator implements Serializable { |
|
| 78 | ||
| 79 | /** |
|
| 80 | * Allows all validly formatted schemes to pass validation instead of |
|
| 81 | * supplying a set of valid schemes. |
|
| 82 | */ |
|
| 83 | public static final int ALLOW_ALL_SCHEMES = 1 << 0; |
|
| 84 | ||
| 85 | /** |
|
| 86 | * Allow two slashes in the path component of the URL. |
|
| 87 | */ |
|
| 88 | public static final int ALLOW_2_SLASHES = 1 << 1; |
|
| 89 | ||
| 90 | /** |
|
| 91 | * Enabling this option disallows any URL fragments. |
|
| 92 | */ |
|
| 93 | public static final int NO_FRAGMENTS = 1 << 2; |
|
| 94 | ||
| 95 | private static final String ALPHA_CHARS = "a-zA-Z"; |
|
| 96 | ||
| 97 | private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d"; |
|
| 98 | ||
| 99 | private static final String SPECIAL_CHARS = ";/@&=,.?:+$"; |
|
| 100 | ||
| 101 | private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]"; |
|
| 102 | ||
| 103 | private static final String SCHEME_CHARS = ALPHA_CHARS; |
|
| 104 | ||
| 105 | // Drop numeric, and "+-." for now |
|
| 106 | private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\."; |
|
| 107 | ||
| 108 | private static final String ATOM = VALID_CHARS + '+'; |
|
| 109 | ||
| 110 | /** |
|
| 111 | * This expression derived/taken from the BNF for URI (RFC2396). |
|
| 112 | */ |
|
| 113 | private static final String URL_PATTERN = |
|
| 114 | "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/"; |
|
| 115 | // 12 3 4 5 6 7 8 9 |
|
| 116 | ||
| 117 | /** |
|
| 118 | * Scheme/Protocol (i.e. http:, ftp:, file:, etc). |
|
| 119 | */ |
|
| 120 | private static final int PARSE_URL_SCHEME = 2; |
|
| 121 | ||
| 122 | /** |
|
| 123 | * Includes hostname/ip and port number. |
|
| 124 | */ |
|
| 125 | private static final int PARSE_URL_AUTHORITY = 4; |
|
| 126 | ||
| 127 | private static final int PARSE_URL_PATH = 5; |
|
| 128 | ||
| 129 | private static final int PARSE_URL_QUERY = 7; |
|
| 130 | ||
| 131 | private static final int PARSE_URL_FRAGMENT = 9; |
|
| 132 | ||
| 133 | /** |
|
| 134 | * Protocol (ie. http:, ftp:,https:). |
|
| 135 | */ |
|
| 136 | private static final String SCHEME_PATTERN = "/^[" + SCHEME_CHARS + "]/"; |
|
| 137 | ||
| 138 | private static final String AUTHORITY_PATTERN = |
|
| 139 | "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/"; |
|
| 140 | // 1 2 3 4 |
|
| 141 | ||
| 142 | private static final int PARSE_AUTHORITY_HOST_IP = 1; |
|
| 143 | ||
| 144 | private static final int PARSE_AUTHORITY_PORT = 2; |
|
| 145 | ||
| 146 | /** |
|
| 147 | * Should always be empty. |
|
| 148 | */ |
|
| 149 | private static final int PARSE_AUTHORITY_EXTRA = 3; |
|
| 150 | ||
| 151 | private static final String PATH_PATTERN = "/^(/[-\\w:@&?=+,.!/~*'%$]*)?$/"; |
|
| 152 | ||
| 153 | private static final String QUERY_PATTERN = "/^(.*)$/"; |
|
| 154 | ||
| 155 | private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/"; |
|
| 156 | ||
| 157 | private static final String IP_V4_DOMAIN_PATTERN = |
|
| 158 | "/^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$/"; |
|
| 159 | ||
| 160 | private static final String DOMAIN_PATTERN = |
|
| 161 | "/^" + ATOM + "(\\." + ATOM + ")*$/"; |
|
| 162 | ||
| 163 | private static final String PORT_PATTERN = "/^:(\\d{1,5})$/"; |
|
| 164 | ||
| 165 | private static final String ATOM_PATTERN = "/(" + ATOM + ")/"; |
|
| 166 | ||
| 167 | private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/"; |
|
| 168 | ||
| 169 | /** |
|
| 170 | * Holds the set of current validation options. |
|
| 171 | */ |
|
| 172 | 18 | private Flags options = null; |
| 173 | ||
| 174 | /** |
|
| 175 | * The set of schemes that are allowed to be in a URL. |
|
| 176 | */ |
|
| 177 | 18 | private Set allowedSchemes = new HashSet(); |
| 178 | ||
| 179 | /** |
|
| 180 | * If no schemes are provided, default to this set. |
|
| 181 | */ |
|
| 182 | 18 | protected String[] defaultSchemes = {"http", "https", "ftp"}; |
| 183 | ||
| 184 | /** |
|
| 185 | * Create a UrlValidator with default properties. |
|
| 186 | */ |
|
| 187 | public UrlValidator() { |
|
| 188 | 15 | this(null); |
| 189 | 15 | } |
| 190 | ||
| 191 | /** |
|
| 192 | * Behavior of validation is modified by passing in several string options: |
|
| 193 | * @param schemes Pass in one or more url schemes to consider valid, passing in |
|
| 194 | * a null will default to "http,https,ftp" being valid. |
|
| 195 | * If a non-null schemes is specified then all valid schemes must |
|
| 196 | * be specified. Setting the ALLOW_ALL_SCHEMES option will |
|
| 197 | * ignore the contents of schemes. |
|
| 198 | */ |
|
| 199 | public UrlValidator(String[] schemes) { |
|
| 200 | 15 | this(schemes, 0); |
| 201 | 15 | } |
| 202 | ||
| 203 | /** |
|
| 204 | * Initialize a UrlValidator with the given validation options. |
|
| 205 | * @param options The options should be set using the public constants declared in |
|
| 206 | * this class. To set multiple options you simply add them together. For example, |
|
| 207 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
| 208 | */ |
|
| 209 | public UrlValidator(int options) { |
|
| 210 | 0 | this(null, options); |
| 211 | 0 | } |
| 212 | ||
| 213 | /** |
|
| 214 | * Behavior of validation is modified by passing in options: |
|
| 215 | * @param schemes The set of valid schemes. |
|
| 216 | * @param options The options should be set using the public constants declared in |
|
| 217 | * this class. To set multiple options you simply add them together. For example, |
|
| 218 | * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options. |
|
| 219 | */ |
|
| 220 | 18 | public UrlValidator(String[] schemes, int options) { |
| 221 | 18 | this.options = new Flags(options); |
| 222 | ||
| 223 | 18 | if (this.options.isOn(ALLOW_ALL_SCHEMES)) { |
| 224 | 2 | return; |
| 225 | } |
|
| 226 | ||
| 227 | 16 | if (schemes == null) { |
| 228 | 15 | schemes = this.defaultSchemes; |
| 229 | } |
|
| 230 | ||
| 231 | 16 | this.allowedSchemes.addAll(Arrays.asList(schemes)); |
| 232 | 16 | } |
| 233 | ||
| 234 | /** |
|
| 235 | * <p>Checks if a field has a valid url address.</p> |
|
| 236 | * |
|
| 237 | * @param value The value validation is being performed on. A <code>null</code> |
|
| 238 | * value is considered invalid. |
|
| 239 | * @return true if the url is valid. |
|
| 240 | */ |
|
| 241 | public boolean isValid(String value) { |
|
| 242 | 75604 | if (value == null) { |
| 243 | 0 | return false; |
| 244 | } |
|
| 245 | ||
| 246 | 75604 | Perl5Util matchUrlPat = new Perl5Util(); |
| 247 | 75604 | Perl5Util matchAsciiPat = new Perl5Util(); |
| 248 | ||
| 249 | 75604 | if (!matchAsciiPat.match(LEGAL_ASCII_PATTERN, value)) { |
| 250 | 0 | return false; |
| 251 | } |
|
| 252 | ||
| 253 | // Check the whole url address structure |
|
| 254 | 75604 | if (!matchUrlPat.match(URL_PATTERN, value)) { |
| 255 | 0 | return false; |
| 256 | } |
|
| 257 | ||
| 258 | 75604 | if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) { |
| 259 | 28350 | return false; |
| 260 | } |
|
| 261 | ||
| 262 | 47254 | if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) { |
| 263 | 39375 | return false; |
| 264 | } |
|
| 265 | ||
| 266 | 7879 | if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) { |
| 267 | 2520 | return false; |
| 268 | } |
|
| 269 | ||
| 270 | 5359 | if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) { |
| 271 | 0 | return false; |
| 272 | } |
|
| 273 | ||
| 274 | 5359 | if (!isValidFragment(matchUrlPat.group(PARSE_URL_FRAGMENT))) { |
| 275 | 630 | return false; |
| 276 | } |
|
| 277 | ||
| 278 | 4729 | return true; |
| 279 | } |
|
| 280 | ||
| 281 | /** |
|
| 282 | * Validate scheme. If schemes[] was initialized to a non-null value, |
|
| 283 | * then only those schemes are allowed. Note this is slightly different |
|
| 284 | * than for the constructor. |
|
| 285 | * @param scheme The scheme to validate. A <code>null</code> value is considered |
|
| 286 | * invalid. |
|
| 287 | * @return true if valid. |
|
| 288 | */ |
|
| 289 | protected boolean isValidScheme(String scheme) { |
|
| 290 | 75608 | if (scheme == null) { |
| 291 | 18900 | return false; |
| 292 | } |
|
| 293 | ||
| 294 | 56708 | Perl5Util schemeMatcher = new Perl5Util(); |
| 295 | 56708 | if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) { |
| 296 | 9450 | return false; |
| 297 | } |
|
| 298 | ||
| 299 | 47258 | if (this.options.isOff(ALLOW_ALL_SCHEMES)) { |
| 300 | ||
| 301 | 4 | if (!this.allowedSchemes.contains(scheme)) { |
| 302 | 3 | return false; |
| 303 | } |
|
| 304 | } |
|
| 305 | ||
| 306 | 47255 | return true; |
| 307 | } |
|
| 308 | ||
| 309 | /** |
|
| 310 | * Returns true if the authority is properly formatted. An authority is the combination |
|
| 311 | * of hostname and port. A <code>null</code> authority value is considered invalid. |
|
| 312 | * @param authority Authority value to validate. |
|
| 313 | * @return true if authority (hostname and port) is valid. |
|
| 314 | */ |
|
| 315 | protected boolean isValidAuthority(String authority) { |
|
| 316 | 47254 | if (authority == null) { |
| 317 | 18831 | return false; |
| 318 | } |
|
| 319 | ||
| 320 | 28423 | Perl5Util authorityMatcher = new Perl5Util(); |
| 321 | 28423 | Perl5Util matchIPV4Pat = new Perl5Util(); |
| 322 | ||
| 323 | 28423 | if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) { |
| 324 | 0 | return false; |
| 325 | } |
|
| 326 | ||
| 327 | 28423 | boolean ipV4Address = false; |
| 328 | 28423 | boolean hostname = false; |
| 329 | // check if authority is IP address or hostname |
|
| 330 | 28423 | String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP); |
| 331 | 28423 | ipV4Address = matchIPV4Pat.match(IP_V4_DOMAIN_PATTERN, hostIP); |
| 332 | ||
| 333 | 28423 | if (ipV4Address) { |
| 334 | // this is an IP address so check components |
|
| 335 | 17325 | for (int i = 1; i <= 4; i++) { |
| 336 | 14175 | String ipSegment = matchIPV4Pat.group(i); |
| 337 | 14175 | if (ipSegment == null || ipSegment.length() <= 0) { |
| 338 | 0 | return false; |
| 339 | } |
|
| 340 | ||
| 341 | try { |
|
| 342 | 14175 | if (Integer.parseInt(ipSegment) > 255) { |
| 343 | 1575 | return false; |
| 344 | } |
|
| 345 | 12600 | } catch(NumberFormatException e) { |
| 346 | 0 | return false; |
| 347 | } |
|
| 348 | ||
| 349 | } |
|
| 350 | } else { |
|
| 351 | // Domain is a hostname |
|
| 352 | 23698 | Perl5Util domainMatcher = new Perl5Util(); |
| 353 | 23698 | hostname = domainMatcher.match(DOMAIN_PATTERN, hostIP); |
| 354 | } |
|
| 355 | ||
| 356 | //rightmost hostname will never start with a digit. |
|
| 357 | 26848 | if (hostname) { |
| 358 | 15787 | String[] domainSegment = new String[10]; |
| 359 | 15787 | boolean match = true; |
| 360 | 15787 | int segmentCount = 0; |
| 361 | 15787 | int segmentLength = 0; |
| 362 | 15787 | Perl5Util atomMatcher = new Perl5Util(); |
| 363 | ||
| 364 | 85206 | while (match) { |
| 365 | 53632 | match = atomMatcher.match(ATOM_PATTERN, hostIP); |
| 366 | 53632 | if (match) { |
| 367 | 37845 | domainSegment[segmentCount] = atomMatcher.group(1); |
| 368 | 37845 | segmentLength = domainSegment[segmentCount].length() + 1; |
| 369 | 37845 | hostIP = |
| 370 | (segmentLength >= hostIP.length()) |
|
| 371 | ? "" |
|
| 372 | : hostIP.substring(segmentLength); |
|
| 373 | ||
| 374 | 37845 | segmentCount++; |
| 375 | } |
|
| 376 | } |
|
| 377 | 15787 | String topLevel = domainSegment[segmentCount - 1]; |
| 378 | 15787 | if (topLevel.length() < 2 || topLevel.length() > 4) { |
| 379 | 4749 | return false; |
| 380 | } |
|
| 381 | ||
| 382 | // First letter of top level must be an alpha character |
|
| 383 | 11038 | Perl5Util alphaMatcher = new Perl5Util(); |
| 384 | 11038 | if (!alphaMatcher.match(ALPHA_PATTERN, topLevel.substring(0, 1))) { |
| 385 | 1575 | return false; |
| 386 | } |
|
| 387 | ||
| 388 | // Make sure there's a host name preceding the top-level domain. |
|
| 389 | 9463 | if (segmentCount < 2) { |
| 390 | 1584 | return false; |
| 391 | } |
|
| 392 | } |
|
| 393 | ||
| 394 | 18940 | if (!hostname && !ipV4Address) { |
| 395 | 7911 | return false; |
| 396 | } |
|
| 397 | ||
| 398 | 11029 | String port = authorityMatcher.group(PARSE_AUTHORITY_PORT); |
| 399 | 11029 | if (port != null) { |
| 400 | 9450 | Perl5Util portMatcher = new Perl5Util(); |
| 401 | 9450 | if (!portMatcher.match(PORT_PATTERN, port)) { |
| 402 | 1575 | return false; |
| 403 | } |
|
| 404 | } |
|
| 405 | ||
| 406 | 9454 | String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA); |
| 407 | 9454 | if (!GenericValidator.isBlankOrNull(extra)) { |
| 408 | 1575 | return false; |
| 409 | } |
|
| 410 | ||
| 411 | 7879 | return true; |
| 412 | } |
|
| 413 | ||
| 414 | /** |
|
| 415 | * Returns true if the path is valid. A <code>null</code> value is considered invalid. |
|
| 416 | * @param path Path value to validate. |
|
| 417 | * @return true if path is valid. |
|
| 418 | */ |
|
| 419 | protected boolean isValidPath(String path) { |
|
| 420 | 7879 | if (path == null) { |
| 421 | 0 | return false; |
| 422 | } |
|
| 423 | ||
| 424 | 7879 | Perl5Util pathMatcher = new Perl5Util(); |
| 425 | ||
| 426 | 7879 | if (!pathMatcher.match(PATH_PATTERN, path)) { |
| 427 | 0 | return false; |
| 428 | } |
|
| 429 | ||
| 430 | 7879 | int slash2Count = countToken("//", path); |
| 431 | 7879 | if (this.options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) { |
| 432 | 630 | return false; |
| 433 | } |
|
| 434 | ||
| 435 | 7249 | int slashCount = countToken("/", path); |
| 436 | 7249 | int dot2Count = countToken("..", path); |
| 437 | 7249 | if (dot2Count > 0) { |
| 438 | 1890 | if ((slashCount - slash2Count - 1) <= dot2Count) { |
| 439 | 1890 | return false; |
| 440 | } |
|
| 441 | } |
|
| 442 | ||
| 443 | 5359 | return true; |
| 444 | } |
|
| 445 | ||
| 446 | /** |
|
| 447 | * Returns true if the query is null or it's a properly formatted query string. |
|
| 448 | * @param query Query value to validate. |
|
| 449 | * @return true if query is valid. |
|
| 450 | */ |
|
| 451 | protected boolean isValidQuery(String query) { |
|
| 452 | 5359 | if (query == null) { |
| 453 | 2209 | return true; |
| 454 | } |
|
| 455 | ||
| 456 | 3150 | Perl5Util queryMatcher = new Perl5Util(); |
| 457 | 3150 | return queryMatcher.match(QUERY_PATTERN, query); |
| 458 | } |
|
| 459 | ||
| 460 | /** |
|
| 461 | * Returns true if the given fragment is null or fragments are allowed. |
|
| 462 | * @param fragment Fragment value to validate. |
|
| 463 | * @return true if fragment is valid. |
|
| 464 | */ |
|
| 465 | protected boolean isValidFragment(String fragment) { |
|
| 466 | 5359 | if (fragment == null) { |
| 467 | 4729 | return true; |
| 468 | } |
|
| 469 | ||
| 470 | 630 | return this.options.isOff(NO_FRAGMENTS); |
| 471 | } |
|
| 472 | ||
| 473 | /** |
|
| 474 | * Returns the number of times the token appears in the target. |
|
| 475 | * @param token Token value to be counted. |
|
| 476 | * @param target Target value to count tokens in. |
|
| 477 | * @return the number of tokens. |
|
| 478 | */ |
|
| 479 | protected int countToken(String token, String target) { |
|
| 480 | 22377 | int tokenIndex = 0; |
| 481 | 22377 | int count = 0; |
| 482 | 80993 | while (tokenIndex != -1) { |
| 483 | 36239 | tokenIndex = target.indexOf(token, tokenIndex); |
| 484 | 36239 | if (tokenIndex > -1) { |
| 485 | 13862 | tokenIndex++; |
| 486 | 13862 | count++; |
| 487 | } |
|
| 488 | } |
|
| 489 | 22377 | return count; |
| 490 | } |
|
| 491 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |