1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io;
18
19 import java.io.File;
20 import java.util.ArrayList;
21 import java.util.Collection;
22 import java.util.Iterator;
23 import java.util.Stack;
24
25 /**
26 * General filename and filepath manipulation utilities.
27 * <p>
28 * When dealing with filenames you can hit problems when moving from a Windows
29 * based development machine to a Unix based production machine.
30 * This class aims to help avoid those problems.
31 * <p>
32 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
33 * using JDK {@link java.io.File File} objects and the two argument constructor
34 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
35 * <p>
36 * Most methods on this class are designed to work the same on both Unix and Windows.
37 * Those that don't include 'System', 'Unix' or 'Windows' in their name.
38 * <p>
39 * Most methods recognise both separators (forward and back), and both
40 * sets of prefixes. See the javadoc of each method for details.
41 * <p>
42 * This class defines six components within a filename
43 * (example C:\dev\project\file.txt):
44 * <ul>
45 * <li>the prefix - C:\</li>
46 * <li>the path - dev\project\</li>
47 * <li>the full path - C:\dev\project\</li>
48 * <li>the name - file.txt</li>
49 * <li>the base name - file</li>
50 * <li>the extension - txt</li>
51 * </ul>
52 * Note that this class works best if directory filenames end with a separator.
53 * If you omit the last separator, it is impossible to determine if the filename
54 * corresponds to a file or a directory. As a result, we have chosen to say
55 * it corresponds to a file.
56 * <p>
57 * This class only supports Unix and Windows style names.
58 * Prefixes are matched as follows:
59 * <pre>
60 * Windows:
61 * a\b\c.txt --> "" --> relative
62 * \a\b\c.txt --> "\" --> current drive absolute
63 * C:a\b\c.txt --> "C:" --> drive relative
64 * C:\a\b\c.txt --> "C:\" --> absolute
65 * \\server\a\b\c.txt --> "\\server\" --> UNC
66 *
67 * Unix:
68 * a/b/c.txt --> "" --> relative
69 * /a/b/c.txt --> "/" --> absolute
70 * ~/a/b/c.txt --> "~/" --> current user
71 * ~ --> "~/" --> current user (slash added)
72 * ~user/a/b/c.txt --> "~user/" --> named user
73 * ~user --> "~user/" --> named user (slash added)
74 * </pre>
75 * Both prefix styles are matched always, irrespective of the machine that you are
76 * currently running on.
77 * <p>
78 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
79 *
80 * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
81 * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
82 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
83 * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
84 * @author <a href="mailto:peter@apache.org">Peter Donald</a>
85 * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
86 * @author Matthew Hawthorne
87 * @author Martin Cooper
88 * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
89 * @author Stephen Colebourne
90 * @version $Id: FilenameUtils.java 490424 2006-12-27 01:20:43Z bayard $
91 * @since Commons IO 1.1
92 */
93 public class FilenameUtils {
94
95 /**
96 * The extension separator character.
97 */
98 private static final char EXTENSION_SEPARATOR = '.';
99
100 /**
101 * The Unix separator character.
102 */
103 private static final char UNIX_SEPARATOR = '/';
104
105 /**
106 * The Windows separator character.
107 */
108 private static final char WINDOWS_SEPARATOR = '\\';
109
110 /**
111 * The system separator character.
112 */
113 private static final char SYSTEM_SEPARATOR = File.separatorChar;
114
115 /**
116 * The separator character that is the opposite of the system separator.
117 */
118 private static final char OTHER_SEPARATOR;
119 static {
120 if (isSystemWindows()) {
121 OTHER_SEPARATOR = UNIX_SEPARATOR;
122 } else {
123 OTHER_SEPARATOR = WINDOWS_SEPARATOR;
124 }
125 }
126
127 /**
128 * Instances should NOT be constructed in standard programming.
129 */
130 public FilenameUtils() {
131 super();
132 }
133
134 //-----------------------------------------------------------------------
135 /**
136 * Determines if Windows file system is in use.
137 *
138 * @return true if the system is Windows
139 */
140 static boolean isSystemWindows() {
141 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
142 }
143
144 //-----------------------------------------------------------------------
145 /**
146 * Checks if the character is a separator.
147 *
148 * @param ch the character to check
149 * @return true if it is a separator character
150 */
151 private static boolean isSeparator(char ch) {
152 return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
153 }
154
155 //-----------------------------------------------------------------------
156 /**
157 * Normalizes a path, removing double and single dot path steps.
158 * <p>
159 * This method normalizes a path to a standard format.
160 * The input may contain separators in either Unix or Windows format.
161 * The output will contain separators in the format of the system.
162 * <p>
163 * A trailing slash will be retained.
164 * A double slash will be merged to a single slash (but UNC names are handled).
165 * A single dot path segment will be removed.
166 * A double dot will cause that path segment and the one before to be removed.
167 * If the double dot has no parent path segment to work with, <code>null</code>
168 * is returned.
169 * <p>
170 * The output will be the same on both Unix and Windows except
171 * for the separator character.
172 * <pre>
173 * /foo// --> /foo/
174 * /foo/./ --> /foo/
175 * /foo/../bar --> /bar
176 * /foo/../bar/ --> /bar/
177 * /foo/../bar/../baz --> /baz
178 * //foo//./bar --> /foo/bar
179 * /../ --> null
180 * ../foo --> null
181 * foo/bar/.. --> foo/
182 * foo/../../bar --> null
183 * foo/../bar --> bar
184 * //server/foo/../bar --> //server/bar
185 * //server/../bar --> null
186 * C:\foo\..\bar --> C:\bar
187 * C:\..\bar --> null
188 * ~/foo/../bar/ --> ~/bar/
189 * ~/../bar --> null
190 * </pre>
191 * (Note the file separator returned will be correct for Windows/Unix)
192 *
193 * @param filename the filename to normalize, null returns null
194 * @return the normalized filename, or null if invalid
195 */
196 public static String normalize(String filename) {
197 return doNormalize(filename, true);
198 }
199
200 //-----------------------------------------------------------------------
201 /**
202 * Normalizes a path, removing double and single dot path steps,
203 * and removing any final directory separator.
204 * <p>
205 * This method normalizes a path to a standard format.
206 * The input may contain separators in either Unix or Windows format.
207 * The output will contain separators in the format of the system.
208 * <p>
209 * A trailing slash will be removed.
210 * A double slash will be merged to a single slash (but UNC names are handled).
211 * A single dot path segment will be removed.
212 * A double dot will cause that path segment and the one before to be removed.
213 * If the double dot has no parent path segment to work with, <code>null</code>
214 * is returned.
215 * <p>
216 * The output will be the same on both Unix and Windows except
217 * for the separator character.
218 * <pre>
219 * /foo// --> /foo
220 * /foo/./ --> /foo
221 * /foo/../bar --> /bar
222 * /foo/../bar/ --> /bar
223 * /foo/../bar/../baz --> /baz
224 * //foo//./bar --> /foo/bar
225 * /../ --> null
226 * ../foo --> null
227 * foo/bar/.. --> foo
228 * foo/../../bar --> null
229 * foo/../bar --> bar
230 * //server/foo/../bar --> //server/bar
231 * //server/../bar --> null
232 * C:\foo\..\bar --> C:\bar
233 * C:\..\bar --> null
234 * ~/foo/../bar/ --> ~/bar
235 * ~/../bar --> null
236 * </pre>
237 * (Note the file separator returned will be correct for Windows/Unix)
238 *
239 * @param filename the filename to normalize, null returns null
240 * @return the normalized filename, or null if invalid
241 */
242 public static String normalizeNoEndSeparator(String filename) {
243 return doNormalize(filename, false);
244 }
245
246 /**
247 * Internal method to perform the normalization.
248 *
249 * @param filename the filename
250 * @param keepSeparator true to keep the final separator
251 * @return the normalized filename
252 */
253 private static String doNormalize(String filename, boolean keepSeparator) {
254 if (filename == null) {
255 return null;
256 }
257 int size = filename.length();
258 if (size == 0) {
259 return filename;
260 }
261 int prefix = getPrefixLength(filename);
262 if (prefix < 0) {
263 return null;
264 }
265
266 char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
267 filename.getChars(0, filename.length(), array, 0);
268
269 // fix separators throughout
270 for (int i = 0; i < array.length; i++) {
271 if (array[i] == OTHER_SEPARATOR) {
272 array[i] = SYSTEM_SEPARATOR;
273 }
274 }
275
276 // add extra separator on the end to simplify code below
277 boolean lastIsDirectory = true;
278 if (array[size - 1] != SYSTEM_SEPARATOR) {
279 array[size++] = SYSTEM_SEPARATOR;
280 lastIsDirectory = false;
281 }
282
283 // adjoining slashes
284 for (int i = prefix + 1; i < size; i++) {
285 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR) {
286 System.arraycopy(array, i, array, i - 1, size - i);
287 size--;
288 i--;
289 }
290 }
291
292 // dot slash
293 for (int i = prefix + 1; i < size; i++) {
294 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' &&
295 (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) {
296 if (i == size - 1) {
297 lastIsDirectory = true;
298 }
299 System.arraycopy(array, i + 1, array, i - 1, size - i);
300 size -=2;
301 i--;
302 }
303 }
304
305 // double dot slash
306 outer:
307 for (int i = prefix + 2; i < size; i++) {
308 if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' &&
309 (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) {
310 if (i == prefix + 2) {
311 return null;
312 }
313 if (i == size - 1) {
314 lastIsDirectory = true;
315 }
316 int j;
317 for (j = i - 4 ; j >= prefix; j--) {
318 if (array[j] == SYSTEM_SEPARATOR) {
319 // remove b/../ from a/b/../c
320 System.arraycopy(array, i + 1, array, j + 1, size - i);
321 size -= (i - j);
322 i = j + 1;
323 continue outer;
324 }
325 }
326 // remove a/../ from a/../c
327 System.arraycopy(array, i + 1, array, prefix, size - i);
328 size -= (i + 1 - prefix);
329 i = prefix + 1;
330 }
331 }
332
333 if (size <= 0) { // should never be less than 0
334 return "";
335 }
336 if (size <= prefix) { // should never be less than prefix
337 return new String(array, 0, size);
338 }
339 if (lastIsDirectory && keepSeparator) {
340 return new String(array, 0, size); // keep trailing separator
341 }
342 return new String(array, 0, size - 1); // lose trailing separator
343 }
344
345 //-----------------------------------------------------------------------
346 /**
347 * Concatenates a filename to a base path using normal command line style rules.
348 * <p>
349 * The effect is equivalent to resultant directory after changing
350 * directory to the first argument, followed by changing directory to
351 * the second argument.
352 * <p>
353 * The first argument is the base path, the second is the path to concatenate.
354 * The returned path is always normalized via {@link #normalize(String)},
355 * thus <code>..</code> is handled.
356 * <p>
357 * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
358 * it will be normalized and returned.
359 * Otherwise, the paths will be joined, normalized and returned.
360 * <p>
361 * The output will be the same on both Unix and Windows except
362 * for the separator character.
363 * <pre>
364 * /foo/ + bar --> /foo/bar
365 * /foo + bar --> /foo/bar
366 * /foo + /bar --> /bar
367 * /foo + C:/bar --> C:/bar
368 * /foo + C:bar --> C:bar (*)
369 * /foo/a/ + ../bar --> /foo/bar
370 * /foo/ + ../../bar --> null
371 * /foo/ + /bar --> /bar
372 * /foo/.. + /bar --> /bar
373 * /foo + bar/c.txt --> /foo/bar/c.txt
374 * /foo/c.txt + bar --> /foo/c.txt/bar (!)
375 * </pre>
376 * (*) Note that the Windows relative drive prefix is unreliable when
377 * used with this method.
378 * (!) Note that the first parameter must be a path. If it ends with a name, then
379 * the name will be built into the concatenated path. If this might be a problem,
380 * use {@link #getFullPath(String)} on the base path argument.
381 *
382 * @param basePath the base path to attach to, always treated as a path
383 * @param fullFilenameToAdd the filename (or path) to attach to the base
384 * @return the concatenated path, or null if invalid
385 */
386 public static String concat(String basePath, String fullFilenameToAdd) {
387 int prefix = getPrefixLength(fullFilenameToAdd);
388 if (prefix < 0) {
389 return null;
390 }
391 if (prefix > 0) {
392 return normalize(fullFilenameToAdd);
393 }
394 if (basePath == null) {
395 return null;
396 }
397 int len = basePath.length();
398 if (len == 0) {
399 return normalize(fullFilenameToAdd);
400 }
401 char ch = basePath.charAt(len - 1);
402 if (isSeparator(ch)) {
403 return normalize(basePath + fullFilenameToAdd);
404 } else {
405 return normalize(basePath + '/' + fullFilenameToAdd);
406 }
407 }
408
409 //-----------------------------------------------------------------------
410 /**
411 * Converts all separators to the Unix separator of forward slash.
412 *
413 * @param path the path to be changed, null ignored
414 * @return the updated path
415 */
416 public static String separatorsToUnix(String path) {
417 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
418 return path;
419 }
420 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
421 }
422
423 /**
424 * Converts all separators to the Windows separator of backslash.
425 *
426 * @param path the path to be changed, null ignored
427 * @return the updated path
428 */
429 public static String separatorsToWindows(String path) {
430 if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
431 return path;
432 }
433 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
434 }
435
436 /**
437 * Converts all separators to the system separator.
438 *
439 * @param path the path to be changed, null ignored
440 * @return the updated path
441 */
442 public static String separatorsToSystem(String path) {
443 if (path == null) {
444 return null;
445 }
446 if (isSystemWindows()) {
447 return separatorsToWindows(path);
448 } else {
449 return separatorsToUnix(path);
450 }
451 }
452
453 //-----------------------------------------------------------------------
454 /**
455 * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
456 * <p>
457 * This method will handle a file in either Unix or Windows format.
458 * <p>
459 * The prefix length includes the first slash in the full filename
460 * if applicable. Thus, it is possible that the length returned is greater
461 * than the length of the input string.
462 * <pre>
463 * Windows:
464 * a\b\c.txt --> "" --> relative
465 * \a\b\c.txt --> "\" --> current drive absolute
466 * C:a\b\c.txt --> "C:" --> drive relative
467 * C:\a\b\c.txt --> "C:\" --> absolute
468 * \\server\a\b\c.txt --> "\\server\" --> UNC
469 *
470 * Unix:
471 * a/b/c.txt --> "" --> relative
472 * /a/b/c.txt --> "/" --> absolute
473 * ~/a/b/c.txt --> "~/" --> current user
474 * ~ --> "~/" --> current user (slash added)
475 * ~user/a/b/c.txt --> "~user/" --> named user
476 * ~user --> "~user/" --> named user (slash added)
477 * </pre>
478 * <p>
479 * The output will be the same irrespective of the machine that the code is running on.
480 * ie. both Unix and Windows prefixes are matched regardless.
481 *
482 * @param filename the filename to find the prefix in, null returns -1
483 * @return the length of the prefix, -1 if invalid or null
484 */
485 public static int getPrefixLength(String filename) {
486 if (filename == null) {
487 return -1;
488 }
489 int len = filename.length();
490 if (len == 0) {
491 return 0;
492 }
493 char ch0 = filename.charAt(0);
494 if (ch0 == ':') {
495 return -1;
496 }
497 if (len == 1) {
498 if (ch0 == '~') {
499 return 2; // return a length greater than the input
500 }
501 return (isSeparator(ch0) ? 1 : 0);
502 } else {
503 if (ch0 == '~') {
504 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
505 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
506 if (posUnix == -1 && posWin == -1) {
507 return len + 1; // return a length greater than the input
508 }
509 posUnix = (posUnix == -1 ? posWin : posUnix);
510 posWin = (posWin == -1 ? posUnix : posWin);
511 return Math.min(posUnix, posWin) + 1;
512 }
513 char ch1 = filename.charAt(1);
514 if (ch1 == ':') {
515 ch0 = Character.toUpperCase(ch0);
516 if (ch0 >= 'A' && ch0 <= 'Z') {
517 if (len == 2 || isSeparator(filename.charAt(2)) == false) {
518 return 2;
519 }
520 return 3;
521 }
522 return -1;
523
524 } else if (isSeparator(ch0) && isSeparator(ch1)) {
525 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
526 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
527 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) {
528 return -1;
529 }
530 posUnix = (posUnix == -1 ? posWin : posUnix);
531 posWin = (posWin == -1 ? posUnix : posWin);
532 return Math.min(posUnix, posWin) + 1;
533 } else {
534 return (isSeparator(ch0) ? 1 : 0);
535 }
536 }
537 }
538
539 /**
540 * Returns the index of the last directory separator character.
541 * <p>
542 * This method will handle a file in either Unix or Windows format.
543 * The position of the last forward or backslash is returned.
544 * <p>
545 * The output will be the same irrespective of the machine that the code is running on.
546 *
547 * @param filename the filename to find the last path separator in, null returns -1
548 * @return the index of the last separator character, or -1 if there
549 * is no such character
550 */
551 public static int indexOfLastSeparator(String filename) {
552 if (filename == null) {
553 return -1;
554 }
555 int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
556 int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
557 return Math.max(lastUnixPos, lastWindowsPos);
558 }
559
560 /**
561 * Returns the index of the last extension separator character, which is a dot.
562 * <p>
563 * This method also checks that there is no directory separator after the last dot.
564 * To do this it uses {@link #indexOfLastSeparator(String)} which will
565 * handle a file in either Unix or Windows format.
566 * <p>
567 * The output will be the same irrespective of the machine that the code is running on.
568 *
569 * @param filename the filename to find the last extension separator in, null returns -1
570 * @return the index of the last extension separator character, or -1 if there
571 * is no such character or a directory separator follows it
572 */
573 public static int indexOfExtension(String filename) {
574 if (filename == null) {
575 return -1;
576 }
577 int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
578 int lastSeparator = indexOfLastSeparator(filename);
579 return (lastSeparator > extensionPos ? -1 : extensionPos);
580 }
581
582 //-----------------------------------------------------------------------
583 /**
584 * Gets the prefix from a full filename, such as <code>C:/</code>
585 * or <code>~/</code>.
586 * <p>
587 * This method will handle a file in either Unix or Windows format.
588 * The prefix includes the first slash in the full filename where applicable.
589 * <pre>
590 * Windows:
591 * a\b\c.txt --> "" --> relative
592 * \a\b\c.txt --> "\" --> current drive absolute
593 * C:a\b\c.txt --> "C:" --> drive relative
594 * C:\a\b\c.txt --> "C:\" --> absolute
595 * \\server\a\b\c.txt --> "\\server\" --> UNC
596 *
597 * Unix:
598 * a/b/c.txt --> "" --> relative
599 * /a/b/c.txt --> "/" --> absolute
600 * ~/a/b/c.txt --> "~/" --> current user
601 * ~ --> "~/" --> current user (slash added)
602 * ~user/a/b/c.txt --> "~user/" --> named user
603 * ~user --> "~user/" --> named user (slash added)
604 * </pre>
605 * <p>
606 * The output will be the same irrespective of the machine that the code is running on.
607 * ie. both Unix and Windows prefixes are matched regardless.
608 *
609 * @param filename the filename to query, null returns null
610 * @return the prefix of the file, null if invalid
611 */
612 public static String getPrefix(String filename) {
613 if (filename == null) {
614 return null;
615 }
616 int len = getPrefixLength(filename);
617 if (len < 0) {
618 return null;
619 }
620 if (len > filename.length()) {
621 return filename + UNIX_SEPARATOR; // we know this only happens for unix
622 }
623 return filename.substring(0, len);
624 }
625
626 /**
627 * Gets the path from a full filename, which excludes the prefix.
628 * <p>
629 * This method will handle a file in either Unix or Windows format.
630 * The method is entirely text based, and returns the text before and
631 * including the last forward or backslash.
632 * <pre>
633 * C:\a\b\c.txt --> a\b\
634 * ~/a/b/c.txt --> a/b/
635 * a.txt --> ""
636 * a/b/c --> a/b/
637 * a/b/c/ --> a/b/c/
638 * </pre>
639 * <p>
640 * The output will be the same irrespective of the machine that the code is running on.
641 * <p>
642 * This method drops the prefix from the result.
643 * See {@link #getFullPath(String)} for the method that retains the prefix.
644 *
645 * @param filename the filename to query, null returns null
646 * @return the path of the file, an empty string if none exists, null if invalid
647 */
648 public static String getPath(String filename) {
649 return doGetPath(filename, 1);
650 }
651
652 /**
653 * Gets the path from a full filename, which excludes the prefix, and
654 * also excluding the final directory separator.
655 * <p>
656 * This method will handle a file in either Unix or Windows format.
657 * The method is entirely text based, and returns the text before the
658 * last forward or backslash.
659 * <pre>
660 * C:\a\b\c.txt --> a\b
661 * ~/a/b/c.txt --> a/b
662 * a.txt --> ""
663 * a/b/c --> a/b
664 * a/b/c/ --> a/b/c
665 * </pre>
666 * <p>
667 * The output will be the same irrespective of the machine that the code is running on.
668 * <p>
669 * This method drops the prefix from the result.
670 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
671 *
672 * @param filename the filename to query, null returns null
673 * @return the path of the file, an empty string if none exists, null if invalid
674 */
675 public static String getPathNoEndSeparator(String filename) {
676 return doGetPath(filename, 0);
677 }
678
679 /**
680 * Does the work of getting the path.
681 *
682 * @param filename the filename
683 * @param separatorAdd 0 to omit the end separator, 1 to return it
684 * @return the path
685 */
686 private static String doGetPath(String filename, int separatorAdd) {
687 if (filename == null) {
688 return null;
689 }
690 int prefix = getPrefixLength(filename);
691 if (prefix < 0) {
692 return null;
693 }
694 int index = indexOfLastSeparator(filename);
695 if (prefix >= filename.length() || index < 0) {
696 return "";
697 }
698 return filename.substring(prefix, index + separatorAdd);
699 }
700
701 /**
702 * Gets the full path from a full filename, which is the prefix + path.
703 * <p>
704 * This method will handle a file in either Unix or Windows format.
705 * The method is entirely text based, and returns the text before and
706 * including the last forward or backslash.
707 * <pre>
708 * C:\a\b\c.txt --> C:\a\b\
709 * ~/a/b/c.txt --> ~/a/b/
710 * a.txt --> ""
711 * a/b/c --> a/b/
712 * a/b/c/ --> a/b/c/
713 * C: --> C:
714 * C:\ --> C:\
715 * ~ --> ~/
716 * ~/ --> ~/
717 * ~user --> ~user/
718 * ~user/ --> ~user/
719 * </pre>
720 * <p>
721 * The output will be the same irrespective of the machine that the code is running on.
722 *
723 * @param filename the filename to query, null returns null
724 * @return the path of the file, an empty string if none exists, null if invalid
725 */
726 public static String getFullPath(String filename) {
727 return doGetFullPath(filename, true);
728 }
729
730 /**
731 * Gets the full path from a full filename, which is the prefix + path,
732 * and also excluding the final directory separator.
733 * <p>
734 * This method will handle a file in either Unix or Windows format.
735 * The method is entirely text based, and returns the text before the
736 * last forward or backslash.
737 * <pre>
738 * C:\a\b\c.txt --> C:\a\b
739 * ~/a/b/c.txt --> ~/a/b
740 * a.txt --> ""
741 * a/b/c --> a/b
742 * a/b/c/ --> a/b/c
743 * C: --> C:
744 * C:\ --> C:\
745 * ~ --> ~
746 * ~/ --> ~/
747 * ~user --> ~user
748 * ~user/ --> ~user/
749 * </pre>
750 * <p>
751 * The output will be the same irrespective of the machine that the code is running on.
752 *
753 * @param filename the filename to query, null returns null
754 * @return the path of the file, an empty string if none exists, null if invalid
755 */
756 public static String getFullPathNoEndSeparator(String filename) {
757 return doGetFullPath(filename, false);
758 }
759
760 /**
761 * Does the work of getting the path.
762 *
763 * @param filename the filename
764 * @param includeSeparator true to include the end separator
765 * @return the path
766 */
767 private static String doGetFullPath(String filename, boolean includeSeparator) {
768 if (filename == null) {
769 return null;
770 }
771 int prefix = getPrefixLength(filename);
772 if (prefix < 0) {
773 return null;
774 }
775 if (prefix >= filename.length()) {
776 if (includeSeparator) {
777 return getPrefix(filename); // add end slash if necessary
778 } else {
779 return filename;
780 }
781 }
782 int index = indexOfLastSeparator(filename);
783 if (index < 0) {
784 return filename.substring(0, prefix);
785 }
786 int end = index + (includeSeparator ? 1 : 0);
787 return filename.substring(0, end);
788 }
789
790 /**
791 * Gets the name minus the path from a full filename.
792 * <p>
793 * This method will handle a file in either Unix or Windows format.
794 * The text after the last forward or backslash is returned.
795 * <pre>
796 * a/b/c.txt --> c.txt
797 * a.txt --> a.txt
798 * a/b/c --> c
799 * a/b/c/ --> ""
800 * </pre>
801 * <p>
802 * The output will be the same irrespective of the machine that the code is running on.
803 *
804 * @param filename the filename to query, null returns null
805 * @return the name of the file without the path, or an empty string if none exists
806 */
807 public static String getName(String filename) {
808 if (filename == null) {
809 return null;
810 }
811 int index = indexOfLastSeparator(filename);
812 return filename.substring(index + 1);
813 }
814
815 /**
816 * Gets the base name, minus the full path and extension, from a full filename.
817 * <p>
818 * This method will handle a file in either Unix or Windows format.
819 * The text after the last forward or backslash and before the last dot is returned.
820 * <pre>
821 * a/b/c.txt --> c
822 * a.txt --> a
823 * a/b/c --> c
824 * a/b/c/ --> ""
825 * </pre>
826 * <p>
827 * The output will be the same irrespective of the machine that the code is running on.
828 *
829 * @param filename the filename to query, null returns null
830 * @return the name of the file without the path, or an empty string if none exists
831 */
832 public static String getBaseName(String filename) {
833 return removeExtension(getName(filename));
834 }
835
836 /**
837 * Gets the extension of a filename.
838 * <p>
839 * This method returns the textual part of the filename after the last dot.
840 * There must be no directory separator after the dot.
841 * <pre>
842 * foo.txt --> "txt"
843 * a/b/c.jpg --> "jpg"
844 * a/b.txt/c --> ""
845 * a/b/c --> ""
846 * </pre>
847 * <p>
848 * The output will be the same irrespective of the machine that the code is running on.
849 *
850 * @param filename the filename to retrieve the extension of.
851 * @return the extension of the file or an empty string if none exists.
852 */
853 public static String getExtension(String filename) {
854 if (filename == null) {
855 return null;
856 }
857 int index = indexOfExtension(filename);
858 if (index == -1) {
859 return "";
860 } else {
861 return filename.substring(index + 1);
862 }
863 }
864
865 //-----------------------------------------------------------------------
866 /**
867 * Removes the extension from a filename.
868 * <p>
869 * This method returns the textual part of the filename before the last dot.
870 * There must be no directory separator after the dot.
871 * <pre>
872 * foo.txt --> foo
873 * a\b\c.jpg --> a\b\c
874 * a\b\c --> a\b\c
875 * a.b\c --> a.b\c
876 * </pre>
877 * <p>
878 * The output will be the same irrespective of the machine that the code is running on.
879 *
880 * @param filename the filename to query, null returns null
881 * @return the filename minus the extension
882 */
883 public static String removeExtension(String filename) {
884 if (filename == null) {
885 return null;
886 }
887 int index = indexOfExtension(filename);
888 if (index == -1) {
889 return filename;
890 } else {
891 return filename.substring(0, index);
892 }
893 }
894
895 //-----------------------------------------------------------------------
896 /**
897 * Checks whether two filenames are equal exactly.
898 * <p>
899 * No processing is performed on the filenames other than comparison,
900 * thus this is merely a null-safe case-sensitive equals.
901 *
902 * @param filename1 the first filename to query, may be null
903 * @param filename2 the second filename to query, may be null
904 * @return true if the filenames are equal, null equals null
905 * @see IOCase#SENSITIVE
906 */
907 public static boolean equals(String filename1, String filename2) {
908 return equals(filename1, filename2, false, IOCase.SENSITIVE);
909 }
910
911 /**
912 * Checks whether two filenames are equal using the case rules of the system.
913 * <p>
914 * No processing is performed on the filenames other than comparison.
915 * The check is case-sensitive on Unix and case-insensitive on Windows.
916 *
917 * @param filename1 the first filename to query, may be null
918 * @param filename2 the second filename to query, may be null
919 * @return true if the filenames are equal, null equals null
920 * @see IOCase#SYSTEM
921 */
922 public static boolean equalsOnSystem(String filename1, String filename2) {
923 return equals(filename1, filename2, false, IOCase.SYSTEM);
924 }
925
926 //-----------------------------------------------------------------------
927 /**
928 * Checks whether two filenames are equal after both have been normalized.
929 * <p>
930 * Both filenames are first passed to {@link #normalize(String)}.
931 * The check is then performed in a case-sensitive manner.
932 *
933 * @param filename1 the first filename to query, may be null
934 * @param filename2 the second filename to query, may be null
935 * @return true if the filenames are equal, null equals null
936 * @see IOCase#SENSITIVE
937 */
938 public static boolean equalsNormalized(String filename1, String filename2) {
939 return equals(filename1, filename2, true, IOCase.SENSITIVE);
940 }
941
942 /**
943 * Checks whether two filenames are equal after both have been normalized
944 * and using the case rules of the system.
945 * <p>
946 * Both filenames are first passed to {@link #normalize(String)}.
947 * The check is then performed case-sensitive on Unix and
948 * case-insensitive on Windows.
949 *
950 * @param filename1 the first filename to query, may be null
951 * @param filename2 the second filename to query, may be null
952 * @return true if the filenames are equal, null equals null
953 * @see IOCase#SYSTEM
954 */
955 public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
956 return equals(filename1, filename2, true, IOCase.SYSTEM);
957 }
958
959 /**
960 * Checks whether two filenames are equal, optionally normalizing and providing
961 * control over the case-sensitivity.
962 *
963 * @param filename1 the first filename to query, may be null
964 * @param filename2 the second filename to query, may be null
965 * @param normalized whether to normalize the filenames
966 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
967 * @return true if the filenames are equal, null equals null
968 * @since Commons IO 1.3
969 */
970 public static boolean equals(
971 String filename1, String filename2,
972 boolean normalized, IOCase caseSensitivity) {
973
974 if (filename1 == null || filename2 == null) {
975 return filename1 == filename2;
976 }
977 if (normalized) {
978 filename1 = normalize(filename1);
979 filename2 = normalize(filename2);
980 }
981 if (caseSensitivity == null) {
982 caseSensitivity = IOCase.SENSITIVE;
983 }
984 return caseSensitivity.checkEquals(filename1, filename2);
985 }
986
987 //-----------------------------------------------------------------------
988 /**
989 * Checks whether the extension of the filename is that specified.
990 * <p>
991 * This method obtains the extension as the textual part of the filename
992 * after the last dot. There must be no directory separator after the dot.
993 * The extension check is case-sensitive on all platforms.
994 *
995 * @param filename the filename to query, null returns false
996 * @param extension the extension to check for, null or empty checks for no extension
997 * @return true if the filename has the specified extension
998 */
999 public static boolean isExtension(String filename, String extension) {
1000 if (filename == null) {
1001 return false;
1002 }
1003 if (extension == null || extension.length() == 0) {
1004 return (indexOfExtension(filename) == -1);
1005 }
1006 String fileExt = getExtension(filename);
1007 return fileExt.equals(extension);
1008 }
1009
1010 /**
1011 * Checks whether the extension of the filename is one of those specified.
1012 * <p>
1013 * This method obtains the extension as the textual part of the filename
1014 * after the last dot. There must be no directory separator after the dot.
1015 * The extension check is case-sensitive on all platforms.
1016 *
1017 * @param filename the filename to query, null returns false
1018 * @param extensions the extensions to check for, null checks for no extension
1019 * @return true if the filename is one of the extensions
1020 */
1021 public static boolean isExtension(String filename, String[] extensions) {
1022 if (filename == null) {
1023 return false;
1024 }
1025 if (extensions == null || extensions.length == 0) {
1026 return (indexOfExtension(filename) == -1);
1027 }
1028 String fileExt = getExtension(filename);
1029 for (int i = 0; i < extensions.length; i++) {
1030 if (fileExt.equals(extensions[i])) {
1031 return true;
1032 }
1033 }
1034 return false;
1035 }
1036
1037 /**
1038 * Checks whether the extension of the filename is one of those specified.
1039 * <p>
1040 * This method obtains the extension as the textual part of the filename
1041 * after the last dot. There must be no directory separator after the dot.
1042 * The extension check is case-sensitive on all platforms.
1043 *
1044 * @param filename the filename to query, null returns false
1045 * @param extensions the extensions to check for, null checks for no extension
1046 * @return true if the filename is one of the extensions
1047 */
1048 public static boolean isExtension(String filename, Collection extensions) {
1049 if (filename == null) {
1050 return false;
1051 }
1052 if (extensions == null || extensions.isEmpty()) {
1053 return (indexOfExtension(filename) == -1);
1054 }
1055 String fileExt = getExtension(filename);
1056 for (Iterator it = extensions.iterator(); it.hasNext();) {
1057 if (fileExt.equals(it.next())) {
1058 return true;
1059 }
1060 }
1061 return false;
1062 }
1063
1064 //-----------------------------------------------------------------------
1065 /**
1066 * Checks a filename to see if it matches the specified wildcard matcher,
1067 * always testing case-sensitive.
1068 * <p>
1069 * The wildcard matcher uses the characters '?' and '*' to represent a
1070 * single or multiple wildcard characters.
1071 * This is the same as often found on Dos/Unix command lines.
1072 * The check is case-sensitive always.
1073 * <pre>
1074 * wildcardMatch("c.txt", "*.txt") --> true
1075 * wildcardMatch("c.txt", "*.jpg") --> false
1076 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1077 * wildcardMatch("c.txt", "*.???") --> true
1078 * wildcardMatch("c.txt", "*.????") --> false
1079 * </pre>
1080 *
1081 * @param filename the filename to match on
1082 * @param wildcardMatcher the wildcard string to match against
1083 * @return true if the filename matches the wilcard string
1084 * @see IOCase#SENSITIVE
1085 */
1086 public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1087 return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1088 }
1089
1090 /**
1091 * Checks a filename to see if it matches the specified wildcard matcher
1092 * using the case rules of the system.
1093 * <p>
1094 * The wildcard matcher uses the characters '?' and '*' to represent a
1095 * single or multiple wildcard characters.
1096 * This is the same as often found on Dos/Unix command lines.
1097 * The check is case-sensitive on Unix and case-insensitive on Windows.
1098 * <pre>
1099 * wildcardMatch("c.txt", "*.txt") --> true
1100 * wildcardMatch("c.txt", "*.jpg") --> false
1101 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1102 * wildcardMatch("c.txt", "*.???") --> true
1103 * wildcardMatch("c.txt", "*.????") --> false
1104 * </pre>
1105 *
1106 * @param filename the filename to match on
1107 * @param wildcardMatcher the wildcard string to match against
1108 * @return true if the filename matches the wilcard string
1109 * @see IOCase#SYSTEM
1110 */
1111 public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1112 return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1113 }
1114
1115 /**
1116 * Checks a filename to see if it matches the specified wildcard matcher
1117 * allowing control over case-sensitivity.
1118 * <p>
1119 * The wildcard matcher uses the characters '?' and '*' to represent a
1120 * single or multiple wildcard characters.
1121 *
1122 * @param filename the filename to match on
1123 * @param wildcardMatcher the wildcard string to match against
1124 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
1125 * @return true if the filename matches the wilcard string
1126 * @since Commons IO 1.3
1127 */
1128 public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1129 if (filename == null && wildcardMatcher == null) {
1130 return true;
1131 }
1132 if (filename == null || wildcardMatcher == null) {
1133 return false;
1134 }
1135 if (caseSensitivity == null) {
1136 caseSensitivity = IOCase.SENSITIVE;
1137 }
1138 filename = caseSensitivity.convertCase(filename);
1139 wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher);
1140 String[] wcs = splitOnTokens(wildcardMatcher);
1141 boolean anyChars = false;
1142 int textIdx = 0;
1143 int wcsIdx = 0;
1144 Stack backtrack = new Stack();
1145
1146 // loop around a backtrack stack, to handle complex * matching
1147 do {
1148 if (backtrack.size() > 0) {
1149 int[] array = (int[]) backtrack.pop();
1150 wcsIdx = array[0];
1151 textIdx = array[1];
1152 anyChars = true;
1153 }
1154
1155 // loop whilst tokens and text left to process
1156 while (wcsIdx < wcs.length) {
1157
1158 if (wcs[wcsIdx].equals("?")) {
1159 // ? so move to next text char
1160 textIdx++;
1161 anyChars = false;
1162
1163 } else if (wcs[wcsIdx].equals("*")) {
1164 // set any chars status
1165 anyChars = true;
1166 if (wcsIdx == wcs.length - 1) {
1167 textIdx = filename.length();
1168 }
1169
1170 } else {
1171 // matching text token
1172 if (anyChars) {
1173 // any chars then try to locate text token
1174 textIdx = filename.indexOf(wcs[wcsIdx], textIdx);
1175 if (textIdx == -1) {
1176 // token not found
1177 break;
1178 }
1179 int repeat = filename.indexOf(wcs[wcsIdx], textIdx + 1);
1180 if (repeat >= 0) {
1181 backtrack.push(new int[] {wcsIdx, repeat});
1182 }
1183 } else {
1184 // matching from current position
1185 if (!filename.startsWith(wcs[wcsIdx], textIdx)) {
1186 // couldnt match token
1187 break;
1188 }
1189 }
1190
1191 // matched text token, move text index to end of matched token
1192 textIdx += wcs[wcsIdx].length();
1193 anyChars = false;
1194 }
1195
1196 wcsIdx++;
1197 }
1198
1199 // full match
1200 if (wcsIdx == wcs.length && textIdx == filename.length()) {
1201 return true;
1202 }
1203
1204 } while (backtrack.size() > 0);
1205
1206 return false;
1207 }
1208
1209 /**
1210 * Splits a string into a number of tokens.
1211 *
1212 * @param text the text to split
1213 * @return the tokens, never null
1214 */
1215 static String[] splitOnTokens(String text) {
1216 // used by wildcardMatch
1217 // package level so a unit test may run on this
1218
1219 if (text.indexOf("?") == -1 && text.indexOf("*") == -1) {
1220 return new String[] { text };
1221 }
1222
1223 char[] array = text.toCharArray();
1224 ArrayList list = new ArrayList();
1225 StringBuffer buffer = new StringBuffer();
1226 for (int i = 0; i < array.length; i++) {
1227 if (array[i] == '?' || array[i] == '*') {
1228 if (buffer.length() != 0) {
1229 list.add(buffer.toString());
1230 buffer.setLength(0);
1231 }
1232 if (array[i] == '?') {
1233 list.add("?");
1234 } else if (list.size() == 0 ||
1235 (i > 0 && list.get(list.size() - 1).equals("*") == false)) {
1236 list.add("*");
1237 }
1238 } else {
1239 buffer.append(array[i]);
1240 }
1241 }
1242 if (buffer.length() != 0) {
1243 list.add(buffer.toString());
1244 }
1245
1246 return (String[]) list.toArray( new String[ list.size() ] );
1247 }
1248
1249 }