source: filezilla/trunk/fuentes/src/putty/wildcard.c @ 130

Last change on this file since 130 was 130, checked in by jrpelegrina, 4 years ago

First release to xenial

File size: 13.7 KB
Line 
1/*
2 * Wildcard matching engine for use with SFTP-based file transfer
3 * programs (PSFTP, new-look PSCP): since SFTP has no notion of
4 * getting the remote side to do globbing (and rightly so) we have
5 * to do it locally, by retrieving all the filenames in a directory
6 * and checking each against the wildcard pattern.
7 */
8
9#include <assert.h>
10#include <stdlib.h>
11#include <string.h>
12
13#include "putty.h"
14
15/*
16 * Definition of wildcard syntax:
17 *
18 *  - * matches any sequence of characters, including zero.
19 *  - ? matches exactly one character which can be anything.
20 *  - [abc] matches exactly one character which is a, b or c.
21 *  - [a-f] matches anything from a through f.
22 *  - [^a-f] matches anything _except_ a through f.
23 *  - [-_] matches - or _; [^-_] matches anything else. (The - is
24 *    non-special if it occurs immediately after the opening
25 *    bracket or ^.)
26 *  - [a^] matches an a or a ^. (The ^ is non-special if it does
27 *    _not_ occur immediately after the opening bracket.)
28 *  - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \.
29 *  - All other characters are non-special and match themselves.
30 */
31
32/*
33 * Some notes on differences from POSIX globs (IEEE Std 1003.1, 2003 ed.):
34 *  - backslashes act as escapes even within [] bracket expressions
35 *  - does not support [!...] for non-matching list (POSIX are weird);
36 *    NB POSIX allows [^...] as well via "A bracket expression starting
37 *    with an unquoted circumflex character produces unspecified
38 *    results". If we wanted to allow [!...] we might want to define
39 *    [^!] as having its literal meaning (match '^' or '!').
40 *  - none of the scary [[:class:]] stuff, etc
41 */
42
43/*
44 * The wildcard matching technique we use is very simple and
45 * potentially O(N^2) in running time, but I don't anticipate it
46 * being that bad in reality (particularly since N will be the size
47 * of a filename, which isn't all that much). Perhaps one day, once
48 * PuTTY has grown a regexp matcher for some other reason, I might
49 * come back and reimplement wildcards by translating them into
50 * regexps or directly into NFAs; but for the moment, in the
51 * absence of any other need for the NFA->DFA translation engine,
52 * anything more than the simplest possible wildcard matcher is
53 * vast code-size overkill.
54 *
55 * Essentially, these wildcards are much simpler than regexps in
56 * that they consist of a sequence of rigid fragments (? and [...]
57 * can never match more or less than one character) separated by
58 * asterisks. It is therefore extremely simple to look at a rigid
59 * fragment and determine whether or not it begins at a particular
60 * point in the test string; so we can search along the string
61 * until we find each fragment, then search for the next. As long
62 * as we find each fragment in the _first_ place it occurs, there
63 * will never be a danger of having to backpedal and try to find it
64 * again somewhere else.
65 */
66
67enum {
68    WC_TRAILINGBACKSLASH = 1,
69    WC_UNCLOSEDCLASS,
70    WC_INVALIDRANGE
71};
72
73/*
74 * Error reporting is done by returning various negative values
75 * from the wildcard routines. Passing any such value to wc_error
76 * will give a human-readable message.
77 */
78const char *wc_error(int value)
79{
80    value = abs(value);
81    switch (value) {
82      case WC_TRAILINGBACKSLASH:
83        return "'\' occurred at end of string (expected another character)";
84      case WC_UNCLOSEDCLASS:
85        return "expected ']' to close character class";
86      case WC_INVALIDRANGE:
87        return "character range was not terminated (']' just after '-')";
88    }
89    return "INTERNAL ERROR: unrecognised wildcard error number";
90}
91
92/*
93 * This is the routine that tests a target string to see if an
94 * initial substring of it matches a fragment. If successful, it
95 * returns 1, and advances both `fragment' and `target' past the
96 * fragment and matching substring respectively. If unsuccessful it
97 * returns zero. If the wildcard fragment suffers a syntax error,
98 * it returns <0 and the precise value indexes into wc_error.
99 */
100static int wc_match_fragment(const char **fragment, const char **target)
101{
102    const char *f, *t;
103
104    f = *fragment;
105    t = *target;
106    /*
107     * The fragment terminates at either the end of the string, or
108     * the first (unescaped) *.
109     */
110    while (*f && *f != '*' && *t) {
111        /*
112         * Extract one character from t, and one character's worth
113         * of pattern from f, and step along both. Return 0 if they
114         * fail to match.
115         */
116        if (*f == '\\') {
117            /*
118             * Backslash, which means f[1] is to be treated as a
119             * literal character no matter what it is. It may not
120             * be the end of the string.
121             */
122            if (!f[1])
123                return -WC_TRAILINGBACKSLASH;   /* error */
124            if (f[1] != *t)
125                return 0;              /* failed to match */
126            f += 2;
127        } else if (*f == '?') {
128            /*
129             * Question mark matches anything.
130             */
131            f++;
132        } else if (*f == '[') {
133            int invert = 0;
134            int matched = 0;
135            /*
136             * Open bracket introduces a character class.
137             */
138            f++;
139            if (*f == '^') {
140                invert = 1;
141                f++;
142            }
143            while (*f != ']') {
144                if (*f == '\\')
145                    f++;               /* backslashes still work */
146                if (!*f)
147                    return -WC_UNCLOSEDCLASS;   /* error again */
148                if (f[1] == '-') {
149                    int lower, upper, ourchr;
150                    lower = (unsigned char) *f++;
151                    f++;               /* eat the minus */
152                    if (*f == ']')
153                        return -WC_INVALIDRANGE;   /* different error! */
154                    if (*f == '\\')
155                        f++;           /* backslashes _still_ work */
156                    if (!*f)
157                        return -WC_UNCLOSEDCLASS;   /* error again */
158                    upper = (unsigned char) *f++;
159                    ourchr = (unsigned char) *t;
160                    if (lower > upper) {
161                        int t = lower; lower = upper; upper = t;
162                    }
163                    if (ourchr >= lower && ourchr <= upper)
164                        matched = 1;
165                } else {
166                    matched |= (*t == *f++);
167                }
168            }
169            if (invert == matched)
170                return 0;              /* failed to match character class */
171            f++;                       /* eat the ] */
172        } else {
173            /*
174             * Non-special character matches itself.
175             */
176            if (*f != *t)
177                return 0;
178            f++;
179        }
180        /*
181         * Now we've done that, increment t past the character we
182         * matched.
183         */
184        t++;
185    }
186    if (!*f || *f == '*') {
187        /*
188         * We have reached the end of f without finding a mismatch;
189         * so we're done. Update the caller pointers and return 1.
190         */
191        *fragment = f;
192        *target = t;
193        return 1;
194    }
195    /*
196     * Otherwise, we must have reached the end of t before we
197     * reached the end of f; so we've failed. Return 0.
198     */
199    return 0;
200}
201
202/*
203 * This is the real wildcard matching routine. It returns 1 for a
204 * successful match, 0 for an unsuccessful match, and <0 for a
205 * syntax error in the wildcard.
206 */
207int wc_match(const char *wildcard, const char *target)
208{
209    int ret;
210
211    /*
212     * Every time we see a '*' _followed_ by a fragment, we just
213     * search along the string for a location at which the fragment
214     * matches. The only special case is when we see a fragment
215     * right at the start, in which case we just call the matching
216     * routine once and give up if it fails.
217     */
218    if (*wildcard != '*') {
219        ret = wc_match_fragment(&wildcard, &target);
220        if (ret <= 0)
221            return ret;                /* pass back failure or error alike */
222    }
223
224    while (*wildcard) {
225        assert(*wildcard == '*');
226        while (*wildcard == '*')
227            wildcard++;
228
229        /*
230         * It's possible we've just hit the end of the wildcard
231         * after seeing a *, in which case there's no need to
232         * bother searching any more because we've won.
233         */
234        if (!*wildcard)
235            return 1;
236
237        /*
238         * Now `wildcard' points at the next fragment. So we
239         * attempt to match it against `target', and if that fails
240         * we increment `target' and try again, and so on. When we
241         * find we're about to try matching against the empty
242         * string, we give up and return 0.
243         */
244        ret = 0;
245        while (*target) {
246            const char *save_w = wildcard, *save_t = target;
247
248            ret = wc_match_fragment(&wildcard, &target);
249
250            if (ret < 0)
251                return ret;            /* syntax error */
252
253            if (ret > 0 && !*wildcard && *target) {
254                /*
255                 * Final special case - literally.
256                 *
257                 * This situation arises when we are matching a
258                 * _terminal_ fragment of the wildcard (that is,
259                 * there is nothing after it, e.g. "*a"), and it
260                 * has matched _too early_. For example, matching
261                 * "*a" against "parka" will match the "a" fragment
262                 * against the _first_ a, and then (if it weren't
263                 * for this special case) matching would fail
264                 * because we're at the end of the wildcard but not
265                 * at the end of the target string.
266                 *
267                 * In this case what we must do is measure the
268                 * length of the fragment in the target (which is
269                 * why we saved `target'), jump straight to that
270                 * distance from the end of the string using
271                 * strlen, and match the same fragment again there
272                 * (which is why we saved `wildcard'). Then we
273                 * return whatever that operation returns.
274                 */
275                target = save_t + strlen(save_t) - (target - save_t);
276                wildcard = save_w;
277                return wc_match_fragment(&wildcard, &target);
278            }
279
280            if (ret > 0)
281                break;
282            target++;
283        }
284        if (ret > 0)
285            continue;
286        return 0;
287    }
288
289    /*
290     * If we reach here, it must be because we successfully matched
291     * a fragment and then found ourselves right at the end of the
292     * wildcard. Hence, we return 1 if and only if we are also
293     * right at the end of the target.
294     */
295    return (*target ? 0 : 1);
296}
297
298/*
299 * Another utility routine that translates a non-wildcard string
300 * into its raw equivalent by removing any escaping backslashes.
301 * Expects a target string buffer of anything up to the length of
302 * the original wildcard. You can also pass NULL as the output
303 * buffer if you're only interested in the return value.
304 *
305 * Returns 1 on success, or 0 if a wildcard character was
306 * encountered. In the latter case the output string MAY not be
307 * zero-terminated and you should not use it for anything!
308 */
309int wc_unescape(char *output, const char *wildcard)
310{
311    while (*wildcard) {
312        if (*wildcard == '\\') {
313            wildcard++;
314            /* We are lenient about trailing backslashes in non-wildcards. */
315            if (*wildcard) {
316                if (output)
317                    *output++ = *wildcard;
318                wildcard++;
319            }
320        } else if (*wildcard == '*' || *wildcard == '?' ||
321                   *wildcard == '[' || *wildcard == ']') {
322            return 0;                  /* it's a wildcard! */
323        } else {
324            if (output)
325                *output++ = *wildcard;
326            wildcard++;
327        }
328    }
329    if (output)
330        *output = '\0';
331    return 1;                          /* it's clean */
332}
333
334#ifdef TESTMODE
335
336struct test {
337    const char *wildcard;
338    const char *target;
339    int expected_result;
340};
341
342const struct test fragment_tests[] = {
343    /*
344     * We exhaustively unit-test the fragment matching routine
345     * itself, which should save us the need to test all its
346     * intricacies during the full wildcard tests.
347     */
348    {"abc", "abc", 1},
349    {"abc", "abd", 0},
350    {"abc", "abcd", 1},
351    {"abcd", "abc", 0},
352    {"ab[cd]", "abc", 1},
353    {"ab[cd]", "abd", 1},
354    {"ab[cd]", "abe", 0},
355    {"ab[^cd]", "abc", 0},
356    {"ab[^cd]", "abd", 0},
357    {"ab[^cd]", "abe", 1},
358    {"ab\\", "abc", -WC_TRAILINGBACKSLASH},
359    {"ab\\*", "ab*", 1},
360    {"ab\\?", "ab*", 0},
361    {"ab?", "abc", 1},
362    {"ab?", "ab", 0},
363    {"ab[", "abc", -WC_UNCLOSEDCLASS},
364    {"ab[c-", "abb", -WC_UNCLOSEDCLASS},
365    {"ab[c-]", "abb", -WC_INVALIDRANGE},
366    {"ab[c-e]", "abb", 0},
367    {"ab[c-e]", "abc", 1},
368    {"ab[c-e]", "abd", 1},
369    {"ab[c-e]", "abe", 1},
370    {"ab[c-e]", "abf", 0},
371    {"ab[e-c]", "abb", 0},
372    {"ab[e-c]", "abc", 1},
373    {"ab[e-c]", "abd", 1},
374    {"ab[e-c]", "abe", 1},
375    {"ab[e-c]", "abf", 0},
376    {"ab[^c-e]", "abb", 1},
377    {"ab[^c-e]", "abc", 0},
378    {"ab[^c-e]", "abd", 0},
379    {"ab[^c-e]", "abe", 0},
380    {"ab[^c-e]", "abf", 1},
381    {"ab[^e-c]", "abb", 1},
382    {"ab[^e-c]", "abc", 0},
383    {"ab[^e-c]", "abd", 0},
384    {"ab[^e-c]", "abe", 0},
385    {"ab[^e-c]", "abf", 1},
386    {"ab[a^]", "aba", 1},
387    {"ab[a^]", "ab^", 1},
388    {"ab[a^]", "abb", 0},
389    {"ab[^a^]", "aba", 0},
390    {"ab[^a^]", "ab^", 0},
391    {"ab[^a^]", "abb", 1},
392    {"ab[-c]", "ab-", 1},
393    {"ab[-c]", "abc", 1},
394    {"ab[-c]", "abd", 0},
395    {"ab[^-c]", "ab-", 0},
396    {"ab[^-c]", "abc", 0},
397    {"ab[^-c]", "abd", 1},
398    {"ab[\\[-\\]]", "abZ", 0},
399    {"ab[\\[-\\]]", "ab[", 1},
400    {"ab[\\[-\\]]", "ab\\", 1},
401    {"ab[\\[-\\]]", "ab]", 1},
402    {"ab[\\[-\\]]", "ab^", 0},
403    {"ab[^\\[-\\]]", "abZ", 1},
404    {"ab[^\\[-\\]]", "ab[", 0},
405    {"ab[^\\[-\\]]", "ab\\", 0},
406    {"ab[^\\[-\\]]", "ab]", 0},
407    {"ab[^\\[-\\]]", "ab^", 1},
408    {"ab[a-fA-F]", "aba", 1},
409    {"ab[a-fA-F]", "abF", 1},
410    {"ab[a-fA-F]", "abZ", 0},
411};
412
413const struct test full_tests[] = {
414    {"a", "argh", 0},
415    {"a", "ba", 0},
416    {"a", "a", 1},
417    {"a*", "aardvark", 1},
418    {"a*", "badger", 0},
419    {"*a", "park", 0},
420    {"*a", "pArka", 1},
421    {"*a", "parka", 1},
422    {"*a*", "park", 1},
423    {"*a*", "perk", 0},
424    {"?b*r?", "abracadabra", 1},
425    {"?b*r?", "abracadabr", 0},
426    {"?b*r?", "abracadabzr", 0},
427};
428
429int main(void)
430{
431    int i;
432    int fails, passes;
433
434    fails = passes = 0;
435
436    for (i = 0; i < sizeof(fragment_tests)/sizeof(*fragment_tests); i++) {
437        const char *f, *t;
438        int eret, aret;
439        f = fragment_tests[i].wildcard;
440        t = fragment_tests[i].target;
441        eret = fragment_tests[i].expected_result;
442        aret = wc_match_fragment(&f, &t);
443        if (aret != eret) {
444            printf("failed test: /%s/ against /%s/ returned %d not %d\n",
445                   fragment_tests[i].wildcard, fragment_tests[i].target,
446                   aret, eret);
447            fails++;
448        } else
449            passes++;
450    }
451
452    for (i = 0; i < sizeof(full_tests)/sizeof(*full_tests); i++) {
453        const char *f, *t;
454        int eret, aret;
455        f = full_tests[i].wildcard;
456        t = full_tests[i].target;
457        eret = full_tests[i].expected_result;
458        aret = wc_match(f, t);
459        if (aret != eret) {
460            printf("failed test: /%s/ against /%s/ returned %d not %d\n",
461                   full_tests[i].wildcard, full_tests[i].target,
462                   aret, eret);
463            fails++;
464        } else
465            passes++;
466    }
467
468    printf("passed %d, failed %d\n", passes, fails);
469
470    return 0;
471}
472
473#endif
Note: See TracBrowser for help on using the repository browser.