Commit 66f357fa by Aaron Leung

Matcher for optionally-signed ints. I think this wraps up the primitive stuff…

Matcher for optionally-signed ints. I think this wraps up the primitive stuff that's common to most/all languages.
parent af774749
...@@ -36,6 +36,10 @@ char *prefix_is_delimited_by(char *src, char *beg, char *end, int esc) { ...@@ -36,6 +36,10 @@ char *prefix_is_delimited_by(char *src, char *beg, char *end, int esc) {
} }
} }
/*
 * Epsilon matcher: accepts the empty prefix.
 *
 * Consumes no input and hands back the position it was given, so it has the
 * same signature as the other prefix matchers and can be passed wherever a
 * matcher function is expected.
 */
char *prefix_epsilon(char *src) {
  char *unconsumed = src;  /* nothing is matched, nothing is skipped */
  return unconsumed;
}
char *_prefix_alternatives(char *src, ...) { char *_prefix_alternatives(char *src, ...) {
va_list ap; va_list ap;
va_start(ap, src); va_start(ap, src);
...@@ -72,71 +76,73 @@ char *prefix_one_plus(char *src, prefix_matcher m) { ...@@ -72,71 +76,73 @@ char *prefix_one_plus(char *src, prefix_matcher m) {
return src; return src;
} }
DEFINE_SINGLE_CTYPE_MATCHER(space); SINGLE_CTYPE_MATCHER(space);
DEFINE_SINGLE_CTYPE_MATCHER(alpha); SINGLE_CTYPE_MATCHER(alpha);
DEFINE_SINGLE_CTYPE_MATCHER(digit); SINGLE_CTYPE_MATCHER(digit);
DEFINE_SINGLE_CTYPE_MATCHER(xdigit); SINGLE_CTYPE_MATCHER(xdigit);
DEFINE_SINGLE_CTYPE_MATCHER(alnum); SINGLE_CTYPE_MATCHER(alnum);
DEFINE_SINGLE_CTYPE_MATCHER(punct); SINGLE_CTYPE_MATCHER(punct);
DEFINE_CTYPE_SEQUENCE_MATCHER(space); CTYPE_SEQUENCE_MATCHER(space);
DEFINE_CTYPE_SEQUENCE_MATCHER(alpha); CTYPE_SEQUENCE_MATCHER(alpha);
DEFINE_CTYPE_SEQUENCE_MATCHER(digit); CTYPE_SEQUENCE_MATCHER(digit);
DEFINE_CTYPE_SEQUENCE_MATCHER(xdigit); CTYPE_SEQUENCE_MATCHER(xdigit);
DEFINE_CTYPE_SEQUENCE_MATCHER(alnum); CTYPE_SEQUENCE_MATCHER(alnum);
DEFINE_CTYPE_SEQUENCE_MATCHER(punct); CTYPE_SEQUENCE_MATCHER(punct);
DEFINE_TO_EOL_MATCHER(shell_comment, "#"); TO_EOL_MATCHER(shell_comment, "#");
DEFINE_TO_EOL_MATCHER(c_line_comment, "//"); TO_EOL_MATCHER(c_line_comment, "//");
DEFINE_DELIMITED_MATCHER(c_block_comment, "/*", "*/", 0); DELIMITED_MATCHER(c_block_comment, "/*", "*/", 0);
DEFINE_DELIMITED_MATCHER(double_quoted_string, "\"", "\"", 1); DELIMITED_MATCHER(double_quoted_string, "\"", "\"", 1);
DEFINE_DELIMITED_MATCHER(single_quoted_string, "\'", "\'", 1); DELIMITED_MATCHER(single_quoted_string, "\'", "\'", 1);
DEFINE_DELIMITED_MATCHER(interpolant, "#{", "}", 0); DELIMITED_MATCHER(interpolant, "#{", "}", 0);
DEFINE_CHAR_MATCHER (lparen, '('); CHAR_MATCHER (lparen, '(');
DEFINE_CHAR_MATCHER (rparen, ')'); CHAR_MATCHER (rparen, ')');
DEFINE_CHAR_MATCHER (lbrack, '['); CHAR_MATCHER (lbrack, '[');
DEFINE_CHAR_MATCHER (rbrack, ']'); CHAR_MATCHER (rbrack, ']');
DEFINE_CHAR_MATCHER (lbrace, '{'); CHAR_MATCHER (lbrace, '{');
DEFINE_CHAR_MATCHER (rbrace, '}'); CHAR_MATCHER (rbrace, '}');
DEFINE_CHAR_MATCHER (underscore, '_'); CHAR_MATCHER (underscore, '_');
DEFINE_CHAR_MATCHER (hyphen, '-'); CHAR_MATCHER (hyphen, '-');
DEFINE_CHAR_MATCHER (semicolon, ';'); CHAR_MATCHER (semicolon, ';');
DEFINE_CHAR_MATCHER (colon, ':'); CHAR_MATCHER (colon, ':');
DEFINE_CHAR_MATCHER (period, '.'); CHAR_MATCHER (period, '.');
DEFINE_CHAR_MATCHER (question, '?'); CHAR_MATCHER (question, '?');
DEFINE_CHAR_MATCHER (exclamation, '!'); CHAR_MATCHER (exclamation, '!');
DEFINE_CHAR_MATCHER (tilde, '~'); CHAR_MATCHER (tilde, '~');
DEFINE_CHAR_MATCHER (backquote, '`'); CHAR_MATCHER (backquote, '`');
DEFINE_CHAR_MATCHER (quote, '\"'); CHAR_MATCHER (quote, '\"');
DEFINE_CHAR_MATCHER (apostrophe, '\''); CHAR_MATCHER (apostrophe, '\'');
DEFINE_CHAR_MATCHER (ampersand, '&'); CHAR_MATCHER (ampersand, '&');
DEFINE_CHAR_MATCHER (caret, '^'); CHAR_MATCHER (caret, '^');
DEFINE_CHAR_MATCHER (pipe, '|'); CHAR_MATCHER (pipe, '|');
DEFINE_CHAR_MATCHER (slash, '/'); CHAR_MATCHER (slash, '/');
DEFINE_CHAR_MATCHER (backslash, '\\'); CHAR_MATCHER (backslash, '\\');
DEFINE_CHAR_MATCHER (asterisk, '*'); CHAR_MATCHER (asterisk, '*');
DEFINE_CHAR_MATCHER (pound, '#'); CHAR_MATCHER (pound, '#');
DEFINE_CHAR_MATCHER (hash, '#'); CHAR_MATCHER (hash, '#');
DEFINE_CHAR_MATCHER (plus, '+'); CHAR_MATCHER (plus, '+');
DEFINE_CHAR_MATCHER (minus, '-'); CHAR_MATCHER (minus, '-');
DEFINE_CHAR_MATCHER (times, '*'); CHAR_MATCHER (times, '*');
DEFINE_CHAR_MATCHER (divide, '/'); CHAR_MATCHER (divide, '/');
DEFINE_CHAR_MATCHER (percent, '%'); CHAR_MATCHER (percent, '%');
DEFINE_CHAR_MATCHER (dollar, '$'); CHAR_MATCHER (dollar, '$');
DEFINE_CHAR_MATCHER (gt, '>'); CHAR_MATCHER (gt, '>');
DEFINE_CHARS_MATCHER(gte, ">="); CHARS_MATCHER(gte, ">=");
DEFINE_CHAR_MATCHER (lt, '<'); CHAR_MATCHER (lt, '<');
DEFINE_CHARS_MATCHER(lte, "<="); CHARS_MATCHER(lte, "<=");
DEFINE_CHAR_MATCHER (eq, '='); CHAR_MATCHER (eq, '=');
DEFINE_CHAR_MATCHER (assign, '='); CHAR_MATCHER (assign, '=');
DEFINE_CHARS_MATCHER(equal, "=="); CHARS_MATCHER(equal, "==");
static DEFINE_ALTERNATIVES_MATCHER(identifier_initial, prefix_is_alphas, prefix_is_underscore); static ALTERNATIVES_MATCHER(identifier_initial, prefix_is_alphas, prefix_is_underscore);
static DEFINE_ALTERNATIVES_MATCHER(identifier_trailer, prefix_is_alnums, prefix_is_underscore); static ALTERNATIVES_MATCHER(identifier_trailer, prefix_is_alnums, prefix_is_underscore);
DEFINE_FIRST_REST_MATCHER(identifier, prefix_is_identifier_initial, prefix_is_identifier_trailer); FIRST_REST_MATCHER(identifier, prefix_is_identifier_initial, prefix_is_identifier_trailer);
/*
 * File-local helper: a sign is '+', '-', or nothing at all.
 * prefix_epsilon returns its argument unchanged, so listing it as an
 * alternative presumably lets this matcher succeed when no sign is present
 * (confirm against _prefix_alternatives, which is not shown here).
 */
static ALTERNATIVES_MATCHER(optional_sign, prefix_is_plus, prefix_is_minus, prefix_epsilon);
/*
 * prefix_is_integer: the optional sign above combined with prefix_is_digits
 * via prefix_sequence. prefix_is_digits needs at least one digit, so a bare
 * sign should not match (assuming prefix_sequence requires every listed
 * matcher to succeed -- TODO confirm).
 */
SEQUENCE_MATCHER(integer, prefix_is_optional_sign, prefix_is_digits);
\ No newline at end of file
typedef char *(*prefix_matcher)(char *); typedef char *(*prefix_matcher)(char *);
#define DECLARE_MATCHER(name) \ #define DECLARE(name) \
char *prefix_is_ ## name(char *) char *prefix_is_ ## name(char *)
#define DEFINE_CHAR_MATCHER(name, prefix) \ #define CHAR_MATCHER(name, prefix) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
return prefix_is_char(src, prefix); \ return prefix_is_char(src, prefix); \
} }
#define DEFINE_CHARS_MATCHER(name, prefix) \ #define CHARS_MATCHER(name, prefix) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
return prefix_is_chars(src, prefix); \ return prefix_is_chars(src, prefix); \
} }
/*
 * CLASS_CHAR_MATCHER(name, class): defines prefix_is_<name>(char *src), which
 * forwards to prefix_is_one_of(src, class).
 * The generated name must be pasted as "prefix_is_" ## name (with the
 * trailing underscore) so it agrees with DECLARE(name) and the other
 * matcher-generating macros.
 */
#define DEFINE_SINGLE_CTYPE_MATCHER(type) \ #define CLASS_CHAR_MATCHER(name, class) \
char *prefix_is_ ## name(char *src) { \
return prefix_is_one_of(src, class); \
}
/*
 * CLASS_CHARS_MATCHER(name, class): defines prefix_is_<name>(char *src), which
 * forwards to prefix_is_some_of(src, class).
 * The generated function name follows the same prefix_is_<name> scheme that
 * DECLARE(name) declares.
 */
#define CLASS_CHARS_MATCHER(name, class) \
char *prefix_is_ ## name(char *src) { \
  return prefix_is_some_of(src, class); \
}
#define SINGLE_CTYPE_MATCHER(type) \
char *prefix_is_ ## type(char *src) { \ char *prefix_is_ ## type(char *src) { \
return is ## type(*src) ? src+1 : NULL; \ return is ## type(*src) ? src+1 : NULL; \
} }
#define DEFINE_CTYPE_SEQUENCE_MATCHER(type) \ #define CTYPE_SEQUENCE_MATCHER(type) \
char *prefix_is_ ## type ## s(char *src) { \ char *prefix_is_ ## type ## s(char *src) { \
char *p = src; \ char *p = src; \
while (is ## type(*p)) p++; \ while (is ## type(*p)) p++; \
return p == src ? NULL : p; \ return p == src ? NULL : p; \
} }
#define DEFINE_TO_EOL_MATCHER(name, prefix) \ #define TO_EOL_MATCHER(name, prefix) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
if (!(src = prefix_is_chars(src, prefix))) return NULL; \ if (!(src = prefix_is_chars(src, prefix))) return NULL; \
while(*src && *src != '\n') src++; \ while(*src && *src != '\n') src++; \
return src; \ return src; \
} }
#define DEFINE_DELIMITED_MATCHER(name, begin, end, escapable) \ #define DELIMITED_MATCHER(name, begin, end, escapable) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
return prefix_is_delimited_by(src, begin, end, escapable); \ return prefix_is_delimited_by(src, begin, end, escapable); \
} }
#define DEFINE_ALTERNATIVES_MATCHER(name, ...) \ #define ALTERNATIVES_MATCHER(name, ...) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
return prefix_alternatives(src, __VA_ARGS__); \ return prefix_alternatives(src, __VA_ARGS__); \
} }
#define DEFINE_SEQUENCE_MATCHER(name, ...) \ #define SEQUENCE_MATCHER(name, ...) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
return prefix_sequence(src, __VA_ARGS__); \ return prefix_sequence(src, __VA_ARGS__); \
} }
#define DEFINE_OPTIONAL_MATCHER(name, matcher) \ #define OPTIONAL_MATCHER(name, matcher) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
return prefix_optional(src, matcher); \ return prefix_optional(src, matcher); \
} }
#define DEFINE_FIRST_REST_MATCHER(name, first_matcher, rest_matcher) \ #define FIRST_REST_MATCHER(name, first_matcher, rest_matcher) \
char *prefix_is_ ## name(char *src) { \ char *prefix_is_ ## name(char *src) { \
if (src = first_matcher(src)) src = prefix_zero_plus(src, rest_matcher); \ if (src = first_matcher(src)) src = prefix_zero_plus(src, rest_matcher); \
return src; \ return src; \
...@@ -64,6 +74,7 @@ char *prefix_is_one_of(char *src, char *class); ...@@ -64,6 +74,7 @@ char *prefix_is_one_of(char *src, char *class);
char *prefix_is_some_of(char *src, char *class); char *prefix_is_some_of(char *src, char *class);
char *prefix_is_delimited_by(char *src, char *beg, char *end, int esc); char *prefix_is_delimited_by(char *src, char *beg, char *end, int esc);
/* Epsilon matcher: accepts the empty prefix and returns src unchanged. */
char *prefix_epsilon(char *src);
char *_prefix_alternatives(char *src, ...); char *_prefix_alternatives(char *src, ...);
#define prefix_alternatives(src, ...) _prefix_alternatives(src, __VA_ARGS__, NULL) #define prefix_alternatives(src, ...) _prefix_alternatives(src, __VA_ARGS__, NULL)
char *_prefix_sequence(char *src, ...); char *_prefix_sequence(char *src, ...);
...@@ -72,68 +83,69 @@ char *prefix_optional(char *src, prefix_matcher m); ...@@ -72,68 +83,69 @@ char *prefix_optional(char *src, prefix_matcher m);
char *prefix_zero_plus(char *src, prefix_matcher m); char *prefix_zero_plus(char *src, prefix_matcher m);
char *prefix_one_plus(char *src, prefix_matcher m); char *prefix_one_plus(char *src, prefix_matcher m);
DECLARE_MATCHER(space); DECLARE(space);
DECLARE_MATCHER(alpha); DECLARE(alpha);
DECLARE_MATCHER(digit); DECLARE(digit);
DECLARE_MATCHER(xdigit); DECLARE(xdigit);
DECLARE_MATCHER(alnum); DECLARE(alnum);
DECLARE_MATCHER(punct); DECLARE(punct);
DECLARE_MATCHER(spaces); DECLARE(spaces);
DECLARE_MATCHER(alphas); DECLARE(alphas);
DECLARE_MATCHER(digits); DECLARE(digits);
DECLARE_MATCHER(xdigits); DECLARE(xdigits);
DECLARE_MATCHER(alnums); DECLARE(alnums);
DECLARE_MATCHER(puncts); DECLARE(puncts);
DECLARE_MATCHER(shell_comment); DECLARE(shell_comment);
DECLARE_MATCHER(c_line_comment); DECLARE(c_line_comment);
DECLARE_MATCHER(c_block_comment); DECLARE(c_block_comment);
DECLARE_MATCHER(double_quoted_string); DECLARE(double_quoted_string);
DECLARE_MATCHER(single_quoted_string); DECLARE(single_quoted_string);
DECLARE_MATCHER(interpolant); DECLARE(interpolant);
DECLARE_MATCHER(lparen); DECLARE(lparen);
DECLARE_MATCHER(rparen); DECLARE(rparen);
DECLARE_MATCHER(lbrack); DECLARE(lbrack);
DECLARE_MATCHER(rbrack); DECLARE(rbrack);
DECLARE_MATCHER(lbrace); DECLARE(lbrace);
DECLARE_MATCHER(rbrace); DECLARE(rbrace);
DECLARE_MATCHER(underscore); DECLARE(underscore);
DECLARE_MATCHER(hyphen); DECLARE(hyphen);
DECLARE_MATCHER(semicolon); DECLARE(semicolon);
DECLARE_MATCHER(colon); DECLARE(colon);
DECLARE_MATCHER(period); DECLARE(period);
DECLARE_MATCHER(question); DECLARE(question);
DECLARE_MATCHER(exclamation); DECLARE(exclamation);
DECLARE_MATCHER(tilde); DECLARE(tilde);
DECLARE_MATCHER(backquote); DECLARE(backquote);
DECLARE_MATCHER(quote); DECLARE(quote);
DECLARE_MATCHER(apostrophe); DECLARE(apostrophe);
DECLARE_MATCHER(ampersand); DECLARE(ampersand);
DECLARE_MATCHER(caret); DECLARE(caret);
DECLARE_MATCHER(pipe); DECLARE(pipe);
DECLARE_MATCHER(slash); DECLARE(slash);
DECLARE_MATCHER(backslash); DECLARE(backslash);
DECLARE_MATCHER(asterisk); DECLARE(asterisk);
DECLARE_MATCHER(pound); DECLARE(pound);
DECLARE_MATCHER(hash); DECLARE(hash);
DECLARE_MATCHER(plus); DECLARE(plus);
DECLARE_MATCHER(minus); DECLARE(minus);
DECLARE_MATCHER(times); DECLARE(times);
DECLARE_MATCHER(divide); DECLARE(divide);
DECLARE_MATCHER(percent); DECLARE(percent);
DECLARE_MATCHER(dollar); DECLARE(dollar);
DECLARE_MATCHER(gt); DECLARE(gt);
DECLARE_MATCHER(gte); DECLARE(gte);
DECLARE_MATCHER(lt); DECLARE(lt);
DECLARE_MATCHER(lte); DECLARE(lte);
DECLARE_MATCHER(eq); DECLARE(eq);
DECLARE_MATCHER(assign); DECLARE(assign);
DECLARE_MATCHER(equal); DECLARE(equal);
DECLARE_MATCHER(identifier); DECLARE(identifier);
/* Declares prefix_is_integer: an optional '+'/'-' sign combined with digits. */
DECLARE(integer);
\ No newline at end of file
...@@ -27,15 +27,19 @@ int main() { ...@@ -27,15 +27,19 @@ int main() {
char *sqstring = "'this \\'is\\' a \"string\" now' blah blah blah"; char *sqstring = "'this \\'is\\' a \"string\" now' blah blah blah";
char *scomment = "# a shell-style comment"; char *scomment = "# a shell-style comment";
char *bcomment = "/* this is a c comment \\*/ blah blah"; char *bcomment = "/* this is a c comment \\*/ blah blah";
char *noncomment = "/* blah blah"; char *non_comment = "/* blah blah";
char *interpolant = "#{ this is an interpolant \\} blah blah"; char *interpolant = "#{ this is an interpolant \\} blah blah";
char *words = "hello my name is aaron"; char *words = "hello my name is aaron";
char *id1 = "_identifier123{blah bloo}"; char *id1 = "_identifier123{blah bloo}";
char *non_id = "12non_ident_ifier_"; char *non_id = "12non_ident_ifier_";
char *word2 = "-blah-blah_blah"; char *word2 = "-blah-blah_bl12-34:foo";
char *non_word = "-12blah-bloo";
char *selector = "#foo > :first-child { color: #abcdef; }"; char *selector = "#foo > :first-child { color: #abcdef; }";
char *lcomment = "// blah blah blah // end\n blah blah"; char *lcomment = "// blah blah blah // end\n blah blah";
char *id2 = "badec4669264hello"; char *id2 = "badec4669264hello";
char *integer1 = "3837483+3";
char *integer2 = "+294739-4";
char *integer3 = "-294729+1";
test1(prefix_is_spaces, spaces); test1(prefix_is_spaces, spaces);
test1(prefix_is_spaces, words); test1(prefix_is_spaces, words);
...@@ -56,7 +60,7 @@ int main() { ...@@ -56,7 +60,7 @@ int main() {
test1(prefix_is_shell_comment, lcomment); test1(prefix_is_shell_comment, lcomment);
test1(prefix_is_c_block_comment, bcomment); test1(prefix_is_c_block_comment, bcomment);
test1(prefix_is_c_block_comment, noncomment); test1(prefix_is_c_block_comment, non_comment);
test1(prefix_is_double_quoted_string, dqstring); test1(prefix_is_double_quoted_string, dqstring);
test1(prefix_is_double_quoted_string, sqstring); test1(prefix_is_double_quoted_string, sqstring);
...@@ -68,7 +72,7 @@ int main() { ...@@ -68,7 +72,7 @@ int main() {
test1(prefix_is_interpolant, lcomment); test1(prefix_is_interpolant, lcomment);
test1(prefix_is_c_line_comment, lcomment); test1(prefix_is_c_line_comment, lcomment);
test1(prefix_is_c_line_comment, noncomment); test1(prefix_is_c_line_comment, non_comment);
testn(prefix_sequence, id2, prefix_is_alphas, prefix_is_digits); testn(prefix_sequence, id2, prefix_is_alphas, prefix_is_digits);
testn(prefix_sequence, id2, prefix_is_alphas, prefix_is_puncts); testn(prefix_sequence, id2, prefix_is_alphas, prefix_is_puncts);
...@@ -84,6 +88,11 @@ int main() { ...@@ -84,6 +88,11 @@ int main() {
test1(prefix_is_identifier, id1); test1(prefix_is_identifier, id1);
test1(prefix_is_identifier, non_id); test1(prefix_is_identifier, non_id);
test1(prefix_is_integer, integer1);
test1(prefix_is_integer, integer2);
test1(prefix_is_integer, integer3);
test1(prefix_is_integer, word2);
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment