Commit 66f357fa by Aaron Leung

Matcher for optionally-signed ints. I think this wraps up the primitive stuff…

Matcher for optionally-signed ints. I think this wraps up the primitive stuff that's common to most/all languages.
parent af774749
......@@ -36,6 +36,10 @@ char *prefix_is_delimited_by(char *src, char *beg, char *end, int esc) {
}
}
/*
 * The "empty" matcher: it consumes no input and hands back src unchanged.
 * Other matchers in this library report failure by returning NULL, so for
 * any non-NULL input this one always counts as a successful match.
 */
char *prefix_epsilon(char *src)
{
  return src;
}
char *_prefix_alternatives(char *src, ...) {
va_list ap;
va_start(ap, src);
......@@ -72,71 +76,73 @@ char *prefix_one_plus(char *src, prefix_matcher m) {
return src;
}
DEFINE_SINGLE_CTYPE_MATCHER(space);
DEFINE_SINGLE_CTYPE_MATCHER(alpha);
DEFINE_SINGLE_CTYPE_MATCHER(digit);
DEFINE_SINGLE_CTYPE_MATCHER(xdigit);
DEFINE_SINGLE_CTYPE_MATCHER(alnum);
DEFINE_SINGLE_CTYPE_MATCHER(punct);
DEFINE_CTYPE_SEQUENCE_MATCHER(space);
DEFINE_CTYPE_SEQUENCE_MATCHER(alpha);
DEFINE_CTYPE_SEQUENCE_MATCHER(digit);
DEFINE_CTYPE_SEQUENCE_MATCHER(xdigit);
DEFINE_CTYPE_SEQUENCE_MATCHER(alnum);
DEFINE_CTYPE_SEQUENCE_MATCHER(punct);
DEFINE_TO_EOL_MATCHER(shell_comment, "#");
DEFINE_TO_EOL_MATCHER(c_line_comment, "//");
DEFINE_DELIMITED_MATCHER(c_block_comment, "/*", "*/", 0);
DEFINE_DELIMITED_MATCHER(double_quoted_string, "\"", "\"", 1);
DEFINE_DELIMITED_MATCHER(single_quoted_string, "\'", "\'", 1);
DEFINE_DELIMITED_MATCHER(interpolant, "#{", "}", 0);
DEFINE_CHAR_MATCHER (lparen, '(');
DEFINE_CHAR_MATCHER (rparen, ')');
DEFINE_CHAR_MATCHER (lbrack, '[');
DEFINE_CHAR_MATCHER (rbrack, ']');
DEFINE_CHAR_MATCHER (lbrace, '{');
DEFINE_CHAR_MATCHER (rbrace, '}');
DEFINE_CHAR_MATCHER (underscore, '_');
DEFINE_CHAR_MATCHER (hyphen, '-');
DEFINE_CHAR_MATCHER (semicolon, ';');
DEFINE_CHAR_MATCHER (colon, ':');
DEFINE_CHAR_MATCHER (period, '.');
DEFINE_CHAR_MATCHER (question, '?');
DEFINE_CHAR_MATCHER (exclamation, '!');
DEFINE_CHAR_MATCHER (tilde, '~');
DEFINE_CHAR_MATCHER (backquote, '`');
DEFINE_CHAR_MATCHER (quote, '\"');
DEFINE_CHAR_MATCHER (apostrophe, '\'');
DEFINE_CHAR_MATCHER (ampersand, '&');
DEFINE_CHAR_MATCHER (caret, '^');
DEFINE_CHAR_MATCHER (pipe, '|');
DEFINE_CHAR_MATCHER (slash, '/');
DEFINE_CHAR_MATCHER (backslash, '\\');
DEFINE_CHAR_MATCHER (asterisk, '*');
DEFINE_CHAR_MATCHER (pound, '#');
DEFINE_CHAR_MATCHER (hash, '#');
DEFINE_CHAR_MATCHER (plus, '+');
DEFINE_CHAR_MATCHER (minus, '-');
DEFINE_CHAR_MATCHER (times, '*');
DEFINE_CHAR_MATCHER (divide, '/');
DEFINE_CHAR_MATCHER (percent, '%');
DEFINE_CHAR_MATCHER (dollar, '$');
DEFINE_CHAR_MATCHER (gt, '>');
DEFINE_CHARS_MATCHER(gte, ">=");
DEFINE_CHAR_MATCHER (lt, '<');
DEFINE_CHARS_MATCHER(lte, "<=");
DEFINE_CHAR_MATCHER (eq, '=');
DEFINE_CHAR_MATCHER (assign, '=');
DEFINE_CHARS_MATCHER(equal, "==");
static DEFINE_ALTERNATIVES_MATCHER(identifier_initial, prefix_is_alphas, prefix_is_underscore);
static DEFINE_ALTERNATIVES_MATCHER(identifier_trailer, prefix_is_alnums, prefix_is_underscore);
DEFINE_FIRST_REST_MATCHER(identifier, prefix_is_identifier_initial, prefix_is_identifier_trailer);
/* One-character matchers built on the <ctype.h> is<type>() predicates:
   prefix_is_space, prefix_is_alpha, prefix_is_digit, ... */
SINGLE_CTYPE_MATCHER(space);
SINGLE_CTYPE_MATCHER(alpha);
SINGLE_CTYPE_MATCHER(digit);
SINGLE_CTYPE_MATCHER(xdigit);
SINGLE_CTYPE_MATCHER(alnum);
SINGLE_CTYPE_MATCHER(punct);
/* Run matchers (note the pluralised names: prefix_is_spaces, prefix_is_alphas,
   ...): one or more characters of the class, NULL if there are none. */
CTYPE_SEQUENCE_MATCHER(space);
CTYPE_SEQUENCE_MATCHER(alpha);
CTYPE_SEQUENCE_MATCHER(digit);
CTYPE_SEQUENCE_MATCHER(xdigit);
CTYPE_SEQUENCE_MATCHER(alnum);
CTYPE_SEQUENCE_MATCHER(punct);
/* Line comments: the given introducer, then everything up to (but not
   including) the next newline or the end of the string. */
TO_EOL_MATCHER(shell_comment, "#");
TO_EOL_MATCHER(c_line_comment, "//");
/* Delimited spans, forwarded to prefix_is_delimited_by(src, begin, end, esc).
   NOTE(review): the last argument is 1 for the string matchers and 0 for the
   others; presumably it enables backslash-escaping of the closing delimiter --
   confirm against prefix_is_delimited_by. */
DELIMITED_MATCHER(c_block_comment, "/*", "*/", 0);
DELIMITED_MATCHER(double_quoted_string, "\"", "\"", 1);
DELIMITED_MATCHER(single_quoted_string, "\'", "\'", 1);
DELIMITED_MATCHER(interpolant, "#{", "}", 0);
/* Single literal characters (via prefix_is_char). Several characters are
   registered under two names: hyphen/minus, slash/divide, asterisk/times,
   pound/hash and eq/assign each generate separate functions for the same
   character. */
CHAR_MATCHER (lparen, '(');
CHAR_MATCHER (rparen, ')');
CHAR_MATCHER (lbrack, '[');
CHAR_MATCHER (rbrack, ']');
CHAR_MATCHER (lbrace, '{');
CHAR_MATCHER (rbrace, '}');
CHAR_MATCHER (underscore, '_');
CHAR_MATCHER (hyphen, '-');
CHAR_MATCHER (semicolon, ';');
CHAR_MATCHER (colon, ':');
CHAR_MATCHER (period, '.');
CHAR_MATCHER (question, '?');
CHAR_MATCHER (exclamation, '!');
CHAR_MATCHER (tilde, '~');
CHAR_MATCHER (backquote, '`');
CHAR_MATCHER (quote, '\"');
CHAR_MATCHER (apostrophe, '\'');
CHAR_MATCHER (ampersand, '&');
CHAR_MATCHER (caret, '^');
CHAR_MATCHER (pipe, '|');
CHAR_MATCHER (slash, '/');
CHAR_MATCHER (backslash, '\\');
CHAR_MATCHER (asterisk, '*');
CHAR_MATCHER (pound, '#');
CHAR_MATCHER (hash, '#');
CHAR_MATCHER (plus, '+');
CHAR_MATCHER (minus, '-');
CHAR_MATCHER (times, '*');
CHAR_MATCHER (divide, '/');
CHAR_MATCHER (percent, '%');
CHAR_MATCHER (dollar, '$');
/* Comparison and assignment operators; the two-character ones use
   CHARS_MATCHER (string literal) instead of CHAR_MATCHER (char literal). */
CHAR_MATCHER (gt, '>');
CHARS_MATCHER(gte, ">=");
CHAR_MATCHER (lt, '<');
CHARS_MATCHER(lte, "<=");
CHAR_MATCHER (eq, '=');
CHAR_MATCHER (assign, '=');
CHARS_MATCHER(equal, "==");
/* identifier := (alphas | underscore) followed by any number of
   (alnums | underscore). The two helper matchers are file-local (static). */
static ALTERNATIVES_MATCHER(identifier_initial, prefix_is_alphas, prefix_is_underscore);
static ALTERNATIVES_MATCHER(identifier_trailer, prefix_is_alnums, prefix_is_underscore);
FIRST_REST_MATCHER(identifier, prefix_is_identifier_initial, prefix_is_identifier_trailer);
/* integer := optional sign, then a run of digits. prefix_epsilon returns its
   input unchanged, so listing it as the final alternative makes the sign
   optional. NOTE(review): this relies on alternatives being tried in the order
   listed -- confirm against _prefix_alternatives. */
static ALTERNATIVES_MATCHER(optional_sign, prefix_is_plus, prefix_is_minus, prefix_epsilon);
SEQUENCE_MATCHER(integer, prefix_is_optional_sign, prefix_is_digits);
\ No newline at end of file
typedef char *(*prefix_matcher)(char *);
#define DECLARE_MATCHER(name) \
/* DECLARE(name) expands to the prototype `char *prefix_is_<name>(char *)`.
   No semicolon is included; the use site supplies it. */
#define DECLARE(name) \
char *prefix_is_ ## name(char *)
#define DEFINE_CHAR_MATCHER(name, prefix) \
/* CHAR_MATCHER(name, prefix) defines prefix_is_<name>(src), which forwards to
   prefix_is_char(src, prefix). `prefix` is given as a character literal at the
   use sites. */
#define CHAR_MATCHER(name, prefix) \
char *prefix_is_ ## name(char *src) { \
return prefix_is_char(src, prefix); \
}
#define DEFINE_CHARS_MATCHER(name, prefix) \
/* CHARS_MATCHER(name, prefix) defines prefix_is_<name>(src), which forwards to
   prefix_is_chars(src, prefix). `prefix` is given as a string literal at the
   use sites (e.g. ">="). */
#define CHARS_MATCHER(name, prefix) \
char *prefix_is_ ## name(char *src) { \
return prefix_is_chars(src, prefix); \
}
#define DEFINE_SINGLE_CTYPE_MATCHER(type) \
/*
 * CLASS_CHAR_MATCHER(name, class) defines prefix_is_<name>(src), which
 * forwards to prefix_is_one_of(src, class) with `class` as the character set.
 *
 * The generated identifier uses the same "prefix_is_" stem (trailing
 * underscore included) as every other matcher macro in this header, so that
 * DECLARE(name) declares exactly the function this macro defines.
 */
#define CLASS_CHAR_MATCHER(name, class) \
char *prefix_is_ ## name(char *src) { \
  return prefix_is_one_of(src, class); \
}
/*
 * CLASS_CHARS_MATCHER(name, class) defines prefix_is_<name>(src), which
 * forwards to prefix_is_some_of(src, class) with `class` as the character set.
 *
 * The function name is pasted together as prefix_is_<name>, matching the
 * naming used by DECLARE(name) and the other matcher macros in this header.
 */
#define CLASS_CHARS_MATCHER(name, class) \
char *prefix_is_ ## name(char *src) { \
  return prefix_is_some_of(src, class); \
}
/*
 * SINGLE_CTYPE_MATCHER(type) defines prefix_is_<type>(src) on top of the
 * <ctype.h> predicate is<type>().
 *
 * Returns src + 1 when the first character belongs to the class, NULL
 * otherwise.
 *
 * The character is converted to unsigned char before the call: the <ctype.h>
 * predicates are only defined for EOF and values representable as unsigned
 * char, and plain char may be signed, so bytes >= 0x80 would otherwise be
 * passed as negative values.
 */
#define SINGLE_CTYPE_MATCHER(type) \
char *prefix_is_ ## type(char *src) { \
  return is ## type((unsigned char) *src) ? src + 1 : NULL; \
}
#define DEFINE_CTYPE_SEQUENCE_MATCHER(type) \
/*
 * CTYPE_SEQUENCE_MATCHER(type) defines prefix_is_<type>s(src) (note the
 * pluralising "s") on top of the <ctype.h> predicate is<type>().
 *
 * Skips over the leading run of characters in the class and returns a pointer
 * to the first character past it, or NULL when the run is empty.
 *
 * Each character is converted to unsigned char before the predicate call: the
 * <ctype.h> functions are only defined for EOF and values representable as
 * unsigned char, and plain char may be signed.
 */
#define CTYPE_SEQUENCE_MATCHER(type) \
char *prefix_is_ ## type ## s(char *src) { \
  char *p = src; \
  while (is ## type((unsigned char) *p)) p++; \
  return p == src ? NULL : p; \
}
#define DEFINE_TO_EOL_MATCHER(name, prefix) \
/*
 * TO_EOL_MATCHER(name, prefix) defines prefix_is_<name>(src).
 *
 * The input must begin with `prefix` (checked via prefix_is_chars); if it does
 * not, the generated function returns NULL. Otherwise it advances to the next
 * '\n' or to the terminating NUL, whichever comes first, and returns a pointer
 * to that character (the newline itself is not consumed).
 */
#define TO_EOL_MATCHER(name, prefix) \
char *prefix_is_ ## name(char *src) { \
  char *rest = prefix_is_chars(src, prefix); \
  if (rest == NULL) return NULL; \
  while (*rest != '\0' && *rest != '\n') ++rest; \
  return rest; \
}
#define DEFINE_DELIMITED_MATCHER(name, begin, end, escapable) \
/* DELIMITED_MATCHER(name, begin, end, escapable) defines prefix_is_<name>(src),
   which forwards to prefix_is_delimited_by(src, begin, end, escapable).
   `begin` and `end` are string literals at the use sites; `escapable` is
   passed through as that function's int `esc` argument. */
#define DELIMITED_MATCHER(name, begin, end, escapable) \
char *prefix_is_ ## name(char *src) { \
return prefix_is_delimited_by(src, begin, end, escapable); \
}
#define DEFINE_ALTERNATIVES_MATCHER(name, ...) \
/* ALTERNATIVES_MATCHER(name, ...) defines prefix_is_<name>(src), which passes
   the listed matcher functions to prefix_alternatives(src, ...). That wrapper
   macro appends a NULL after the list before calling _prefix_alternatives. */
#define ALTERNATIVES_MATCHER(name, ...) \
char *prefix_is_ ## name(char *src) { \
return prefix_alternatives(src, __VA_ARGS__); \
}
#define DEFINE_SEQUENCE_MATCHER(name, ...) \
/* SEQUENCE_MATCHER(name, ...) defines prefix_is_<name>(src), which passes the
   listed matcher functions to prefix_sequence(src, ...).
   NOTE(review): presumably the matchers are applied one after another, each
   starting where the previous one stopped -- confirm against _prefix_sequence. */
#define SEQUENCE_MATCHER(name, ...) \
char *prefix_is_ ## name(char *src) { \
return prefix_sequence(src, __VA_ARGS__); \
}
#define DEFINE_OPTIONAL_MATCHER(name, matcher) \
/* OPTIONAL_MATCHER(name, matcher) defines prefix_is_<name>(src), which
   forwards to prefix_optional(src, matcher); `matcher` is a matcher function
   (prefix_matcher). */
#define OPTIONAL_MATCHER(name, matcher) \
char *prefix_is_ ## name(char *src) { \
return prefix_optional(src, matcher); \
}
#define DEFINE_FIRST_REST_MATCHER(name, first_matcher, rest_matcher) \
#define FIRST_REST_MATCHER(name, first_matcher, rest_matcher) \
char *prefix_is_ ## name(char *src) { \
if (src = first_matcher(src)) src = prefix_zero_plus(src, rest_matcher); \
return src; \
......@@ -64,6 +74,7 @@ char *prefix_is_one_of(char *src, char *class);
/* Character-set matcher; `class` is the set of accepted characters as a C
   string. (Implementation not shown in this hunk.) */
char *prefix_is_some_of(char *src, char *class);
/* Matches a span that opens with `beg` and closes with `end`; `esc` is an int
   flag. NOTE(review): presumably non-zero `esc` allows escaped delimiters --
   confirm against the implementation. */
char *prefix_is_delimited_by(char *src, char *beg, char *end, int esc);
/* Matcher that returns src unchanged, i.e. it matches the empty prefix. */
char *prefix_epsilon(char *src);
/* Variadic worker for alternatives. Call it through prefix_alternatives(),
   which appends a NULL after the caller's matcher list.
   NOTE(review): presumably that NULL is the end-of-list sentinel -- confirm
   against the implementation. */
char *_prefix_alternatives(char *src, ...);
#define prefix_alternatives(src, ...) _prefix_alternatives(src, __VA_ARGS__, NULL)
/* Variadic worker for sequences. */
char *_prefix_sequence(char *src, ...);
......@@ -72,68 +83,69 @@ char *prefix_optional(char *src, prefix_matcher m);
/* Repetition combinators: each takes the input and a matcher function pointer
   (prefix_matcher). NOTE(review): going by the names, zero_plus accepts zero or
   more repetitions of `m` and one_plus requires at least one -- confirm against
   the definitions. */
char *prefix_zero_plus(char *src, prefix_matcher m);
char *prefix_one_plus(char *src, prefix_matcher m);
DECLARE_MATCHER(space);
DECLARE_MATCHER(alpha);
DECLARE_MATCHER(digit);
DECLARE_MATCHER(xdigit);
DECLARE_MATCHER(alnum);
DECLARE_MATCHER(punct);
DECLARE_MATCHER(spaces);
DECLARE_MATCHER(alphas);
DECLARE_MATCHER(digits);
DECLARE_MATCHER(xdigits);
DECLARE_MATCHER(alnums);
DECLARE_MATCHER(puncts);
DECLARE_MATCHER(shell_comment);
DECLARE_MATCHER(c_line_comment);
DECLARE_MATCHER(c_block_comment);
DECLARE_MATCHER(double_quoted_string);
DECLARE_MATCHER(single_quoted_string);
DECLARE_MATCHER(interpolant);
DECLARE_MATCHER(lparen);
DECLARE_MATCHER(rparen);
DECLARE_MATCHER(lbrack);
DECLARE_MATCHER(rbrack);
DECLARE_MATCHER(lbrace);
DECLARE_MATCHER(rbrace);
DECLARE_MATCHER(underscore);
DECLARE_MATCHER(hyphen);
DECLARE_MATCHER(semicolon);
DECLARE_MATCHER(colon);
DECLARE_MATCHER(period);
DECLARE_MATCHER(question);
DECLARE_MATCHER(exclamation);
DECLARE_MATCHER(tilde);
DECLARE_MATCHER(backquote);
DECLARE_MATCHER(quote);
DECLARE_MATCHER(apostrophe);
DECLARE_MATCHER(ampersand);
DECLARE_MATCHER(caret);
DECLARE_MATCHER(pipe);
DECLARE_MATCHER(slash);
DECLARE_MATCHER(backslash);
DECLARE_MATCHER(asterisk);
DECLARE_MATCHER(pound);
DECLARE_MATCHER(hash);
DECLARE_MATCHER(plus);
DECLARE_MATCHER(minus);
DECLARE_MATCHER(times);
DECLARE_MATCHER(divide);
DECLARE_MATCHER(percent);
DECLARE_MATCHER(dollar);
DECLARE_MATCHER(gt);
DECLARE_MATCHER(gte);
DECLARE_MATCHER(lt);
DECLARE_MATCHER(lte);
DECLARE_MATCHER(eq);
DECLARE_MATCHER(assign);
DECLARE_MATCHER(equal);
DECLARE_MATCHER(identifier);
/* Each DECLARE(x) below expands to the prototype `char *prefix_is_x(char *)`. */

/* Single character of a <ctype.h> class. */
DECLARE(space);
DECLARE(alpha);
DECLARE(digit);
DECLARE(xdigit);
DECLARE(alnum);
DECLARE(punct);
/* Run of one or more characters of a <ctype.h> class. */
DECLARE(spaces);
DECLARE(alphas);
DECLARE(digits);
DECLARE(xdigits);
DECLARE(alnums);
DECLARE(puncts);
/* Comments, quoted strings and interpolants. */
DECLARE(shell_comment);
DECLARE(c_line_comment);
DECLARE(c_block_comment);
DECLARE(double_quoted_string);
DECLARE(single_quoted_string);
DECLARE(interpolant);
/* Single literal characters. */
DECLARE(lparen);
DECLARE(rparen);
DECLARE(lbrack);
DECLARE(rbrack);
DECLARE(lbrace);
DECLARE(rbrace);
DECLARE(underscore);
DECLARE(hyphen);
DECLARE(semicolon);
DECLARE(colon);
DECLARE(period);
DECLARE(question);
DECLARE(exclamation);
DECLARE(tilde);
DECLARE(backquote);
DECLARE(quote);
DECLARE(apostrophe);
DECLARE(ampersand);
DECLARE(caret);
DECLARE(pipe);
DECLARE(slash);
DECLARE(backslash);
DECLARE(asterisk);
DECLARE(pound);
DECLARE(hash);
DECLARE(plus);
DECLARE(minus);
DECLARE(times);
DECLARE(divide);
DECLARE(percent);
DECLARE(dollar);
/* Comparison and assignment operators. */
DECLARE(gt);
DECLARE(gte);
DECLARE(lt);
DECLARE(lte);
DECLARE(eq);
DECLARE(assign);
DECLARE(equal);
/* Composite matchers. */
DECLARE(identifier);
DECLARE(integer);
\ No newline at end of file
......@@ -27,15 +27,19 @@ int main() {
char *sqstring = "'this \\'is\\' a \"string\" now' blah blah blah";
char *scomment = "# a shell-style comment";
char *bcomment = "/* this is a c comment \\*/ blah blah";
char *noncomment = "/* blah blah";
char *non_comment = "/* blah blah";
char *interpolant = "#{ this is an interpolant \\} blah blah";
char *words = "hello my name is aaron";
char *id1 = "_identifier123{blah bloo}";
char *non_id = "12non_ident_ifier_";
char *word2 = "-blah-blah_blah";
char *word2 = "-blah-blah_bl12-34:foo";
char *non_word = "-12blah-bloo";
char *selector = "#foo > :first-child { color: #abcdef; }";
char *lcomment = "// blah blah blah // end\n blah blah";
char *id2 = "badec4669264hello";
char *integer1 = "3837483+3";
char *integer2 = "+294739-4";
char *integer3 = "-294729+1";
test1(prefix_is_spaces, spaces);
test1(prefix_is_spaces, words);
......@@ -56,7 +60,7 @@ int main() {
test1(prefix_is_shell_comment, lcomment);
test1(prefix_is_c_block_comment, bcomment);
test1(prefix_is_c_block_comment, noncomment);
test1(prefix_is_c_block_comment, non_comment);
test1(prefix_is_double_quoted_string, dqstring);
test1(prefix_is_double_quoted_string, sqstring);
......@@ -68,7 +72,7 @@ int main() {
test1(prefix_is_interpolant, lcomment);
test1(prefix_is_c_line_comment, lcomment);
test1(prefix_is_c_line_comment, noncomment);
test1(prefix_is_c_line_comment, non_comment);
testn(prefix_sequence, id2, prefix_is_alphas, prefix_is_digits);
testn(prefix_sequence, id2, prefix_is_alphas, prefix_is_puncts);
......@@ -85,5 +89,10 @@ int main() {
test1(prefix_is_identifier, id1);
test1(prefix_is_identifier, non_id);
test1(prefix_is_integer, integer1);
test1(prefix_is_integer, integer2);
test1(prefix_is_integer, integer3);
test1(prefix_is_integer, word2);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment