Commit 03ccb459 by Aaron Leung

More tokenizing stuff. May have to re-invent closures to make this cleaner.

parent 2e51303a
#include "prefix_primitives.h"
int prefix_starts_with_identifier(char *src) { int prefix_starts_with_identifier(char *src) {
int p = prefix_is_alphas(src) || prefix_is_exactly(src, "_"); int p = prefix_is_alphas(src) || prefix_is_exactly(src, "_");
if (!p++) return 0; if (!p++) return 0;
...@@ -18,8 +20,7 @@ DEFINE_EXACT_MATCHER(hash, "#"); ...@@ -18,8 +20,7 @@ DEFINE_EXACT_MATCHER(hash, "#");
DEFINE_EXACT_MATCHER(adjacent_to, "+"); DEFINE_EXACT_MATCHER(adjacent_to, "+");
DEFINE_EXACT_MATCHER(precedes, "~"); DEFINE_EXACT_MATCHER(precedes, "~");
DEFINE_EXACT_MATCHER(parent_of, ">"); DEFINE_EXACT_MATCHER(parent_of, ">");
int (*prefix_starts_with_ancestor_of)(char *src) = prefix_starts_with_spaces; prefix_matcher prefix_starts_with_ancestor_of = prefix_starts_with_spaces;
DEFINE_EXACT_MATCHER(exclamation, "!"); DEFINE_EXACT_MATCHER(exclamation, "!");
DEFINE_EXACT_MATCHER(dollar, "$"); DEFINE_EXACT_MATCHER(dollar, "$");
DEFINE_EXACT_MATCHER(percent, "%"); DEFINE_EXACT_MATCHER(percent, "%");
......
...@@ -34,15 +34,31 @@ int prefix_is_delimited_by(char *src, char *beg, char *end, int esc) { ...@@ -34,15 +34,31 @@ int prefix_is_delimited_by(char *src, char *beg, char *end, int esc) {
} }
} }
int prefix_try_alternatives(char *src, ...) { int prefix_alternatives(char *src, ...) {
int p = 0; int p = 0;
va_list ap; va_list ap;
va_start(ap, src); va_start(ap, src);
prefix_matcher m = va_arg(ap, prefix_matcher); prefix_matcher m = va_arg(ap, prefix_matcher);
while (m && !(p = (*m)(src))) m = va_arg(ap, prefix_matcher); while (m && !(p = (*m)(src))) m = va_arg(ap, prefix_matcher);
va_end(ap);
return p; return p;
} }
/*
 * Applies a null-terminated list of matchers one after another, each one
 * starting where the previous one stopped.
 *
 * src  - text to match against.
 * ...  - prefix_matcher values; the list MUST end with a null matcher
 *        pointer, because arguments are read until one is seen.
 *
 * Returns the sum of the matchers' results when every matcher returned a
 * nonzero value, and 0 as soon as one of them returns 0 (or when the list
 * is empty).
 */
int prefix_sequence(char *src, ...) {
  int p = 0, p_sum = 0;
  char *cursor = src;
  va_list ap;
  va_start(ap, src);
  prefix_matcher m = va_arg(ap, prefix_matcher);
  while (m && (p = (*m)(cursor))) {
    p_sum += p;
    /* Only a positive length moves the cursor: prefix_optional reports -1
       when its matcher finds nothing, and that must not step backwards. */
    if (p > 0) cursor += p;
    m = va_arg(ap, prefix_matcher);
  }
  va_end(ap);
  /* p is 0 here exactly when a matcher failed or no matcher was supplied. */
  return p ? p_sum : 0;
}
/*
 * Runs matcher m on src. A nonzero result is handed back unchanged; a zero
 * result is reported as -1 instead.
 */
int prefix_optional(char *src, prefix_matcher m) {
  int matched = (*m)(src);
  if (matched == 0) {
    return -1;
  }
  return matched;
}
DEFINE_SINGLE_CTYPE_MATCHER(space); DEFINE_SINGLE_CTYPE_MATCHER(space);
DEFINE_SINGLE_CTYPE_MATCHER(alpha); DEFINE_SINGLE_CTYPE_MATCHER(alpha);
DEFINE_SINGLE_CTYPE_MATCHER(digit); DEFINE_SINGLE_CTYPE_MATCHER(digit);
...@@ -62,8 +78,7 @@ DEFINE_DELIMITED_MATCHER(single_quoted_string, "'", "'", 1); ...@@ -62,8 +78,7 @@ DEFINE_DELIMITED_MATCHER(single_quoted_string, "'", "'", 1);
DEFINE_DELIMITED_MATCHER(interpolant, "#{", "}", 0); DEFINE_DELIMITED_MATCHER(interpolant, "#{", "}", 0);
int prefix_is_string(char *src) { int prefix_is_string(char *src) {
return prefix_try_alternatives(src, return prefix_alternatives(src, prefix_is_double_quoted_string,
prefix_is_double_quoted_string,
prefix_is_single_quoted_string); prefix_is_single_quoted_string);
} }
...@@ -73,3 +88,48 @@ DEFINE_EXACT_MATCHER(lbrack, "["); ...@@ -73,3 +88,48 @@ DEFINE_EXACT_MATCHER(lbrack, "[");
DEFINE_EXACT_MATCHER(rbrack, "]"); DEFINE_EXACT_MATCHER(rbrack, "]");
DEFINE_EXACT_MATCHER(lbrace, "{"); DEFINE_EXACT_MATCHER(lbrace, "{");
DEFINE_EXACT_MATCHER(rbrace, "}"); DEFINE_EXACT_MATCHER(rbrace, "}");
/*
 * Exact-text matchers generated by DEFINE_EXACT_MATCHER from a name and the
 * literal it is paired with. Several literals are deliberately listed under
 * two names: "-" (hyphen/minus), "*" (asterisk/times), "/" (slash/divide),
 * "#" (pound/hash) and "=" (eq/assign).
 *
 * NOTE(review): exclamation, hash, percent and dollar are also generated by
 * DEFINE_EXACT_MATCHER in another hunk of this commit -- confirm the two sets
 * of definitions never end up in the same translation unit or link.
 */
DEFINE_EXACT_MATCHER(underscore, "_");
DEFINE_EXACT_MATCHER(hyphen, "-");
DEFINE_EXACT_MATCHER(semicolon, ";");
DEFINE_EXACT_MATCHER(colon, ":");
DEFINE_EXACT_MATCHER(period, ".");
DEFINE_EXACT_MATCHER(question, "?");
DEFINE_EXACT_MATCHER(exclamation, "!");
DEFINE_EXACT_MATCHER(tilde, "~");
DEFINE_EXACT_MATCHER(backquote, "`");
DEFINE_EXACT_MATCHER(quote, "\"");
DEFINE_EXACT_MATCHER(apostrophe, "'");
DEFINE_EXACT_MATCHER(ampersand, "&");
DEFINE_EXACT_MATCHER(caret, "^");
DEFINE_EXACT_MATCHER(pipe, "|");
DEFINE_EXACT_MATCHER(slash, "/");
DEFINE_EXACT_MATCHER(backslash, "\\");
DEFINE_EXACT_MATCHER(asterisk, "*");
DEFINE_EXACT_MATCHER(pound, "#");
DEFINE_EXACT_MATCHER(hash, "#");
DEFINE_EXACT_MATCHER(plus, "+");
DEFINE_EXACT_MATCHER(minus, "-");
DEFINE_EXACT_MATCHER(times, "*");
DEFINE_EXACT_MATCHER(divide, "/");
DEFINE_EXACT_MATCHER(percent, "%");
DEFINE_EXACT_MATCHER(dollar, "$");
DEFINE_EXACT_MATCHER(gt, ">");
DEFINE_EXACT_MATCHER(gte, ">=");
DEFINE_EXACT_MATCHER(lt, "<");
DEFINE_EXACT_MATCHER(lte, "<=");
DEFINE_EXACT_MATCHER(eq, "=");
DEFINE_EXACT_MATCHER(assign, "=");
DEFINE_EXACT_MATCHER(equal, "==");
/*
 * Composite matchers for identifiers and words.
 *
 * identifier: starts with a run of letters or an underscore, then continues
 * with runs of alphanumerics or underscores.
 * word: like identifier, but hyphens are also accepted in the trailing part.
 *
 * NOTE(review): DEFINE_ALTERNATIVES_MATCHER forwards these argument lists to
 * prefix_alternatives, which reads matchers until it meets a null one; no
 * terminator is supplied here -- confirm one is added somewhere.
 */
DEFINE_ALTERNATIVES_MATCHER(identifier_initial, prefix_is_alphas, prefix_is_underscore);
DEFINE_ALTERNATIVES_MATCHER(identifier_trailing, prefix_is_alnums, prefix_is_underscore);
DEFINE_FIRST_REST_MATCHER(identifier, prefix_is_identifier_initial, prefix_is_identifier_trailing);
// Unfinished attempts at word_initial, left disabled:
// DEFINE_ALTERNATIVES_MATCHER(
// DEFINE_ALTERNATIVES_MATCHER(word_initial, prefix_is_identifier, prefix_is_hyphen_);
DEFINE_ALTERNATIVES_MATCHER(word_trailing, prefix_is_alnums, prefix_is_underscore, prefix_is_hyphen);
// NOTE(review): prefix_is_word_initial is used below, but its only visible
// definition is the disabled line above -- confirm it is defined elsewhere.
DEFINE_FIRST_REST_MATCHER(word, prefix_is_word_initial, prefix_is_word_trailing);
...@@ -4,6 +4,9 @@ int prefix_is_exactly(char *, char*); ...@@ -4,6 +4,9 @@ int prefix_is_exactly(char *, char*);
int prefix_is_one_of(char *, char *); int prefix_is_one_of(char *, char *);
int prefix_is_some_of(char *, char *); int prefix_is_some_of(char *, char *);
int prefix_is_delimited_by(char *, char *, char *, int); int prefix_is_delimited_by(char *, char *, char *, int);
int prefix_alternatives(char *, ...);
int prefix_sequence(char *, ...);
int prefix_optional(char *, prefix_matcher);
#define DECLARE_MATCHER(name) \ #define DECLARE_MATCHER(name) \
int prefix_is_ ## name(char *) int prefix_is_ ## name(char *)
...@@ -14,7 +17,7 @@ int prefix_is_ ## name(char *src) { \ ...@@ -14,7 +17,7 @@ int prefix_is_ ## name(char *src) { \
} }
#define DEFINE_SINGLE_CTYPE_MATCHER(type) \ #define DEFINE_SINGLE_CTYPE_MATCHER(type) \
int prefix_is_one_ ## type(char *src) { \ int prefix_is_ ## type(char *src) { \
return is ## type(src[0]) ? 1 : 0; \ return is ## type(src[0]) ? 1 : 0; \
} }
...@@ -37,15 +40,38 @@ int prefix_is_ ## name(char *src) { \ ...@@ -37,15 +40,38 @@ int prefix_is_ ## name(char *src) { \
return p; \ return p; \
} }
#define prefix_list(...) { __VA_ARGS__, NULL } #define DEFINE_ALTERNATIVES_MATCHER(name, ...) \
int prefix_is_ ## name(char *src) { \
return prefix_alternatives(src, __VA_ARGS__); \
}
/*
 * DEFINE_SEQUENCE_MATCHER(name, matcher...)
 * Generates int prefix_is_<name>(char *src), which passes src and the listed
 * matchers to prefix_sequence and returns its result.
 *
 * prefix_sequence keeps reading variadic arguments until it sees a null
 * matcher, so the list is terminated here. The terminator is cast because a
 * bare 0/NULL is not guaranteed to have function-pointer representation when
 * passed through "...".
 */
#define DEFINE_SEQUENCE_MATCHER(name, ...) \
int prefix_is_ ## name(char *src) { \
  return prefix_sequence(src, __VA_ARGS__, (prefix_matcher)0); \
}
/*
 * DEFINE_OPTIONAL_MATCHER(name, matcher)
 * Generates int prefix_is_<name>(char *src), a thin wrapper that hands src
 * and the given matcher to prefix_optional and returns whatever it reports.
 */
#define DEFINE_OPTIONAL_MATCHER(name, matcher) \
int prefix_is_ ## name(char *src) { \
  int result = prefix_optional(src, matcher); \
  return result; \
}
/*
 * DEFINE_FIRST_REST_MATCHER(name, first_matcher, rest_matcher)
 * Generates int prefix_is_<name>(char *src).
 *
 * first_matcher is applied to src; if it returns 0 the generated function
 * returns 0. Otherwise rest_matcher is applied repeatedly, each time at the
 * position just past everything matched so far, until it returns 0.
 * The return value is the sum of all the lengths that were returned.
 *
 * The offset passed to rest_matcher is the running total (p_sum), not the
 * length of the most recent match: using only the last length re-reads
 * earlier text and can loop without ever terminating.
 */
#define DEFINE_FIRST_REST_MATCHER(name, first_matcher, rest_matcher) \
int prefix_is_ ## name(char *src) { \
  int p = first_matcher(src); \
  int p_sum = p; \
  while (p) { \
    p = rest_matcher(src + p_sum); \
    p_sum += p; \
  } \
  return p_sum; \
}
DECLARE_MATCHER(one_space); DECLARE_MATCHER(space);
DECLARE_MATCHER(one_alpha); DECLARE_MATCHER(alpha);
DECLARE_MATCHER(one_digit); DECLARE_MATCHER(digit);
DECLARE_MATCHER(one_xdigit); DECLARE_MATCHER(xdigit);
DECLARE_MATCHER(one_alnum); DECLARE_MATCHER(alnum);
DECLARE_MATCHER(one_punct); DECLARE_MATCHER(punct);
DECLARE_MATCHER(spaces); DECLARE_MATCHER(spaces);
DECLARE_MATCHER(alphas); DECLARE_MATCHER(alphas);
DECLARE_MATCHER(digits); DECLARE_MATCHER(digits);
...@@ -64,3 +90,39 @@ DECLARE_MATCHER(lbrack); ...@@ -64,3 +90,39 @@ DECLARE_MATCHER(lbrack);
DECLARE_MATCHER(rbrack); DECLARE_MATCHER(rbrack);
DECLARE_MATCHER(lbrace); DECLARE_MATCHER(lbrace);
DECLARE_MATCHER(rbrace); DECLARE_MATCHER(rbrace);
/*
 * Prototypes for the punctuation and operator matchers. Each line expands,
 * via DECLARE_MATCHER, to a declaration of the form
 *   int prefix_is_<name>(char *);
 */
DECLARE_MATCHER(underscore);
DECLARE_MATCHER(hyphen);
DECLARE_MATCHER(semicolon);
DECLARE_MATCHER(colon);
DECLARE_MATCHER(period);
DECLARE_MATCHER(question);
DECLARE_MATCHER(exclamation);
DECLARE_MATCHER(tilde);
DECLARE_MATCHER(backquote);
DECLARE_MATCHER(quote);
DECLARE_MATCHER(apostrophe);
DECLARE_MATCHER(ampersand);
DECLARE_MATCHER(caret);
DECLARE_MATCHER(pipe);
DECLARE_MATCHER(slash);
DECLARE_MATCHER(backslash);
DECLARE_MATCHER(asterisk);
DECLARE_MATCHER(pound);
DECLARE_MATCHER(hash);
DECLARE_MATCHER(plus);
DECLARE_MATCHER(minus);
DECLARE_MATCHER(times);
DECLARE_MATCHER(divide);
DECLARE_MATCHER(percent);
DECLARE_MATCHER(dollar);
DECLARE_MATCHER(gt);
DECLARE_MATCHER(gte);
DECLARE_MATCHER(lt);
DECLARE_MATCHER(lte);
DECLARE_MATCHER(eq);
DECLARE_MATCHER(assign);
DECLARE_MATCHER(equal);
...@@ -19,6 +19,9 @@ int main() { ...@@ -19,6 +19,9 @@ int main() {
char *t = "/* this is a c comment \\*/ blah blah"; char *t = "/* this is a c comment \\*/ blah blah";
char *u = "#{ this is an interpolant \\} blah blah"; char *u = "#{ this is an interpolant \\} blah blah";
char *v = "hello my name is aaron"; char *v = "hello my name is aaron";
char *w = "_identifier123";
char *x = "12nonidentifier_";
char *y = "-blah-blah_blah";
int (*funcs[])(char *) = { prefix_is_string, prefix_is_block_comment, NULL }; int (*funcs[])(char *) = { prefix_is_string, prefix_is_block_comment, NULL };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment