From 44576dd285ae7486f5b92a99130c1728fdc64c92 Mon Sep 17 00:00:00 2001 From: Scott Kyle Date: Tue, 24 May 2016 10:18:09 -0700 Subject: [PATCH 1/2] Improve parsing of string tokens All of the parsing behavior was confirmed to be compatible with the NSPredicate parser. --- src/object-store/src/parser/parser.cpp | 37 ++++++++++++-------------- src/object-store/tests/parser.cpp | 12 ++++++++- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/object-store/src/parser/parser.cpp b/src/object-store/src/parser/parser.cpp index 07b97bb4..299457eb 100644 --- a/src/object-store/src/parser/parser.cpp +++ b/src/object-store/src/parser/parser.cpp @@ -24,8 +24,8 @@ #include #include -// String operators (e.g. AND, OR, NOT) can't be followed by [A-z0-9_]. -#define string_operator_t(s) seq< pegtl_istring_t(s), not_at< identifier_other > > +// String tokens can't be followed by [A-z0-9_]. +#define string_token_t(s) seq< pegtl_istring_t(s), not_at< identifier_other > > using namespace pegtl; @@ -57,9 +57,9 @@ struct int_num : plus< digit > {}; struct number : seq< minus, sor< float_num, hex_num, int_num > > {}; -struct true_value : pegtl_istring_t("true") {}; -struct false_value : pegtl_istring_t("false") {}; -struct null_value : pegtl_istring_t("null") {}; +struct true_value : string_token_t("true") {}; +struct false_value : string_token_t("false") {}; +struct null_value : string_token_t("null") {}; // key paths struct key_path : list< seq< sor< alpha, one< '_' > >, star< sor< alnum, one< '_', '-' > > > >, one< '.' > > {}; @@ -70,37 +70,34 @@ struct argument : seq< one< '$' >, must< argument_index > > {}; // expressions and operators struct expr : sor< dq_string, sq_string, number, argument, true_value, false_value, null_value, key_path > {}; -struct case_insensitive : pegtl_istring_t("[c]"){}; +struct case_insensitive : pegtl_istring_t("[c]") {}; -struct eq : seq< sor< two< '=' >, one< '=' > >, opt< case_insensitive > >{}; +struct eq : seq< sor< two< '=' >, one< '=' > >, star< blank >, opt< case_insensitive > >{}; struct noteq : pegtl::string< '!', '=' > {}; struct lteq : pegtl::string< '<', '=' > {}; struct lt : one< '<' > {}; struct gteq : pegtl::string< '>', '=' > {}; struct gt : one< '>' > {}; -struct contains : pegtl_istring_t("contains") {}; -struct begins : pegtl_istring_t("beginswith") {}; -struct ends : pegtl_istring_t("endswith") {}; +struct contains : string_token_t("contains") {}; +struct begins : string_token_t("beginswith") {}; +struct ends : string_token_t("endswith") {}; -template -struct pad_plus : seq< plus< B >, A, plus< B > > {}; - -struct padded_oper : pad_plus< seq< sor< contains, begins, ends>, opt< case_insensitive > >, blank > {}; +struct string_oper : pad< seq< sor< contains, begins, ends>, star< blank >, opt< case_insensitive > >, blank > {}; struct symbolic_oper : pad< sor< eq, noteq, lteq, lt, gteq, gt >, blank > {}; // predicates -struct comparison_pred : seq< expr, sor< padded_oper, symbolic_oper >, expr > {}; +struct comparison_pred : seq< expr, sor< string_oper, symbolic_oper >, expr > {}; struct pred; struct group_pred : if_must< one< '(' >, pad< pred, blank >, one< ')' > > {}; -struct true_pred : pegtl_istring_t("truepredicate") {}; -struct false_pred : pegtl_istring_t("falsepredicate") {}; +struct true_pred : string_token_t("truepredicate") {}; +struct false_pred : string_token_t("falsepredicate") {}; -struct not_pre : seq< sor< one< '!' >, string_operator_t("not") > > {}; +struct not_pre : seq< sor< one< '!' >, string_token_t("not") > > {}; struct atom_pred : seq< opt< not_pre >, pad< sor< group_pred, true_pred, false_pred, comparison_pred >, blank > > {}; -struct and_op : pad< sor< two< '&' >, string_operator_t("and") >, blank > {}; -struct or_op : pad< sor< two< '|' >, string_operator_t("or") >, blank > {}; +struct and_op : pad< sor< two< '&' >, string_token_t("and") >, blank > {}; +struct or_op : pad< sor< two< '|' >, string_token_t("or") >, blank > {}; struct or_ext : if_must< or_op, pred > {}; struct and_ext : if_must< and_op, pred > {}; diff --git a/src/object-store/tests/parser.cpp b/src/object-store/tests/parser.cpp index 15c5cf2c..d7058d32 100644 --- a/src/object-store/tests/parser.cpp +++ b/src/object-store/tests/parser.cpp @@ -10,6 +10,7 @@ static std::vector valid_queries = { "falsepredicate", " TRUEPREDICATE ", " FALSEPREDICATE ", + "truepredicates = falsepredicates", // keypaths // characters/strings "\"\" = ''", @@ -25,6 +26,9 @@ static std::vector valid_queries = { "10. = -.034", "10.0 = 5.034", "true = false", + "truelove = false", + "true = falsey", + "nullified = null", "_ = a", "_a = _.aZ", "a09._br.z = __-__.Z-9", @@ -34,10 +38,13 @@ static std::vector valid_queries = { // operators "0=0", "0 = 0", + "0 =[c] 0", "0!=0", "0 != 0", "0==0", "0 == 0", + "0==[c]0", + "0 == [c] 0", "0>0", "0 > 0", "0>=0", @@ -47,6 +54,9 @@ static std::vector valid_queries = { "0<=0", "0 <= 0", "0 contains 0", + "a CONTAINS[c] b", + "a contains [c] b", + "'a'CONTAINS[c]b", "0 BeGiNsWiTh 0", "0 ENDSWITH 0", "contains contains 'contains'", @@ -103,7 +113,6 @@ static std::vector invalid_queries = { "1.0. = 1", "1-0 = 1", "0x = 1", - "truey = false", "- = a", "a..b = a", "a$a = a", @@ -116,6 +125,7 @@ static std::vector invalid_queries = { "0===>0", "0 <> 0", "0 contains1", + "a contains_something", "endswith 0", // atoms/groups From 1a192240df8a1f80afc514704899631e3482c514 Mon Sep 17 00:00:00 2001 From: Scott Kyle Date: Thu, 26 May 2016 11:57:43 -0700 Subject: [PATCH 2/2] Factor out padding rule for operators --- src/object-store/src/parser/parser.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/object-store/src/parser/parser.cpp b/src/object-store/src/parser/parser.cpp index 299457eb..dab01c63 100644 --- a/src/object-store/src/parser/parser.cpp +++ b/src/object-store/src/parser/parser.cpp @@ -82,11 +82,11 @@ struct contains : string_token_t("contains") {}; struct begins : string_token_t("beginswith") {}; struct ends : string_token_t("endswith") {}; -struct string_oper : pad< seq< sor< contains, begins, ends>, star< blank >, opt< case_insensitive > >, blank > {}; -struct symbolic_oper : pad< sor< eq, noteq, lteq, lt, gteq, gt >, blank > {}; +struct string_oper : seq< sor< contains, begins, ends>, star< blank >, opt< case_insensitive > > {}; +struct symbolic_oper : sor< eq, noteq, lteq, lt, gteq, gt > {}; // predicates -struct comparison_pred : seq< expr, sor< string_oper, symbolic_oper >, expr > {}; +struct comparison_pred : seq< expr, pad< sor< string_oper, symbolic_oper >, blank >, expr > {}; struct pred; struct group_pred : if_must< one< '(' >, pad< pred, blank >, one< ')' > > {};