Update new conditional parser to create data structures
[freeradius.git] / src / main / parser.c
1 /*
2  * parser.c     Parse various things
3  *
4  * Version:     $Id$
5  *
6  *   This program is free software; you can redistribute it and/or modify
7  *   it under the terms of the GNU General Public License as published by
8  *   the Free Software Foundation; either version 2 of the License, or
9  *   (at your option) any later version.
10  *
11  *   This program is distributed in the hope that it will be useful,
12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *   GNU General Public License for more details.
15  *
16  *   You should have received a copy of the GNU General Public License
17  *   along with this program; if not, write to the Free Software
18  *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * Copyright 2013  Alan DeKok <aland@freeradius.org>
21  */
22
23 RCSID("$Id$")
24
25 #include <freeradius-devel/radiusd.h>
26 #include <freeradius-devel/parser.h>
27 #include <freeradius-devel/rad_assert.h>
28
29 #include <ctype.h>
30
/*
 *	Change the "#if 0" below to "#if 1" to print parser debugging
 *	messages to stdout, one per line.
 *
 *	The expansion is wrapped in do { } while (0) so that the macro
 *	behaves as a single statement, e.g. in an unbraced if / else.
 */
#if 0
#define COND_DEBUG(fmt, ...) do { printf(fmt, ## __VA_ARGS__); printf("\n"); } while (0)
#endif

/*
 *	This file shouldn't use any functions from the server core.
 *
 *	If no debugging macro was selected above, COND_DEBUG either maps
 *	to DEBUG (disabled by default) or expands to nothing.
 */
#ifndef COND_DEBUG
#if 0
#define COND_DEBUG DEBUG
#else
#define COND_DEBUG(...)
#endif
#endif
45
/*
 *	Operators which modify a condition, or join it to the next one.
 *
 *	The NOT / AND / OR values are the first character of the
 *	operator's textual form ("!", "&&", "||").
 */
typedef enum cond_op_t {
	COND_NONE = 0,		/* no operator */
	COND_TRUE = 1,		/* plain (COND), i.e. not negated */
	COND_NOT  = '!',	/* !COND */
	COND_AND  = '&',	/* COND1 && COND2 */
	COND_OR   = '|'		/* COND1 || COND2 */
} cond_op_t;
53
54
55 typedef struct cond_t cond_t;
56
57 /*
58  *      Allow for the following structures:
59  *
60  *      FOO                     no OP, RHS is NULL
61  *      FOO OP BAR
62  *      (COND)                  no LHS/RHS, child is COND, child OP is TRUE
63  *      (!(COND))               no LHS/RHS, child is COND, child OP is NOT
64  *      (COND1 OP COND2)        no LHS/RHS, next is COND2, next OP is OP
65  */
66 struct cond_t {
67         char            *lhs;
68         char            *rhs;
69         FR_TOKEN        op;
70         int             regex_i;
71
72         cond_op_t       next_op;
73         cond_t          *next;
74         cond_op_t       child_op;
75         cond_t          *child;
76 };
77
78 static void cond_debug(const cond_t *c)
79 {
80
81 next:
82         if (c->child_op == COND_NOT) {
83                 printf("!");
84         }
85
86         if (c->op != T_OP_INVALID) {
87                 rad_assert(c->lhs != NULL);
88                 printf("%s", c->lhs);
89
90                 if (c->op != T_OP_CMP_TRUE) {
91                         printf(" %s ", fr_token_name(c->op));
92
93                         rad_assert(c->rhs != NULL);
94                         printf("%s", c->rhs);
95                 }
96
97         } else {
98                 rad_assert(c->child != NULL);
99
100                 rad_assert(c->child_op != COND_AND);
101                 rad_assert(c->child_op != COND_OR);
102                 rad_assert(c->child != NULL);
103
104                 printf("(");
105                 cond_debug(c->child);
106                 printf(")");
107         }
108
109         if (c->next_op == COND_NONE) {
110                 rad_assert(c->next == NULL);
111                 return;
112         }
113
114         rad_assert(c->next_op != COND_TRUE);
115         rad_assert(c->next_op != COND_NOT);
116
117         if (c->next_op == COND_AND) {
118                 printf(" && ");
119
120         } else if (c->next_op == COND_OR) {
121                 printf(" || ");
122
123         } else {
124                 rad_assert(0 == 1);
125         }
126
127         c = c->next;
128         goto next;
129 }
130
131
132 static ssize_t condition_tokenize_string(TALLOC_CTX *ctx, const char *start, char **out, const char **error)
133 {
134         const char *p = start;
135
136         p++;
137
138         COND_DEBUG("STRING %s", start);
139         while (*p) {
140                 if (*p == *start) {
141                         size_t len = (p + 1) - start;
142
143                         COND_DEBUG("end of string %s", p);
144                         *out = talloc_array(ctx, char, len + 1);
145
146                         memcpy(*out, start, len);
147                         (*out)[len] = '\0';
148                         return len;
149                 }
150
151                 if (*p == '\\') {
152                         p++;
153                         if (!*p) {
154                                 *error = "End of string after escape";
155                                 COND_DEBUG("RETURN %d", __LINE__);
156                                 return -(p - start);
157                         }
158                 }
159         
160                 p++;            /* allow anything else */
161         }
162
163         *error = "Unterminated string";
164         return -1;
165 }
166
167 static ssize_t condition_tokenize_word(TALLOC_CTX *ctx, const char *start, char **out, const char **error)
168 {
169         size_t len;
170         const char *p = start;
171
172         if ((*p == '"') || (*p == '\'') || (*p == '`') || (*p == '/')) {
173                 return condition_tokenize_string(ctx, start, out, error);
174         }
175
176         while (*p) {
177                 /*
178                  *      The LHS should really be limited to only a few
179                  *      things.  For now, we allow pretty much anything.
180                  */
181                 if (*p == '\\') {
182                         *error = "Unexpected escape";
183                         COND_DEBUG("RETURN %d", __LINE__);
184                         return -(p - start);
185                 }
186
187                 /*
188                  *      ("foo") is valid.
189                  */
190                 if (*p == ')') {
191                         break;
192                 }
193
194                 /*
195                  *      Spaces or special characters delineate the word
196                  */
197                 if (isspace((int) *p) || (*p == '&') || (*p == '|') ||
198                     (*p == '!') || (*p == '=') || (*p == '<') || (*p == '>')) {
199                         break;
200                 }
201
202                 if ((*p == '"') || (*p == '\'') || (*p == '`')) {
203                         COND_DEBUG("RETURN %d", __LINE__);
204                         *error = "Unexpected start of string";
205                         return -(p - start);
206                 }
207
208                 p++;
209         }
210
211         len = p - start;
212         if (!len) {
213                 *error = "Empty string is invalid";
214                 return 0;
215         }
216
217         *out = talloc_array(ctx, char, len + 1);
218         memcpy(*out, start, len);
219         (*out)[len] = '\0';
220         COND_DEBUG("PARSED WORD %s", *out);
221         return len;
222 }
223
224 /** Tokenize a conditional check
225  *
226  *  @param[in] start the start of the string to process.  Should be "(..."
227  *  @param[in] brace look for a closing brace
228  *  @param[out] child whether or not a child expression was parsed
229  *  @param[out] error the parse error (if any)
230  *  @return length of the string skipped, or when negative, the offset to the offending error
231  */
232 static ssize_t condition_tokenize(TALLOC_CTX *ctx, const char *start, int brace, cond_t **pcond, const char **error)
233 {
234         int sub;
235         ssize_t slen;
236         const char *p = start;
237         cond_t *c;
238
239         sub = FALSE;
240
241         COND_DEBUG("START %s", p);
242
243         c = talloc_zero(ctx, cond_t);
244
245         rad_assert(c != NULL);
246
247         while (isspace((int) *p)) p++; /* skip spaces before condition */
248
249         if (!*p) {
250                 talloc_free(c);
251                 COND_DEBUG("RETURN %d", __LINE__);
252                 *error = "Empty condition is invalid";
253                 return -(p - start);
254         }
255
256         /*
257          *      !COND
258          */
259         if (*p == '!') {
260                  p++;
261                  c->child_op = COND_NOT;
262                  while (isspace((int) *p)) p++; /* skip spaces after negation */
263         }
264
265         /*
266          *      (COND)
267          */
268         if (*p == '(') {
269                 p++;
270
271                 if (c->child_op == COND_NONE) c->child_op = COND_TRUE;
272
273                 /*
274                  *      We've already eaten one layer of
275                  *      brackets.  Go recurse to get more.
276                  */
277                 slen = condition_tokenize(c, p, TRUE, &c->child, error);
278                 if (slen <= 0) {
279                         talloc_free(c);
280                         COND_DEBUG("RETURN %d", __LINE__);
281                         return slen - (p - start);
282                 }
283
284                 if (!c->child) {
285                         talloc_free(c);
286                         *error = "Empty condition is invalid";
287                         COND_DEBUG("RETURN %d", __LINE__);
288                         return -(p - start);
289                 }
290
291                 p += slen;
292                 while (isspace((int) *p)) p++; /* skip spaces after (COND)*/
293
294         } else { /* it's a bare FOO==BAR */
295                 /*
296                  *      We didn't see anything special.  The condition must be one of
297                  *
298                  *      FOO
299                  *      FOO OP BAR
300                  */
301
302                 /*
303                  *      Grab the LHS
304                  */
305                 COND_DEBUG("LHS %s", p);
306                 slen = condition_tokenize_word(c, p, &c->lhs, error);
307                 if (slen <= 0) {
308                         talloc_free(c);
309                         COND_DEBUG("RETURN %d", __LINE__);
310                         return slen - (p - start);
311                 }
312                 p += slen;
313
314                 while (isspace((int)*p)) p++; /* skip spaces after LHS */
315
316                 /*
317                  *      We may (or not) have an operator
318                  */
319
320
321                 /*
322                  *      (FOO)
323                  */
324                 if (*p == ')') {
325                         /*
326                          *      don't skip the brace.  We'll look for it later.
327                          */
328                         c->op = T_OP_CMP_TRUE;
329
330                         /*
331                          *      FOO
332                          */
333                 } else if (!*p) {
334                         if (brace) {
335                                 talloc_free(c);
336                                 *error = "No closing brace at end of string";
337                                 COND_DEBUG("RETURN %d", __LINE__);
338                                 return -(p - start);
339                         }
340
341                         c->op = T_OP_CMP_TRUE;
342
343                         /*
344                          *      FOO && ...
345                          */
346                 } else if (((p[0] == '&') && (p[1] == '&')) ||
347                            ((p[0] == '|') && (p[1] == '|'))) {
348
349                         c->op = T_OP_CMP_TRUE;
350
351                 } else { /* it's an operator */
352                         int regex;
353
354                         COND_DEBUG("OPERATOR %s", p);
355
356                         /*
357                          *      The next thing should now be a comparison operator.
358                          */
359                         regex = FALSE;
360                         switch (*p) {
361                         default:
362                                 talloc_free(c);
363                                 *error = "Invalid text. Expected comparison operator";
364                                 COND_DEBUG("RETURN %d", __LINE__);
365                                 return -(p - start);
366
367                         case '!':
368                                 if (p[1] == '=') {
369                                         c->op = T_OP_NE;
370                                         p += 2;
371
372                                 } else if (p[1] == '~') {
373                                 regex = TRUE;
374
375                                 c->op = T_OP_REG_NE;
376                                 p += 2;
377
378                                 } else if (p[1] == '*') {
379                                         c->op = T_OP_CMP_FALSE;
380                                         p += 2;
381
382                                 } else {
383                                 invalid_operator:
384                                         talloc_free(c);
385                                         *error = "Invalid operator";
386                                         COND_DEBUG("RETURN %d", __LINE__);
387                                         return -(p - start);
388                                 }
389                                 break;
390
391                         case '=':
392                                 if (p[1] == '=') {
393                                         c->op = T_OP_CMP_EQ;
394                                         p += 2;
395
396                                 } else if (p[1] == '~') {
397                                         regex = TRUE;
398
399                                         c->op = T_OP_REG_EQ;
400                                         p += 2;
401
402                                 } else if (p[1] == '*') {
403                                         c->op = T_OP_CMP_TRUE;
404                                         p += 2;
405
406                                 } else {
407                                         goto invalid_operator;
408                                 }
409
410                                 break;
411
412                         case '<':
413                                 if (p[1] == '=') {
414                                         c->op = T_OP_LE;
415                                         p += 2;
416
417                                 } else {
418                                         c->op = T_OP_LT;
419                                         p++;
420                                 }
421                                 break;
422
423                         case '>':
424                                 if (p[1] == '=') {
425                                         c->op = T_OP_GE;
426                                         p += 2;
427
428                                 } else {
429                                         c->op = T_OP_GT;
430                                         p++;
431                                 }
432                                 break;
433                         }
434
435                         while (isspace((int) *p)) p++; /* skip spaces after operator */
436
437                         if (!*p) {
438                                 talloc_free(c);
439                                 *error = "Expected text after operator";
440                                 COND_DEBUG("RETURN %d", __LINE__);
441                                 return -(p - start);
442                         }
443
444                         COND_DEBUG("RHS %s", p);
445
446                         /*
447                          *      Grab the RHS
448                          */
449                         slen = condition_tokenize_word(c, p, &c->rhs, error);
450                         if (slen <= 0) {
451                                 talloc_free(c);
452                                 COND_DEBUG("RETURN %d", __LINE__);
453                                 return slen - (p - start);
454                         }
455
456                         /*
457                          *      Sanity checks for regexes.
458                          */
459                         if (regex) {
460                                 if (*p != '/') {
461                                         talloc_free(c);
462                                         *error = "Expected regular expression";
463                                         COND_DEBUG("RETURN %d", __LINE__);
464                                         return -(p - start);
465                                 }
466
467                                 /*
468                                  *      Allow /foo/i
469                                  */
470                                 if (p[slen] == 'i') {
471                                         c->regex_i = TRUE;
472                                         slen++;
473                                 }
474
475                                 COND_DEBUG("DONE REGEX %s", p + slen);
476
477                         } else if (!regex && (*p == '/')) {
478                                 talloc_free(c);
479                                 *error = "Unexpected regular expression";
480                                 COND_DEBUG("RETURN %d", __LINE__);
481                                 return -(p - start);
482                         }
483
484                         p += slen;
485
486                         while (isspace((int) *p)) p++; /* skip spaces after RHS */
487                 } /* parse OP RHS */
488         } /* parse a condition (COND) or FOO OP BAR*/
489
490         /*
491          *      ...COND)
492          */
493         if (*p == ')') {
494                 if (!brace) {
495                         talloc_free(c);
496                         *error = "Unexpected closing brace";
497                         COND_DEBUG("RETURN %d", __LINE__);
498                         return -(p - start);
499                 }
500
501                 p++;
502                 while (isspace((int) *p)) p++; /* skip spaces after closing brace */
503                 brace = FALSE;
504                 goto done;
505         }
506
507         /*
508          *      End of string is now allowed.
509          */
510         if (!*p) {
511                 if (brace) {
512                         talloc_free(c);
513                         *error = "No closing brace at end of string";
514                         COND_DEBUG("RETURN %d", __LINE__);
515                         return -(p - start);
516                 }
517
518                 goto done;
519         }
520
521         if (!(((p[0] == '&') && (p[1] == '&')) ||
522               ((p[0] == '|') && (p[1] == '|')))) {
523                 talloc_free(c);
524                 *error = "Unexpected text after condition";
525                 return -(p - start);
526         }
527
528         /*
529          *      Recurse to parse the next condition.
530          */
531         COND_DEBUG("GOT %c%c", p[0], p[1]);
532         c->next_op = p[0];
533         p += 2;
534
535         /*
536          *      May still be looking for a closing brace.
537          */
538         COND_DEBUG("RECURSE AND/OR");
539         slen = condition_tokenize(c, p, brace, &c->next, error);
540         if (slen <= 0) {
541                 talloc_free(c);
542                 COND_DEBUG("RETURN %d", __LINE__);
543                 return slen - (p - start);
544         }
545         p += slen;
546
547 done:
548         *pcond = c;
549         COND_DEBUG("RETURN %d", __LINE__);
550         return p - start;
551 }
552
553 /** Tokenize a conditional check
554  *
555  *  @param[in] start the start of the string to process.  Should be "(..."
556  *  @param[out] error the parse error (if any)
557  *  @return length of the string skipped, or when negative, the offset to the offending error
558  */
559 ssize_t fr_condition_tokenize(const char *start, const char **error)
560 {
561         ssize_t slen;
562         cond_t *c = NULL;
563
564         slen = condition_tokenize(NULL, start, FALSE, &c, error);
565         if (slen <= 0) return slen;
566
567         if (!c) {
568                 COND_DEBUG("RETURN %d", __LINE__);
569                 *error = "Empty condition is invalid";
570                 return -1;
571         }
572
573         talloc_free(c);
574
575         return slen;
576 }