#ifndef CREGEX_H #define CREGEX_H #include #if defined(__SASC) #define INLINE __inline #elif defined(__VBCC__) #define INLINE inline #else #define INLINE #endif typedef enum { REGEX_NODE_TYPE_EPSILON = 0, /* Characters */ REGEX_NODE_TYPE_CHARACTER, REGEX_NODE_TYPE_ANY_CHARACTER, REGEX_NODE_TYPE_CHARACTER_CLASS, REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED, /* Composites */ REGEX_NODE_TYPE_CONCATENATION, REGEX_NODE_TYPE_ALTERNATION, /* Quantifiers */ REGEX_NODE_TYPE_QUANTIFIER, /* Anchors */ REGEX_NODE_TYPE_ANCHOR_BEGIN, REGEX_NODE_TYPE_ANCHOR_END, /* Captures */ REGEX_NODE_TYPE_CAPTURE } cregex_node_type; typedef struct cregex_node { cregex_node_type type; union { /* REGEX_NODE_TYPE_CHARACTER */ struct { LONG ch; } a; /* REGEX_NODE_TYPE_CHARACTER_CLASS, * REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED */ struct { const char *from, *to; } b; /* REGEX_NODE_TYPE_QUANTIFIER */ struct { LONG nmin, nmax, greedy; struct cregex_node *quantified; } c; /* REGEX_NODE_TYPE_CONCATENATION, * REGEX_NODE_TYPE_ALTERNATION */ struct { struct cregex_node *left, *right; } d; /* REGEX_NODE_TYPE_CAPTURE */ struct { struct cregex_node *captured; } e; } u; } cregex_node_t; typedef enum { REGEX_PROGRAM_OPCODE_MATCH = 0, /* Characters */ REGEX_PROGRAM_OPCODE_CHARACTER, REGEX_PROGRAM_OPCODE_ANY_CHARACTER, REGEX_PROGRAM_OPCODE_CHCLS, REGEX_PROGRAM_OPCODE_CHCLS_NEGATED, /* Control-flow */ REGEX_PROGRAM_OPCODE_SPLIT, REGEX_PROGRAM_OPCODE_JUMP, /* Assertions */ REGEX_PROGRAM_OPCODE_ASSERT_BEGIN, REGEX_PROGRAM_OPCODE_ASSERT_END, /* Saving */ REGEX_PROGRAM_OPCODE_SAVE } cregex_program_opcode_t; #include typedef char cregex_char_class[(UCHAR_MAX + CHAR_BIT - 1) / CHAR_BIT]; STATIC INLINE LONG cregex_char_class_contains(const cregex_char_class klass, LONG ch) { return klass[ch / CHAR_BIT] & (1 << ch % CHAR_BIT); } STATIC INLINE LONG cregex_char_class_add(cregex_char_class klass, LONG ch) { klass[ch / CHAR_BIT] |= 1 << (ch % CHAR_BIT); return ch; } typedef struct cregex_program_instr { cregex_program_opcode_t opcode; union { /* REGEX_PROGRAM_OPCODE_CHARACTER */ struct { LONG ch; } a; /* REGEX_PROGRAM_OPCODE_CHCLS, * REGEX_PROGRAM_OPCODE_CHCLS_NEGATED */ struct { cregex_char_class klass; } b; /* REGEX_PROGRAM_OPCODE_SPLIT */ struct { struct cregex_program_instr *first, *second; } c; /* REGEX_PROGRAM_OPCODE_JUMP */ struct { struct cregex_program_instr *target; } d; /* REGEX_PROGRAM_OPCODE_SAVE */ struct { LONG save; } e; } u; } cregex_program_instr_t; typedef struct { LONG ninstructions; cregex_program_instr_t instructions[1]; } cregex_program_t; /* Run program on string */ LONG cregex_program_run(const cregex_program_t *program, const char *string, const char **matches, LONG nmatches); /* Compile a parsed pattern */ cregex_program_t *cregex_compile_node(const cregex_node_t *root); /* Free a compiled program */ VOID cregex_compile_free(cregex_program_t *program); /* Parse a pattern */ cregex_node_t *cregex_parse(const char *pattern); /* Free a parsed pattern */ VOID cregex_parse_free(cregex_node_t *root); #endif