/*
 * Declarations for the cregex regular expression code: parse tree nodes,
 * compiled program instructions, character class bitmaps, and the
 * parse / compile / run entry points.
 */
/* Include guard; the matching #endif is the last directive in the file. */
#ifndef CREGEX_H
#define CREGEX_H

/* Supplies the LONG, VOID and STATIC names used by the declarations below. */
#include <exec/types.h>

/*
 * INLINE: compiler-specific spelling of the inline keyword.
 *
 *   __SASC defined    -> __inline
 *   __VBCC__ defined  -> inline
 *   anything else     -> empty, so INLINE functions are ordinary functions
 */
#if defined(__SASC)
#define INLINE __inline
#elif defined(__VBCC__)
#define INLINE inline
#else
#define INLINE
#endif

/*
 * Kind of a parse tree node; stored in the `type` field of cregex_node_t.
 * Values are assigned consecutively starting from 0.
 */
typedef enum {
    REGEX_NODE_TYPE_EPSILON = 0,
    /* Characters */
    REGEX_NODE_TYPE_CHARACTER,
    REGEX_NODE_TYPE_ANY_CHARACTER,
    REGEX_NODE_TYPE_CHARACTER_CLASS,
    REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED,
    /* Composites */
    REGEX_NODE_TYPE_CONCATENATION,
    REGEX_NODE_TYPE_ALTERNATION,
    /* Quantifiers */
    REGEX_NODE_TYPE_QUANTIFIER,
    /* Anchors */
    REGEX_NODE_TYPE_ANCHOR_BEGIN,
    REGEX_NODE_TYPE_ANCHOR_END,
    /* Captures */
    REGEX_NODE_TYPE_CAPTURE
} cregex_node_type;

typedef struct cregex_node {
|
|
cregex_node_type type;
|
|
union {
|
|
/* REGEX_NODE_TYPE_CHARACTER */
|
|
struct {
|
|
LONG ch;
|
|
} a;
|
|
/* REGEX_NODE_TYPE_CHARACTER_CLASS,
|
|
* REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED
|
|
*/
|
|
struct {
|
|
const char *from, *to;
|
|
} b;
|
|
/* REGEX_NODE_TYPE_QUANTIFIER */
|
|
struct {
|
|
LONG nmin, nmax, greedy;
|
|
struct cregex_node *quantified;
|
|
} c;
|
|
/* REGEX_NODE_TYPE_CONCATENATION,
|
|
* REGEX_NODE_TYPE_ALTERNATION
|
|
*/
|
|
struct {
|
|
struct cregex_node *left, *right;
|
|
} d;
|
|
/* REGEX_NODE_TYPE_CAPTURE */
|
|
struct {
|
|
struct cregex_node *captured;
|
|
} e;
|
|
} u;
|
|
} cregex_node_t;
|
|
|
|
/*
 * Opcode of a compiled program instruction; stored in the `opcode` field of
 * cregex_program_instr_t.  Values are assigned consecutively starting from 0.
 */
typedef enum {
    REGEX_PROGRAM_OPCODE_MATCH = 0,
    /* Characters */
    REGEX_PROGRAM_OPCODE_CHARACTER,
    REGEX_PROGRAM_OPCODE_ANY_CHARACTER,
    REGEX_PROGRAM_OPCODE_CHCLS,
    REGEX_PROGRAM_OPCODE_CHCLS_NEGATED,
    /* Control-flow */
    REGEX_PROGRAM_OPCODE_SPLIT,
    REGEX_PROGRAM_OPCODE_JUMP,
    /* Assertions */
    REGEX_PROGRAM_OPCODE_ASSERT_BEGIN,
    REGEX_PROGRAM_OPCODE_ASSERT_END,
    /* Saving */
    REGEX_PROGRAM_OPCODE_SAVE
} cregex_program_opcode_t;

/* CHAR_BIT and UCHAR_MAX for the character class bitmap below. */
#include <limits.h>

/*
 * Character class bitmap: an array of char whose bits are addressed as
 * bit (ch % CHAR_BIT) of element (ch / CHAR_BIT) by the helpers below.
 * The element count is UCHAR_MAX divided by CHAR_BIT, rounded up.
 */
typedef char cregex_char_class[(UCHAR_MAX + CHAR_BIT - 1) / CHAR_BIT];

/*
 * Test whether the bit for character `ch` is set in `klass`.
 *
 * `ch` is reduced to unsigned char before it is used, so a sign-extended
 * char (a negative LONG) or a value above UCHAR_MAX can no longer yield a
 * negative or out-of-range array index or an invalid shift count.  For
 * values already in 0..UCHAR_MAX the result is unchanged.
 *
 * Returns the masked bit (non-zero, not necessarily 1) when set, else 0.
 */
STATIC INLINE LONG cregex_char_class_contains(const cregex_char_class klass,
                                              LONG ch)
{
    unsigned char byte = (unsigned char)ch;

    return klass[byte / CHAR_BIT] & (1 << (byte % CHAR_BIT));
}

/*
 * Set the bit for character `ch` in `klass`.
 *
 * `ch` is reduced to unsigned char before it is used, so a sign-extended
 * char (a negative LONG) or a value above UCHAR_MAX can no longer write
 * outside the bitmap or shift by an invalid count.  For values already in
 * 0..UCHAR_MAX the effect is unchanged.
 *
 * Returns `ch` exactly as it was passed in.
 */
STATIC INLINE LONG cregex_char_class_add(cregex_char_class klass, LONG ch)
{
    unsigned char byte = (unsigned char)ch;

    klass[byte / CHAR_BIT] |= 1 << (byte % CHAR_BIT);
    return ch;
}

typedef struct cregex_program_instr {
|
|
cregex_program_opcode_t opcode;
|
|
union {
|
|
/* REGEX_PROGRAM_OPCODE_CHARACTER */
|
|
struct {
|
|
LONG ch;
|
|
} a;
|
|
/* REGEX_PROGRAM_OPCODE_CHCLS,
|
|
* REGEX_PROGRAM_OPCODE_CHCLS_NEGATED
|
|
*/
|
|
struct {
|
|
cregex_char_class klass;
|
|
} b;
|
|
/* REGEX_PROGRAM_OPCODE_SPLIT */
|
|
struct {
|
|
struct cregex_program_instr *first, *second;
|
|
} c;
|
|
/* REGEX_PROGRAM_OPCODE_JUMP */
|
|
struct {
|
|
struct cregex_program_instr *target;
|
|
} d;
|
|
/* REGEX_PROGRAM_OPCODE_SAVE */
|
|
struct {
|
|
LONG save;
|
|
} e;
|
|
} u;
|
|
} cregex_program_instr_t;
|
|
|
|
typedef struct {
|
|
LONG ninstructions;
|
|
cregex_program_instr_t instructions[1];
|
|
} cregex_program_t;
|
|
|
|
/* Run program on string */
|
|
LONG cregex_program_run(const cregex_program_t *program,
|
|
const char *string,
|
|
const char **matches,
|
|
LONG nmatches);
|
|
|
|
/* Compile a parsed pattern */
|
|
cregex_program_t *cregex_compile_node(const cregex_node_t *root);
|
|
|
|
/* Free a compiled program */
|
|
VOID cregex_compile_free(cregex_program_t *program);
|
|
|
|
/* Parse a pattern */
|
|
cregex_node_t *cregex_parse(const char *pattern);
|
|
|
|
/* Free a parsed pattern */
|
|
VOID cregex_parse_free(cregex_node_t *root);
|
|
|
|
#endif /* CREGEX_H */