You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

147 lines
3.7 KiB

#ifndef CREGEX_H
#define CREGEX_H
#include <exec/types.h>
/*
 * INLINE: spelling of the "inline" function specifier for the compiler in
 * use.  SAS/C gets `__inline`, vbcc gets `inline`, and every other compiler
 * gets an empty expansion (so `STATIC INLINE` degrades to plain `STATIC`).
 */
#ifdef __SASC
#  define INLINE __inline
#else
#  ifdef __VBCC__
#    define INLINE inline
#  else
#    define INLINE
#  endif
#endif
/*
 * Tag stored in cregex_node.type; it selects which member of the node's
 * union is in use.  The numeric values are spelled out so that they stay
 * fixed if enumerators are ever reordered.
 */
typedef enum {
    REGEX_NODE_TYPE_EPSILON                 = 0,

    /* Characters */
    REGEX_NODE_TYPE_CHARACTER               = 1,
    REGEX_NODE_TYPE_ANY_CHARACTER           = 2,
    REGEX_NODE_TYPE_CHARACTER_CLASS         = 3,
    REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED = 4,

    /* Composites */
    REGEX_NODE_TYPE_CONCATENATION           = 5,
    REGEX_NODE_TYPE_ALTERNATION             = 6,

    /* Quantifiers */
    REGEX_NODE_TYPE_QUANTIFIER              = 7,

    /* Anchors */
    REGEX_NODE_TYPE_ANCHOR_BEGIN            = 8,
    REGEX_NODE_TYPE_ANCHOR_END              = 9,

    /* Captures */
    REGEX_NODE_TYPE_CAPTURE                 = 10
} cregex_node_type;
/*
 * One node of a pattern tree (the type returned by cregex_parse() and
 * consumed by cregex_compile_node()).
 *
 * `type` is the discriminant: each member of the union `u` is labelled with
 * the node type(s) it belongs to.  Node types not listed on any member
 * carry no payload in this structure.
 */
typedef struct cregex_node {
    cregex_node_type type;

    union {
        /* REGEX_NODE_TYPE_CHARACTER */
        struct {
            LONG ch;                        /* the character code */
        } a;

        /* REGEX_NODE_TYPE_CHARACTER_CLASS,
         * REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED
         */
        struct {
            /* NOTE(review): presumably delimit the class text inside the
             * pattern string -- confirm against the parser. */
            const char *from;
            const char *to;
        } b;

        /* REGEX_NODE_TYPE_QUANTIFIER */
        struct {
            /* NOTE(review): presumably repetition bounds and a greedy
             * flag; the encoding of "unbounded" is not visible here. */
            LONG nmin;
            LONG nmax;
            LONG greedy;
            struct cregex_node *quantified; /* sub-tree being repeated */
        } c;

        /* REGEX_NODE_TYPE_CONCATENATION,
         * REGEX_NODE_TYPE_ALTERNATION
         */
        struct {
            struct cregex_node *left;
            struct cregex_node *right;
        } d;

        /* REGEX_NODE_TYPE_CAPTURE */
        struct {
            struct cregex_node *captured;   /* sub-tree inside the group */
        } e;
    } u;
} cregex_node_t;
/*
 * Opcode stored in cregex_program_instr.opcode; it selects which member of
 * the instruction's union is in use.  The numeric values are spelled out so
 * that they stay fixed if enumerators are ever reordered.
 */
typedef enum {
    REGEX_PROGRAM_OPCODE_MATCH         = 0,

    /* Characters */
    REGEX_PROGRAM_OPCODE_CHARACTER     = 1,
    REGEX_PROGRAM_OPCODE_ANY_CHARACTER = 2,
    REGEX_PROGRAM_OPCODE_CHCLS         = 3,
    REGEX_PROGRAM_OPCODE_CHCLS_NEGATED = 4,

    /* Control-flow */
    REGEX_PROGRAM_OPCODE_SPLIT         = 5,
    REGEX_PROGRAM_OPCODE_JUMP          = 6,

    /* Assertions */
    REGEX_PROGRAM_OPCODE_ASSERT_BEGIN  = 7,
    REGEX_PROGRAM_OPCODE_ASSERT_END    = 8,

    /* Saving */
    REGEX_PROGRAM_OPCODE_SAVE          = 9
} cregex_program_opcode_t;
#include <limits.h>
/*
 * Membership bitmap for a character class: one bit for each value in
 * 0..UCHAR_MAX, packed CHAR_BIT bits per array element.
 */
typedef char cregex_char_class[UCHAR_MAX / CHAR_BIT + 1];
/*
 * Test whether character code `ch` is a member of `klass`.
 *
 * klass - class bitmap to query.
 * ch    - character code; only 0..UCHAR_MAX can be members.
 *
 * Returns a non-zero value (the matching bit mask, not necessarily 1) when
 * `ch` is in the class, 0 otherwise.  Codes outside 0..UCHAR_MAX -- for
 * example a negative value produced by a sign-extended `char` -- are
 * reported as "not a member" instead of indexing outside the bitmap or
 * shifting by a negative count.
 */
STATIC INLINE LONG cregex_char_class_contains(const cregex_char_class klass,
                                              LONG ch)
{
    if (ch < 0 || ch > UCHAR_MAX) {
        return 0;
    }
    return klass[ch / CHAR_BIT] & (1 << (ch % CHAR_BIT));
}
/*
 * Add character code `ch` to `klass`.
 *
 * klass - class bitmap to update in place.
 * ch    - character code; only 0..UCHAR_MAX can be stored.
 *
 * Returns `ch` unchanged.  Codes outside 0..UCHAR_MAX are ignored rather
 * than written outside the bitmap (a negative code would also make the
 * shift count negative, which is undefined behaviour).
 */
STATIC INLINE LONG cregex_char_class_add(cregex_char_class klass, LONG ch)
{
    if (ch >= 0 && ch <= UCHAR_MAX) {
        klass[ch / CHAR_BIT] |= 1 << (ch % CHAR_BIT);
    }
    return ch;
}
/*
 * One instruction of a compiled program.
 *
 * `opcode` is the discriminant: each member of the union `u` is labelled
 * with the opcode(s) it belongs to.  Opcodes not listed on any member carry
 * no operand in this structure.
 */
typedef struct cregex_program_instr {
    cregex_program_opcode_t opcode;

    union {
        /* REGEX_PROGRAM_OPCODE_CHARACTER */
        struct {
            LONG ch;                    /* the character code */
        } a;

        /* REGEX_PROGRAM_OPCODE_CHCLS,
         * REGEX_PROGRAM_OPCODE_CHCLS_NEGATED
         */
        struct {
            cregex_char_class klass;    /* bitmap stored inline */
        } b;

        /* REGEX_PROGRAM_OPCODE_SPLIT */
        struct {
            /* NOTE(review): presumably the two continuation targets, with
             * `first` tried before `second` -- confirm in the matcher. */
            struct cregex_program_instr *first;
            struct cregex_program_instr *second;
        } c;

        /* REGEX_PROGRAM_OPCODE_JUMP */
        struct {
            struct cregex_program_instr *target;
        } d;

        /* REGEX_PROGRAM_OPCODE_SAVE */
        struct {
            /* NOTE(review): presumably an index into the `matches` array
             * passed to cregex_program_run() -- confirm. */
            LONG save;
        } e;
    } u;
} cregex_program_instr_t;
/*
 * A compiled program: an instruction count followed by the instructions.
 *
 * `instructions` is declared with a single element.
 * NOTE(review): presumably this is the pre-C99 "struct hack", with the
 * object allocated large enough to hold `ninstructions` entries -- confirm
 * against cregex_compile_node() before changing the declaration, since
 * sizeof(cregex_program_t) includes that one element.
 */
typedef struct {
LONG ninstructions;
cregex_program_instr_t instructions[1];
} cregex_program_t;
/*
 * Run program on string.
 *
 * program  - compiled program to execute; not modified through this pointer.
 * string   - subject text.  NOTE(review): presumably NUL-terminated, since
 *            no length is passed -- confirm in the implementation.
 * matches  - caller-supplied array of `const char *` slots.
 *            NOTE(review): presumably receives positions within `string`
 *            recorded by SAVE instructions -- confirm.
 * nmatches - NOTE(review): presumably the number of slots in `matches`.
 *
 * The meaning of the LONG result is not visible from this header; check the
 * implementation before relying on a particular success/failure value.
 */
LONG cregex_program_run(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches);
/*
 * Compile a parsed pattern.
 *
 * root - root node of the pattern tree (the type cregex_parse() returns);
 *        not modified through this pointer.
 *
 * Returns a pointer to the compiled program.
 * NOTE(review): presumably NULL on failure, and presumably to be released
 * with cregex_compile_free() -- confirm in the implementation.
 */
cregex_program_t *cregex_compile_node(const cregex_node_t *root);
/*
 * Free a compiled program.
 *
 * program - program to release.
 * NOTE(review): whether a NULL argument is accepted is not visible from
 * this header -- confirm before relying on it.
 */
VOID cregex_compile_free(cregex_program_t *program);
/*
 * Parse a pattern.
 *
 * pattern - pattern text; not modified through this pointer.
 *
 * Returns the root node of the resulting tree.
 * NOTE(review): presumably NULL on a syntax or allocation error, and
 * presumably to be released with cregex_parse_free() -- confirm in the
 * implementation.
 */
cregex_node_t *cregex_parse(const char *pattern);
/*
 * Free a parsed pattern.
 *
 * root - root node of the tree to release.
 * NOTE(review): whether a NULL argument is accepted is not visible from
 * this header -- confirm before relying on it.
 */
VOID cregex_parse_free(cregex_node_t *root);
#endif