commit
4efeabf70b
@ -0,0 +1,6 @@ |
||||
*.o |
||||
*.lib |
||||
*.uaem |
||||
configreader |
||||
.DS_Store |
||||
|
@ -0,0 +1,234 @@ |
||||
#include "configmodel.h" |
||||
|
||||
#include "cregex.h" |
||||
#include "stringarray.h" |
||||
|
||||
#include <proto/exec.h> |
||||
#include <proto/dos.h> |
||||
#include <string.h> |
||||
#include <stdio.h> |
||||
|
||||
cregex_program_t* InitialisePattern(CONST_STRPTR pattern); |
||||
Array RunPattern(CONST_STRPTR text, cregex_program_t* patternProgram); |
||||
|
||||
/* Character class meant to match one whitespace character.
 * NOTE(review): the doubled backslashes put literal backslashes into the
 * pattern; the cregex parser in this project treats a backslash as "take the
 * next character literally", so this looks like it matches the letters
 * t/n/r/f/v rather than control characters. Unused in the visible code -
 * confirm before relying on it. */
#define WHITESPACE "[ \\t\\n\\r\\f\\v]" |
||||
/* Blank or comment-only line: optional whitespace followed by end of text,
 * '#' or ';'. The whitespace class contains the real control characters. */
#define RX_BLANK_LINE "^[ \t\n\r\f\v]*($|#|;)" |
||||
|
||||
/* Section header: [name] or [name "subname"], optionally followed by a
 * '#' / ';' comment. Group 1 is the name, group 3 the quoted sub-name. */
#define RX_SECTION_LINE "^[ \t\n\r\f\v]*\\[([a-z0-9]+)([ \t\n\r\f\v]*\"(.+)\")*\\][ \t\n\r\f\v]*($|#|;)" |
||||
/* Variable assignment: name = value. Group 1 is the name (a letter followed
 * by at least one more letter/digit), group 2 the value. */
#define RX_VARIABLE_LINE "^[ \t\n\r\f\v]*([a-z][a-z0-9]+)[ \t\n\r\f\v]*=[ \t\n\r\f\v]*(.+)[ \t\n\r\f\v]*($|#|;)" |
||||
/* Decimal integer with optional leading minus.
 * NOTE(review): the first digit must be 1-9, so "0" does not match -
 * confirm whether that is intended. */
#define RX_INTEGER "^-?[1-9][0-9]*$" |
||||
|
||||
/* Compiled forms of the RX_* patterns. They are filled in by
 * InitialisePatterns() and freed by ReleasePatterns(); a pointer stays NULL
 * when its pattern failed to parse or compile. */
STATIC cregex_program_t* sectionPatternProgram = NULL; |
||||
STATIC cregex_program_t* variablePatternProgram = NULL; |
||||
STATIC cregex_program_t* blankPatternProgram = NULL; |
||||
STATIC cregex_program_t* integerPatternProgram = NULL; |
||||
|
||||
/* A section header of the configuration file.
 * NOTE(review): presumably `names` holds the section name and the optional
 * quoted sub-name; nothing in the visible code populates it - confirm. */
struct Section |
||||
{ |
||||
StringArray names; |
||||
}; |
||||
|
||||
/* Tag describing which kind of value a struct Variable carries. */
enum VariableType
|
||||
{ |
||||
TypeBool=0, |
||||
TypeInteger=1, |
||||
TypeString=2, |
||||
}; |
||||
|
||||
/* A named configuration value. `type` is the tag for the `value` union
 * (presumably TypeString -> stringValue, TypeBool -> boolValue,
 * TypeInteger -> longValue; no visible code reads it yet - confirm). */
struct Variable |
||||
{ |
||||
enum VariableType type; |
||||
CONST_STRPTR name; |
||||
union
|
||||
{ |
||||
CONST_STRPTR stringValue; |
||||
BOOL boolValue; |
||||
LONG longValue; |
||||
} value; |
||||
}; |
||||
|
||||
/* One logical line read from the configuration file. This is the concrete
 * type behind the opaque LINEPTR handle (LineFree and LineGetRawText cast
 * the handle to it). `rawText` is an AllocVec'd copy of the line text that
 * LineFree releases. The `object` union is left zeroed by the visible code. */
struct Line |
||||
{ |
||||
STRPTR rawText; |
||||
union
|
||||
{ |
||||
struct Variable* variable; |
||||
struct Section* section; |
||||
}
|
||||
object; |
||||
}; |
||||
|
||||
VOID InitialisePatterns(VOID) |
||||
{ |
||||
sectionPatternProgram = InitialisePattern(RX_SECTION_LINE); |
||||
variablePatternProgram = InitialisePattern(RX_VARIABLE_LINE); |
||||
blankPatternProgram = InitialisePattern(RX_BLANK_LINE); |
||||
integerPatternProgram = InitialisePattern(RX_INTEGER); |
||||
} |
||||
|
||||
VOID ReleasePatterns(VOID) |
||||
{ |
||||
if( sectionPatternProgram != NULL ) cregex_compile_free( sectionPatternProgram ); |
||||
if( variablePatternProgram != NULL ) cregex_compile_free( variablePatternProgram ); |
||||
if( blankPatternProgram != NULL ) cregex_compile_free( blankPatternProgram ); |
||||
if( integerPatternProgram != NULL ) cregex_compile_free( integerPatternProgram ); |
||||
} |
||||
|
||||
/*
 * Parse and compile a single regular expression.
 *
 * pattern: pattern text in cregex syntax.
 * Returns the compiled program (release it with cregex_compile_free), or
 * NULL after printing a diagnostic when parsing or compiling fails. The
 * intermediate parse tree is always released before returning.
 */
cregex_program_t* InitialisePattern(CONST_STRPTR pattern)
{
    cregex_program_t* program;
    cregex_node_t* tree = cregex_parse(pattern);

    if( tree == NULL )
    {
        Printf("could not parse %s\n", pattern);
        return NULL;
    }

    program = cregex_compile_node( tree );
    if( program == NULL )
    {
        Printf("failed to compile %s\n", pattern);
    }

    cregex_parse_free( tree );
    return program;
}
||||
|
||||
/*
 * Run a compiled pattern against `text` and return the captured substrings.
 *
 * text:           NUL-terminated text to match.
 * patternProgram: program from InitialisePattern(); may be NULL (for
 *                 example when compilation failed), which counts as
 *                 "no match".
 *
 * Returns a new string array holding one AllocVec'd copy per capture that
 * took part in the match (index 0 is the whole match; captures that did not
 * participate are skipped, so later entries shift down). A single trailing
 * newline is stripped from each copy. Returns NULL when there is no match
 * or memory runs out. The caller releases the result with StringArrayFree().
 */
Array RunPattern(CONST_STRPTR text, cregex_program_t* patternProgram)
{
    Array result = NULL;
    const char* localMatches[20] = {0};
    const LONG capacity = (LONG)(sizeof(localMatches) / sizeof(localMatches[0]));
    LONG lastSet = 0;
    LONG j;

    if( text == NULL || patternProgram == NULL )
    {
        return NULL;
    }

    if( cregex_program_run(patternProgram, (const char*)text, localMatches, capacity) <= 0 )
    {
        return NULL;
    }

    /* find the highest slot the VM filled in */
    for( j = 0; j < capacity; ++j )
    {
        if( localMatches[j] != NULL )
        {
            lastSet = j;
        }
    }
    if( lastSet <= 0 )
    {
        return NULL;
    }

    result = StringArrayNew();
    if( result == NULL )
    {
        return NULL;
    }

    /* slots come in (start, end) pairs; lastSet <= capacity-1 keeps j+1 in range */
    for( j = 0; j <= lastSet; j += 2 )
    {
        const char* from = localMatches[j];
        const char* to = localMatches[j + 1];
        LONG len;
        STRPTR copy;

        if( from == NULL || to == NULL || to < from )
        {
            continue;
        }

        len = (LONG)(to - from);
        copy = AllocVec((ULONG)len + 1, MEMF_CLEAR);
        if( copy == NULL )
        {
            /* out of memory: report "no match" instead of a partial array */
            StringArrayFree(result);
            return NULL;
        }

        /* an empty capture has no last character to inspect */
        if( len > 0 )
        {
            memcpy(copy, from, (size_t)len);
            if( copy[len - 1] == '\n' )
            {
                copy[len - 1] = '\0';
            }
        }

        /* per the original note, the array frees the appended buffer */
        StringArrayAppend(result, copy);
    }

    return result;
}
||||
|
||||
/*
 * Read one logical line from `file`, joining physical lines that end in a
 * backslash immediately before the newline (the backslash-newline pair is
 * kept in the text).
 *
 * The line is classified against the section, variable and blank patterns
 * and a diagnostic is printed for each case; the classification result is
 * not stored yet.
 *
 * Returns a new line handle (release with LineFree), or NULL when nothing
 * could be read or memory is exhausted. A trailing newline is removed from
 * the stored text; text without a trailing newline (last line of a file, or
 * a line longer than the read buffer) is kept whole.
 */
LINEPTR LineReadIncludingContinuation(BPTR file)
{
    const ULONG bufLength = 512;
    STRPTR buffer;
    STRPTR read = NULL;
    ULONG bytesReadTotal = 0;
    struct Line* result = NULL;
    StringArray matches = NULL;

    /* One spare byte beyond bufLength: the dos.library autodoc reports that
     * FGets on V36/V37 may store one byte more than requested - verify. */
    buffer = AllocVec(bufLength + 1, MEMF_CLEAR);
    if( buffer == NULL )
    {
        return NULL;
    }

    do
    {
        read = FGets(file, &buffer[bytesReadTotal], bufLength - bytesReadTotal);
        bytesReadTotal = (ULONG)strlen((const char*)buffer);
    }
    while( read != NULL
        && bytesReadTotal >= 2
        /* stop once the buffer is full, otherwise FGets can make no progress */
        && bytesReadTotal < bufLength - 1
        && buffer[bytesReadTotal - 1] == '\n'
        && buffer[bytesReadTotal - 2] == '\\' );

    if( bytesReadTotal == 0 )
    {
        FreeVec(buffer);
        return NULL;
    }

    result = AllocVec(sizeof(struct Line), MEMF_CLEAR);
    if( result != NULL )
    {
        result->rawText = AllocVec(bytesReadTotal + 1, MEMF_CLEAR);
    }
    if( result == NULL || result->rawText == NULL )
    {
        if( result != NULL )
        {
            FreeVec(result);
        }
        FreeVec(buffer);
        return NULL;
    }

    CopyMem(buffer, result->rawText, bytesReadTotal);
    FreeVec(buffer);

    /* drop the line terminator only when there is one */
    if( result->rawText[bytesReadTotal - 1] == '\n' )
    {
        result->rawText[bytesReadTotal - 1] = '\0';
    }

    matches = RunPattern(result->rawText, sectionPatternProgram);
    if( matches != NULL )
    {
        /* 3 entries: whole match, name, terminator; 5: plus quoted sub-name */
        if( SizeOfArray(matches) == 3 )
        {
            Printf("\nsection {%s}\n", StringArrayValues(matches)[1]);
        }
        else if( SizeOfArray(matches) == 5 )
        {
            Printf("\nsection {%s.%s}\n", StringArrayValues(matches)[1], StringArrayValues(matches)[3]);
        }
        StringArrayFree(matches);
    }
    else if( (matches = RunPattern(result->rawText, variablePatternProgram)) != NULL )
    {
        if( SizeOfArray(matches) >= 3 )
        {
            Printf("\nvariable {%s} = {%s}\n", StringArrayValues(matches)[1], StringArrayValues(matches)[2]);
        }
        StringArrayFree(matches);
    }
    else if( (matches = RunPattern(result->rawText, blankPatternProgram)) != NULL )
    {
        Printf("\nYY %s\n", result->rawText);
        StringArrayFree(matches);
    }
    else
    {
        Printf("\nXX %s\n", result->rawText);
    }

    return result;
}
||||
|
||||
/*
 * Release a line handle returned by LineReadIncludingContinuation(),
 * including its text buffer. A NULL handle is ignored.
 */
VOID LineFree(LINEPTR abstractLine)
{
    struct Line* target = (struct Line*)abstractLine;

    if( target == NULL )
    {
        return;
    }

    if( target->rawText != NULL )
    {
        FreeVec(target->rawText);
    }
    FreeVec(target);
}
||||
|
||||
/*
 * Return the stored text of a line handle, or NULL for a NULL handle.
 * The text remains owned by the line.
 */
CONST_STRPTR LineGetRawText(LINEPTR abstractLine)
{
    const struct Line* source = (const struct Line*)abstractLine;

    return ( source == NULL ) ? NULL : (CONST_STRPTR)source->rawText;
}
||||
|
@ -0,0 +1,16 @@ |
||||
#ifndef __CONFIGMODEL_H |
||||
#define __CONFIGMODEL_H |
||||
#include <exec/types.h> |
||||
#include <dos/dos.h> |
||||
|
||||
/* Opaque handles handed out by the config model. LINEPTR is produced by
 * LineReadIncludingContinuation() and released with LineFree().
 * NOTE(review): SECTIONPTR and VARIABLEPTR have no functions in this header
 * yet - presumably reserved for section/variable objects. */
typedef APTR LINEPTR; |
||||
typedef APTR SECTIONPTR; |
||||
typedef APTR VARIABLEPTR; |
||||
|
||||
VOID InitialisePatterns(VOID); |
||||
VOID ReleasePatterns(VOID); |
||||
|
||||
LINEPTR LineReadIncludingContinuation(BPTR file); |
||||
VOID LineFree(LINEPTR abstractLine); |
||||
CONST_STRPTR LineGetRawText(LINEPTR line); |
||||
#endif |
@ -0,0 +1,147 @@ |
||||
#ifndef CREGEX_H |
||||
#define CREGEX_H |
||||
|
||||
#include <exec/types.h> |
||||
/* INLINE expands to the compiler's inline keyword: __inline when __SASC is
 * defined, inline when __VBCC__ is defined, and nothing otherwise. */
#if defined(__SASC) |
||||
#define INLINE __inline |
||||
#elif defined(__VBCC__) |
||||
#define INLINE inline |
||||
#else |
||||
#define INLINE |
||||
#endif |
||||
|
||||
/* Kinds of node in a parsed pattern tree; each kind uses the member of
 * cregex_node_t.u named in that struct's comments. */
typedef enum { |
||||
REGEX_NODE_TYPE_EPSILON = 0, |
||||
/* Characters */ |
||||
REGEX_NODE_TYPE_CHARACTER, |
||||
REGEX_NODE_TYPE_ANY_CHARACTER, |
||||
REGEX_NODE_TYPE_CHARACTER_CLASS, |
||||
REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED, |
||||
/* Composites */ |
||||
REGEX_NODE_TYPE_CONCATENATION, |
||||
REGEX_NODE_TYPE_ALTERNATION, |
||||
/* Quantifiers */ |
||||
REGEX_NODE_TYPE_QUANTIFIER, |
||||
/* Anchors */ |
||||
REGEX_NODE_TYPE_ANCHOR_BEGIN, |
||||
REGEX_NODE_TYPE_ANCHOR_END, |
||||
/* Captures */ |
||||
REGEX_NODE_TYPE_CAPTURE |
||||
} cregex_node_type; |
||||
|
||||
/* One node of a parsed pattern. `type` selects the active member of `u`.
 * Child pointers refer to other nodes inside the same buffer returned by
 * cregex_parse(). */
typedef struct cregex_node { |
||||
cregex_node_type type; |
||||
union { |
||||
/* REGEX_NODE_TYPE_CHARACTER */ |
||||
struct { |
||||
LONG ch; |
||||
} a; |
||||
/* REGEX_NODE_TYPE_CHARACTER_CLASS,
|
||||
* REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED |
||||
*/ |
||||
/* from/to point into the pattern text: first character of the class body
 * and its closing ']' (the text is not copied). */
struct { |
||||
const char *from, *to; |
||||
} b; |
||||
/* REGEX_NODE_TYPE_QUANTIFIER */ |
||||
/* nmax == -1 means "no upper bound"; greedy == 0 selects the lazy form. */
struct { |
||||
LONG nmin, nmax, greedy; |
||||
struct cregex_node *quantified; |
||||
} c; |
||||
/* REGEX_NODE_TYPE_CONCATENATION,
|
||||
* REGEX_NODE_TYPE_ALTERNATION |
||||
*/ |
||||
struct { |
||||
struct cregex_node *left, *right; |
||||
} d; |
||||
/* REGEX_NODE_TYPE_CAPTURE */ |
||||
struct { |
||||
struct cregex_node *captured; |
||||
} e; |
||||
} u; |
||||
} cregex_node_t; |
||||
|
||||
/* Instruction set of the compiled regex program; operands live in the
 * member of cregex_program_instr_t.u named in that struct's comments. */
typedef enum { |
||||
REGEX_PROGRAM_OPCODE_MATCH = 0, |
||||
/* Characters */ |
||||
REGEX_PROGRAM_OPCODE_CHARACTER, |
||||
REGEX_PROGRAM_OPCODE_ANY_CHARACTER, |
||||
REGEX_PROGRAM_OPCODE_CHCLS, |
||||
REGEX_PROGRAM_OPCODE_CHCLS_NEGATED, |
||||
/* Control-flow */ |
||||
REGEX_PROGRAM_OPCODE_SPLIT, |
||||
REGEX_PROGRAM_OPCODE_JUMP, |
||||
/* Assertions */ |
||||
REGEX_PROGRAM_OPCODE_ASSERT_BEGIN, |
||||
REGEX_PROGRAM_OPCODE_ASSERT_END, |
||||
/* Saving */ |
||||
REGEX_PROGRAM_OPCODE_SAVE |
||||
} cregex_program_opcode_t; |
||||
|
||||
#include <limits.h> |
||||
|
||||
/* Bit set with one bit per possible unsigned char value (32 bytes when
 * CHAR_BIT is 8); bit n is set when character code n belongs to the class. */
typedef char cregex_char_class[(UCHAR_MAX + CHAR_BIT - 1) / CHAR_BIT]; |
||||
|
||||
/*
 * Test whether character code `ch` is a member of `klass`.
 *
 * `ch` is reduced to the unsigned char range first, so a value that came
 * from a sign-extended plain `char` (codes 0x80..0xFF) addresses the
 * intended bit instead of indexing before the array.
 *
 * Returns non-zero when the bit is set, 0 otherwise.
 */
STATIC INLINE LONG cregex_char_class_contains(const cregex_char_class klass,
                                              LONG ch)
{
    const ULONG code = (ULONG)ch & UCHAR_MAX;

    return klass[code / CHAR_BIT] & (1 << (code % CHAR_BIT));
}
||||
|
||||
/*
 * Add character code `ch` to `klass`.
 *
 * `ch` is reduced to the unsigned char range before it is used as a bit
 * index, so sign-extended `char` values (codes 0x80..0xFF) cannot write
 * outside the set.
 *
 * Returns `ch` unchanged.
 */
STATIC INLINE LONG cregex_char_class_add(cregex_char_class klass, LONG ch)
{
    const ULONG code = (ULONG)ch & UCHAR_MAX;

    klass[code / CHAR_BIT] |= 1 << (code % CHAR_BIT);
    return ch;
}
||||
|
||||
/* One instruction of a compiled program. `opcode` selects the active member
 * of `u`; the SPLIT and JUMP operands point at other instructions of the
 * same program. */
typedef struct cregex_program_instr { |
||||
cregex_program_opcode_t opcode; |
||||
union { |
||||
/* REGEX_PROGRAM_OPCODE_CHARACTER */ |
||||
struct { |
||||
LONG ch; |
||||
} a; |
||||
/* REGEX_PROGRAM_OPCODE_CHCLS,
|
||||
* REGEX_PROGRAM_OPCODE_CHCLS_NEGATED |
||||
*/ |
||||
struct { |
||||
cregex_char_class klass; |
||||
} b; |
||||
/* REGEX_PROGRAM_OPCODE_SPLIT */ |
||||
struct { |
||||
struct cregex_program_instr *first, *second; |
||||
} c; |
||||
/* REGEX_PROGRAM_OPCODE_JUMP */ |
||||
struct { |
||||
struct cregex_program_instr *target; |
||||
} d; |
||||
/* REGEX_PROGRAM_OPCODE_SAVE */ |
||||
/* save is the index into the matches array that receives the position. */
struct { |
||||
LONG save; |
||||
} e; |
||||
} u; |
||||
} cregex_program_instr_t; |
||||
|
||||
/* A compiled program: an instruction count followed by the instructions.
 * `instructions` is declared with one element but cregex_compile_node()
 * allocates the struct with room for all of them. */
typedef struct { |
||||
LONG ninstructions; |
||||
cregex_program_instr_t instructions[1]; |
||||
} cregex_program_t; |
||||
|
||||
/* Run program on string */ |
||||
LONG cregex_program_run(const cregex_program_t *program, |
||||
const char *string, |
||||
const char **matches, |
||||
LONG nmatches); |
||||
|
||||
/* Compile a parsed pattern */ |
||||
cregex_program_t *cregex_compile_node(const cregex_node_t *root); |
||||
|
||||
/* Free a compiled program */ |
||||
VOID cregex_compile_free(cregex_program_t *program); |
||||
|
||||
/* Parse a pattern */ |
||||
cregex_node_t *cregex_parse(const char *pattern); |
||||
|
||||
/* Free a parsed pattern */ |
||||
VOID cregex_parse_free(cregex_node_t *root); |
||||
|
||||
#endif |
@ -0,0 +1,351 @@ |
||||
#include <proto/exec.h> |
||||
#include <stdlib.h> |
||||
|
||||
#include "cregex.h" |
||||
#include <string.h> |
||||
|
||||
/* Compiler state: `pc` is where the next instruction will be written,
 * `ncaptures` the number of capture groups numbered so far. */
typedef struct { |
||||
cregex_program_instr_t *pc; |
||||
LONG ncaptures; |
||||
} regex_compile_context; |
||||
|
||||
/*
 * Number of instructions compile_context() emits for the tree rooted at
 * `node`. Unknown node types count as 0.
 */
STATIC LONG count_instructions(const cregex_node_t *node)
{
    LONG total = 0;

    switch (node->type) {
    case REGEX_NODE_TYPE_EPSILON:
        break;

    /* single-instruction nodes: characters and anchors */
    case REGEX_NODE_TYPE_CHARACTER:
    case REGEX_NODE_TYPE_ANY_CHARACTER:
    case REGEX_NODE_TYPE_CHARACTER_CLASS:
    case REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED:
    case REGEX_NODE_TYPE_ANCHOR_BEGIN:
    case REGEX_NODE_TYPE_ANCHOR_END:
        total = 1;
        break;

    case REGEX_NODE_TYPE_CONCATENATION:
        total = count_instructions(node->u.d.left) +
                count_instructions(node->u.d.right);
        break;

    /* split + left + jump + right */
    case REGEX_NODE_TYPE_ALTERNATION:
        total = 2 +
                count_instructions(node->u.d.left) +
                count_instructions(node->u.d.right);
        break;

    case REGEX_NODE_TYPE_QUANTIFIER: {
        const LONG body = count_instructions(node->u.c.quantified);
        const LONG lower = node->u.c.nmin;
        const LONG upper = node->u.c.nmax;

        if (upper >= lower) {
            /* mandatory copies, then (split + body) per optional copy */
            total = lower * body + (upper - lower) * (body + 1);
        } else if (lower) {
            /* unbounded, at least one: copies plus a looping split */
            total = 1 + lower * body;
        } else {
            /* unbounded from zero: split + body + jump */
            total = 1 + (body + 1);
        }
        break;
    }

    /* save + body + save */
    case REGEX_NODE_TYPE_CAPTURE:
        total = 2 + count_instructions(node->u.e.captured);
        break;
    }

    return total;
}
||||
|
||||
/*
 * Report whether the pattern rooted at `node` begins with a '^' anchor.
 *
 * Concatenations are judged by their left child, quantifiers and captures
 * by their single child, and an alternation only when both branches are
 * anchored. Everything else is unanchored.
 */
STATIC BOOL node_is_anchored(const cregex_node_t *node)
{
    for (;;) {
        switch (node->type) {
        case REGEX_NODE_TYPE_ANCHOR_BEGIN:
            return TRUE;

        /* single-child descent */
        case REGEX_NODE_TYPE_CONCATENATION:
            node = node->u.d.left;
            continue;
        case REGEX_NODE_TYPE_QUANTIFIER:
            node = node->u.c.quantified;
            continue;
        case REGEX_NODE_TYPE_CAPTURE:
            node = node->u.e.captured;
            continue;

        case REGEX_NODE_TYPE_ALTERNATION:
            return (BOOL)(node_is_anchored(node->u.d.left) &&
                          node_is_anchored(node->u.d.right));

        default:
            return FALSE;
        }
    }
}
||||
|
||||
/*
 * Copy `instruction` to the current output position and advance it.
 * Returns the address of the stored copy.
 */
STATIC INLINE cregex_program_instr_t *emit(
    regex_compile_context *context,
    const cregex_program_instr_t *instruction)
{
    cregex_program_instr_t *slot = context->pc;

    *slot = *instruction;
    context->pc = slot + 1;
    return slot;
}
||||
|
||||
/*
 * Fill the character-class bit set of `instruction` from the class body
 * that `node` points at in the pattern text.
 *
 * Scanning starts at node->u.b.from and stops at the first ']' that is not
 * the very first character. A backslash makes the next character literal;
 * "x-y" adds the whole range unless the '-' is directly followed by ']'.
 * The body is expected to have been validated by the parser already.
 *
 * Returns `instruction`.
 */
STATIC cregex_program_instr_t *compile_char_class(
    const cregex_node_t *node,
    cregex_program_instr_t *instruction)
{
    const char *cursor = node->u.b.from;

    for (;;) {
        LONG ch = *cursor++;

        /* a ']' in first position is an ordinary member */
        if (ch == ']' && cursor - 1 != node->u.b.from)
            return instruction;

        if (ch == '\\')
            ch = *cursor++;

        if (*cursor == '-' && cursor[1] != ']') {
            while (ch <= cursor[1]) {
                cregex_char_class_add(instruction->u.b.klass, ch);
                ++ch;
            }
            cursor += 2;
        } else {
            cregex_char_class_add(instruction->u.b.klass, ch);
        }
    }
}
||||
|
||||
/*
 * Emit the instructions for the tree rooted at `node` at context->pc.
 *
 * Returns the address where this node's code begins (the value of
 * context->pc on entry). The output buffer must already be large enough;
 * see count_instructions().
 */
STATIC cregex_program_instr_t *compile_context(regex_compile_context *context,
                                               const cregex_node_t *node)
{
    cregex_program_instr_t *const entry = context->pc;
    const LONG capturesOnEntry = context->ncaptures;
    cregex_program_instr_t *fork = NULL;
    cregex_program_instr_t *hop = NULL;
    cregex_program_instr_t *swap = NULL;
    cregex_program_instr_t scratch;
    LONG slot = 0;

    memset(&scratch, 0, sizeof(scratch));

    switch (node->type) {
    case REGEX_NODE_TYPE_EPSILON:
        break;

    /* Characters */
    case REGEX_NODE_TYPE_CHARACTER:
        scratch.opcode = REGEX_PROGRAM_OPCODE_CHARACTER;
        scratch.u.a.ch = node->u.a.ch;
        emit(context, &scratch);
        break;

    case REGEX_NODE_TYPE_ANY_CHARACTER:
        scratch.opcode = REGEX_PROGRAM_OPCODE_ANY_CHARACTER;
        emit(context, &scratch);
        break;

    case REGEX_NODE_TYPE_CHARACTER_CLASS:
    case REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED:
        scratch.opcode = (node->type == REGEX_NODE_TYPE_CHARACTER_CLASS)
                             ? REGEX_PROGRAM_OPCODE_CHCLS
                             : REGEX_PROGRAM_OPCODE_CHCLS_NEGATED;
        compile_char_class(node, emit(context, &scratch));
        break;

    /* Composites */
    case REGEX_NODE_TYPE_CONCATENATION:
        compile_context(context, node->u.d.left);
        compile_context(context, node->u.d.right);
        break;

    case REGEX_NODE_TYPE_ALTERNATION:
        /* split L1, L2;  L1: left; jump L3;  L2: right;  L3: */
        scratch.opcode = REGEX_PROGRAM_OPCODE_SPLIT;
        fork = emit(context, &scratch);
        fork->u.c.first = compile_context(context, node->u.d.left);
        scratch.opcode = REGEX_PROGRAM_OPCODE_JUMP;
        hop = emit(context, &scratch);
        fork->u.c.second = compile_context(context, node->u.d.right);
        hop->u.d.target = context->pc;
        break;

    /* Quantifiers */
    case REGEX_NODE_TYPE_QUANTIFIER: {
        const LONG required = node->u.c.nmin;
        cregex_program_instr_t *lastCopy = NULL;
        LONG n;

        /* mandatory copies; every copy reuses the same capture numbers */
        for (n = 0; n < required; ++n) {
            context->ncaptures = capturesOnEntry;
            lastCopy = compile_context(context, node->u.c.quantified);
        }

        if (node->u.c.nmax > required) {
            /* bounded: each optional copy is guarded by its own split */
            for (n = 0; n < node->u.c.nmax - required; ++n) {
                memset(&scratch, 0, sizeof(scratch));
                scratch.opcode = REGEX_PROGRAM_OPCODE_SPLIT;
                context->ncaptures = capturesOnEntry;
                fork = emit(context, &scratch);
                fork->u.c.first = compile_context(context, node->u.c.quantified);
                fork->u.c.second = context->pc;
                if (!node->u.c.greedy) {
                    swap = fork->u.c.first;
                    fork->u.c.first = fork->u.c.second;
                    fork->u.c.second = swap;
                }
            }
        } else if (node->u.c.nmax == -1) {
            /* unbounded */
            scratch.opcode = REGEX_PROGRAM_OPCODE_SPLIT;
            fork = emit(context, &scratch);
            if (required == 0) {
                /* L1: split L2, L3;  L2: body; jump L1;  L3: */
                fork->u.c.first = compile_context(context, node->u.c.quantified);
                scratch.opcode = REGEX_PROGRAM_OPCODE_JUMP;
                hop = emit(context, &scratch);
                fork->u.c.second = context->pc;
                hop->u.d.target = fork;
            } else {
                /* loop back to the start of the last mandatory copy */
                fork->u.c.first = lastCopy;
                fork->u.c.second = context->pc;
            }
            if (!node->u.c.greedy) {
                swap = fork->u.c.first;
                fork->u.c.first = fork->u.c.second;
                fork->u.c.second = swap;
            }
        }
        break;
    }

    /* Anchors */
    case REGEX_NODE_TYPE_ANCHOR_BEGIN:
        scratch.opcode = REGEX_PROGRAM_OPCODE_ASSERT_BEGIN;
        emit(context, &scratch);
        break;

    case REGEX_NODE_TYPE_ANCHOR_END:
        scratch.opcode = REGEX_PROGRAM_OPCODE_ASSERT_END;
        emit(context, &scratch);
        break;

    /* Captures: save start, body, save end */
    case REGEX_NODE_TYPE_CAPTURE:
        slot = context->ncaptures++ * 2;
        scratch.opcode = REGEX_PROGRAM_OPCODE_SAVE;
        scratch.u.e.save = slot;
        emit(context, &scratch);

        compile_context(context, node->u.e.captured);

        scratch.u.e.save = slot + 1;
        emit(context, &scratch);
        break;
    }

    return entry;
}
||||
|
||||
/* Compile a parsed pattern (using a previously allocated program with at least
|
||||
* estimate_instructions(root) instructions). |
||||
*/ |
||||
STATIC cregex_program_t *compile_node_with_program(const cregex_node_t *root, |
||||
cregex_program_t *program) |
||||
{ |
||||
regex_compile_context context; |
||||
cregex_node_t rootNode; |
||||
cregex_program_instr_t finalInstr; |
||||
|
||||
memset(&rootNode, 0, sizeof(cregex_node_t)); |
||||
rootNode.type = REGEX_NODE_TYPE_CAPTURE; |
||||
rootNode.u.e.captured = (cregex_node_t *)root; |
||||
/* add capture node for entire match */ |
||||
root = &rootNode; |
||||
|
||||
/* add .*? unless pattern starts with ^ */ |
||||
if (!node_is_anchored(root))
|
||||
{ |
||||
cregex_node_t concatNode; |
||||
cregex_node_t quantifierNode; |
||||
cregex_node_t anyCharNode; |
||||
|
||||
memset(&anyCharNode, 0, sizeof(cregex_node_t)); |
||||
anyCharNode.type = REGEX_NODE_TYPE_ANY_CHARACTER; |
||||
|
||||
memset(&quantifierNode, 0, sizeof(cregex_node_t)); |
||||
quantifierNode.type = REGEX_NODE_TYPE_QUANTIFIER; |
||||
quantifierNode.u.c.nmin = 0; |
||||
quantifierNode.u.c.nmax = -1; |
||||
quantifierNode.u.c.greedy = 0; |
||||
quantifierNode.u.c.quantified = &anyCharNode; |
||||
|
||||
memset(&concatNode, 0, sizeof(cregex_node_t)); |
||||
concatNode.type = REGEX_NODE_TYPE_CONCATENATION; |
||||
concatNode.u.d.left = &quantifierNode; |
||||
concatNode.u.d.right = (cregex_node_t*)root; |
||||
|
||||
root = &concatNode; |
||||
} |
||||
|
||||
/* compile */ |
||||
memset(&context, 0, sizeof(regex_compile_context));
|
||||
context.pc = program->instructions; |
||||
context.ncaptures = 0; |
||||
compile_context(&context, root); |
||||
|
||||
/* emit final match instruction */ |
||||
memset(&finalInstr, 0, sizeof(cregex_program_instr_t)); |
||||
finalInstr.opcode = REGEX_PROGRAM_OPCODE_MATCH; |
||||
emit(&context, &finalInstr); |
||||
|
||||
/* set total number of instructions */ |
||||
program->ninstructions = context.pc - program->instructions; |
||||
|
||||
return program; |
||||
} |
||||
|
||||
/* Upper bound of number of instructions required to compile parsed pattern. */ |
||||
STATIC LONG estimate_instructions(const cregex_node_t *root) |
||||
{ |
||||
return count_instructions(root) |
||||
/* .*? is added unless pattern starts with ^,
|
||||
* save instructions are added for beginning and end of match, |
||||
* a final match instruction is added to the end of the program |
||||
*/ |
||||
+ !node_is_anchored(root) * 3 + 2 + 1; |
||||
} |
||||
|
||||
cregex_program_t *cregex_compile_node(const cregex_node_t *root) |
||||
{ |
||||
size_t size = sizeof(cregex_program_t) + |
||||
sizeof(cregex_program_instr_t) * (estimate_instructions(root) - 1); |
||||
cregex_program_t *program; |
||||
|
||||
if (!(program = AllocVec(size, MEMF_CLEAR))) |
||||
return NULL; |
||||
|
||||
if (!compile_node_with_program(root, program)) { |
||||
free(program); |
||||
return NULL; |
||||
} |
||||
|
||||
return program; |
||||
} |
||||
|
||||
/* Free a compiled program */ |
||||
/* `program` is the pointer returned by cregex_compile_node(); it is handed
 * straight to FreeVec(), matching the AllocVec() in that function. */
VOID cregex_compile_free(cregex_program_t *program) |
||||
{ |
||||
FreeVec(program); |
||||
} |
@ -0,0 +1,308 @@ |
||||
#include <proto/exec.h> |
||||
#include <proto/dos.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
#include "cregex.h" |
||||
|
||||
/* Parser state. `sp` is the read position in the pattern text. `stack` and
 * `output` share one node buffer: `stack` points at the next free slot and
 * grows upwards from the start, `output` grows downwards from the end and
 * receives nodes moved off the stack by consume(). */
typedef struct { |
||||
const char *sp; |
||||
cregex_node_t *stack, *output; |
||||
} regex_parse_context; |
||||
|
||||
/* Shunting-yard algorithm
|
||||
* See https://en.wikipedia.org/wiki/Shunting-yard_algorithm
|
||||
*/ |
||||
|
||||
/*
 * Copy `node` onto the top of the operand stack.
 * Returns the address of the stored copy. No overflow check is made
 * against the output area.
 */
STATIC INLINE cregex_node_t *push(regex_parse_context *context,
                                  const cregex_node_t *node)
{
    cregex_node_t *slot = context->stack;

    *slot = *node;
    context->stack = slot + 1;
    return slot;
}
||||
|
||||
/*
 * Discard the top of the operand stack.
 * Returns the address of the discarded node.
 */
STATIC INLINE cregex_node_t *drop(regex_parse_context *context)
{
    context->stack -= 1;
    return context->stack;
}
||||
|
||||
/*
 * Move the top of the operand stack into the output area, where its
 * address stays stable so other nodes can point at it.
 * Returns the node's new address.
 */
STATIC INLINE cregex_node_t *consume(regex_parse_context *context)
{
    cregex_node_t *source = context->stack - 1;
    cregex_node_t *target = context->output - 1;

    *target = *source;
    context->stack = source;
    context->output = target;
    return target;
}
||||
|
||||
/*
 * Collapse everything on the stack above `bottom` into a single node.
 *
 * An empty range produces an EPSILON node; otherwise the top two nodes are
 * repeatedly replaced by a CONCATENATION of them until one node is left.
 * Returns the address of that node (the new top of the stack).
 */
STATIC INLINE cregex_node_t *concatenate(regex_parse_context *context,
                                         const cregex_node_t *bottom)
{
    cregex_node_t joined;

    memset(&joined, 0, sizeof(joined));

    if (context->stack == bottom) {
        joined.type = REGEX_NODE_TYPE_EPSILON;
        push(context, &joined);
        return context->stack - 1;
    }

    joined.type = REGEX_NODE_TYPE_CONCATENATION;
    while (context->stack - 1 > bottom) {
        /* the right operand is on top, so it is taken first */
        joined.u.d.right = consume(context);
        joined.u.d.left = consume(context);
        push(context, &joined);
    }

    return context->stack - 1;
}
||||
|
||||
/*
 * Parse the body of a character class; context->sp points just past '['.
 *
 * The body is only validated and delimited here - the node records where it
 * starts and ends in the pattern text, and the compiler expands it later.
 * A leading '^' negates the class, a ']' in first position is an ordinary
 * member, and a backslash makes the next character literal.
 *
 * Returns the pushed class node, or NULL when the pattern ends before the
 * closing ']' (including directly after a backslash or inside a range) or
 * when a range is empty (for example "z-a").
 */
STATIC cregex_node_t *parse_char_class(regex_parse_context *context)
{
    cregex_node_t newNode;
    cregex_node_type type = REGEX_NODE_TYPE_CHARACTER_CLASS;
    const char *from;

    if (*context->sp == '^') {
        ++context->sp;
        type = REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED;
    }
    from = context->sp;

    for (;;) {
        LONG ch = *context->sp++;

        switch (ch) {
        case '\0':
            /* premature end of character class */
            return NULL;
        case ']':
            if (context->sp - 1 == from)
                goto CHARACTER;
            memset(&newNode, 0, sizeof(cregex_node_t));
            newNode.type = type;
            newNode.u.b.from = from;
            newNode.u.b.to = context->sp - 1;
            return push(context, &newNode);
        case '\\':
            ch = *context->sp++;
            if (ch == '\0')
                /* backslash at end of pattern: nothing left to escape */
                return NULL;
            /* fall-through */
        default:
        CHARACTER:
            if (*context->sp == '-' && context->sp[1] != ']') {
                if (context->sp[1] == '\0')
                    /* pattern ends in the middle of a range */
                    return NULL;
                if (context->sp[1] < ch)
                    /* empty range in character class */
                    return NULL;
                context->sp += 2;
            }
            break;
        }
    }
}
||||
|
||||
/*
 * Parse an interval quantifier; context->sp points just past '{'.
 *
 * Accepted forms: {n}, {n,}, {n,m} and {,m}, each optionally followed by
 * '?' to make the quantifier lazy. On success the node on top of the stack
 * becomes the quantified operand and the new quantifier node is pushed and
 * returned. On a malformed interval context->sp is restored and NULL is
 * returned.
 */
STATIC cregex_node_t *parse_interval(regex_parse_context *context)
{
    const char *const start = context->sp;
    LONG lower = 0;
    LONG upper = 0;
    cregex_node_t quantifier;

    memset(&quantifier, 0, sizeof(quantifier));

    while (*context->sp >= '0' && *context->sp <= '9') {
        lower = (lower * 10) + (*context->sp - '0');
        ++context->sp;
    }

    if (*context->sp == ',') {
        ++context->sp;
        if (*start != ',' && *context->sp == '}') {
            /* {n,} : no upper bound */
            upper = -1;
        } else {
            while (*context->sp >= '0' && *context->sp <= '9') {
                upper = (upper * 10) + (*context->sp - '0');
                ++context->sp;
            }
            /* reject a missing upper bound, a missing '}' and m < n */
            if (context->sp[-1] == ',' || *context->sp != '}' ||
                upper < lower) {
                context->sp = start;
                return NULL;
            }
        }
    } else if (*start != '}' && *context->sp == '}') {
        /* {n} : exactly n */
        upper = lower;
    } else {
        context->sp = start;
        return NULL;
    }

    ++context->sp; /* skip '}' */

    quantifier.type = REGEX_NODE_TYPE_QUANTIFIER;
    quantifier.u.c.nmin = lower;
    quantifier.u.c.nmax = upper;
    if (*context->sp == '?') {
        ++context->sp;
        quantifier.u.c.greedy = 0;
    } else {
        quantifier.u.c.greedy = 1;
    }
    quantifier.u.c.quantified = consume(context);
    return push(context, &quantifier);
}
||||
|
||||
/*
 * Parse the pattern from context->sp up to the end of the current group.
 *
 * depth: number of enclosing '(' groups; 0 for the top level.
 *
 * Operands are pushed on the stack as they are read. Quantifiers wrap the
 * operand on top of the stack (or are taken literally when there is none),
 * '|' parses its right-hand side recursively, and ')' or the end of the
 * pattern collapses everything read at this level into one node.
 *
 * Returns the node for this level, or NULL on a malformed pattern:
 * unbalanced parentheses, a bad character class, or a backslash with
 * nothing after it.
 */
STATIC cregex_node_t *parse_context(regex_parse_context *context, LONG depth)
{
    cregex_node_t *bottom = context->stack;
    cregex_node_t newNode;

    for (;;) {
        LONG ch = *context->sp++;
        memset(&newNode, 0, sizeof(cregex_node_t));

        switch (ch) {
        /* Characters */
        case '\\':
            ch = *context->sp++;
            if (ch == '\0')
                /* backslash at end of pattern: rejecting it keeps the
                 * scan from running past the terminator */
                return NULL;
            /* fall-through */
        default:
        CHARACTER:
            newNode.type = REGEX_NODE_TYPE_CHARACTER;
            newNode.u.a.ch = ch;
            push(context, &newNode);
            break;
        case '.':
            newNode.type = REGEX_NODE_TYPE_ANY_CHARACTER;
            push(context, &newNode);
            break;
        case '[':
            if (!parse_char_class(context))
                return NULL;
            break;

        /* Composites */
        case '|': {
            cregex_node_t *left = concatenate(context, bottom), *right;
            if (!(right = parse_context(context, depth)))
                return NULL;
            if (left->type == REGEX_NODE_TYPE_EPSILON &&
                right->type == left->type) {
                /* both sides empty: keep a single epsilon */
                drop(context);
            } else if (left->type == REGEX_NODE_TYPE_EPSILON) {
                /* "|x" is the same as "x?" */
                right = consume(context);
                drop(context);
                newNode.type = REGEX_NODE_TYPE_QUANTIFIER;
                newNode.u.c.nmin = 0;
                newNode.u.c.nmax = 1;
                newNode.u.c.greedy = 1;
                newNode.u.c.quantified = right;
                push(context, &newNode);
            } else if (right->type == REGEX_NODE_TYPE_EPSILON) {
                /* "x|" is the same as "x?" */
                drop(context);
                left = consume(context);
                newNode.type = REGEX_NODE_TYPE_QUANTIFIER;
                newNode.u.c.nmin = 0;
                newNode.u.c.nmax = 1;
                newNode.u.c.greedy = 1;
                newNode.u.c.quantified = left;
                push(context, &newNode);
            } else {
                right = consume(context);
                left = consume(context);
                newNode.type = REGEX_NODE_TYPE_ALTERNATION;
                newNode.u.d.left = left;
                newNode.u.d.right = right;
                push(context, &newNode);
            }
            return bottom;
        }

        /* Quantifiers: '?' = {0,1}, '*' = {0,}, '+' = {1,} */
        case '?':
        case '*':
        case '+':
            if (context->stack == bottom)
                /* nothing to quantify: treat as a literal character */
                goto CHARACTER;
            newNode.type = REGEX_NODE_TYPE_QUANTIFIER;
            newNode.u.c.nmin = (ch == '+') ? 1 : 0;
            newNode.u.c.nmax = (ch == '?') ? 1 : -1;
            newNode.u.c.greedy = (*context->sp == '?') ? (++context->sp, 0) : 1;
            newNode.u.c.quantified = consume(context);
            push(context, &newNode);
            break;

        case '{':
            if ((context->stack == bottom) || !parse_interval(context))
                goto CHARACTER;
            break;

        /* Anchors */
        case '^':
            newNode.type = REGEX_NODE_TYPE_ANCHOR_BEGIN;
            push(context, &newNode);
            break;
        case '$':
            newNode.type = REGEX_NODE_TYPE_ANCHOR_END;
            push(context, &newNode);
            break;

        /* Captures */
        case '(':
            if (!parse_context(context, depth + 1))
                return NULL;
            newNode.type = REGEX_NODE_TYPE_CAPTURE;
            newNode.u.e.captured = consume(context);
            push(context, &newNode);
            break;
        case ')':
            if (depth > 0)
                return concatenate(context, bottom);
            /* unmatched close parenthesis */
            return NULL;

        /* End of string */
        case '\0':
            if (depth == 0)
                return concatenate(context, bottom);
            /* unmatched open parenthesis */
            return NULL;
        }
    }
}
||||
|
||||
/*
 * Number of node slots to allocate for parsing `pattern`: two per pattern
 * character, and never fewer than one because even an empty pattern
 * produces an EPSILON node.
 */
STATIC INLINE LONG estimate_nodes(const char *pattern)
{
    const LONG needed = (LONG)(strlen(pattern) * 2);

    return (needed > 0) ? needed : 1;
}
||||
|
||||
/* Parse a pattern (using a previously allocated buffer of at least
|
||||
* estimate_nodes(pattern) nodes). |
||||
*/ |
||||
STATIC cregex_node_t *parse_with_nodes(const char *pattern, |
||||
cregex_node_t *nodes) |
||||
{ |
||||
regex_parse_context context; |
||||
context.sp = pattern; |
||||
context.stack = nodes, |
||||
context.output = nodes + estimate_nodes(pattern); |
||||
return parse_context(&context, 0); |
||||
} |
||||
|
||||
cregex_node_t *cregex_parse(const char *pattern) |
||||
{ |
||||
size_t size = sizeof(cregex_node_t) * estimate_nodes(pattern); |
||||
cregex_node_t *nodes = AllocVec(size, MEMF_CLEAR); |
||||
// Printf("mallocing %ld bytes for parse\n", size);
|
||||
if (!nodes) |
||||
return NULL; |
||||
|
||||
if (!parse_with_nodes(pattern, nodes)) { |
||||
free(nodes); |
||||
return NULL; |
||||
} |
||||
|
||||
return nodes; |
||||
} |
||||
|
||||
/* Release a tree returned by cregex_parse(). `root` is the start of the
 * node buffer allocated there with AllocVec(), so FreeVec() frees the whole
 * tree in one go. */
VOID cregex_parse_free(cregex_node_t *root) |
||||
{ |
||||
FreeVec(root); |
||||
} |
@ -0,0 +1,229 @@ |
||||
#include <proto/exec.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
|
||||
#include "cregex.h" |
||||
|
||||
#define REGEX_VM_MAX_MATCHES 20 |
||||
|
||||
/* The VM executes one or more threads, each running a regular expression
|
||||
* program, which is just a list of regular expression instructions. Each |
||||
* thread maintains two registers while it runs: a program counter (PC) and |
||||
* a string pointer (SP). |
||||
*/ |
||||
typedef struct { |
||||
LONG visited; |
||||
const cregex_program_instr_t *pc; |
||||
const char *matches[REGEX_VM_MAX_MATCHES]; |
||||
} vm_thread; |
||||
|
||||
/* Run program on string */ |
||||
STATIC LONG vm_run(const cregex_program_t *program, |
||||
const char *string, |
||||
const char **matches, |
||||
LONG nmatches); |
||||
|
||||
/* Run program on string (using a previously allocated buffer of at least
|
||||
* vm_estimate_threads(program) threads) |
||||
*/ |
||||
STATIC LONG vm_run_with_threads(const cregex_program_t *program, |
||||
const char *string, |
||||
const char **matches, |
||||
LONG nmatches, |
||||
vm_thread *threads); |
||||
|
||||
typedef struct { |
||||
LONG nthreads; |
||||
vm_thread *threads; |
||||
} vm_thread_list; |
||||
|
||||
/* Queue the thread starting at instruction `pc` for string position `sp`.
 *
 * Instructions that consume input (and MATCH) are appended to `list` with a
 * copy of the current capture pointers. Control-flow, assertion and save
 * instructions are resolved immediately by recursing to the instruction(s)
 * they lead to. An instruction already queued for this string position is
 * not queued again.
 */
STATIC VOID vm_add_thread(vm_thread_list *list,
                          const cregex_program_t *program,
                          const cregex_program_instr_t *pc,
                          const char *string,
                          const char *sp,
                          const char **matches,
                          LONG nmatches)
{
    vm_thread *marker = &list->threads[pc - program->instructions];
    vm_thread *slot;
    const char *saved;
    LONG ncopy;

    /* the stamp is offset + 1 so that a zeroed entry means "not visited" */
    if (marker->visited == sp - string + 1)
        return;
    marker->visited = sp - string + 1;

    switch (pc->opcode) {
    /* Instructions executed by the main loop: just queue them */
    case REGEX_PROGRAM_OPCODE_MATCH:
    case REGEX_PROGRAM_OPCODE_CHARACTER:
    case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
    case REGEX_PROGRAM_OPCODE_CHCLS:
    case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
        slot = &list->threads[list->nthreads];
        ncopy = (nmatches <= REGEX_VM_MAX_MATCHES) ? nmatches
                                                   : REGEX_VM_MAX_MATCHES;
        slot->pc = pc;
        memcpy((char*)slot->matches, (char*)matches,
               sizeof(matches[0]) * ncopy);
        ++list->nthreads;
        break;

    /* Control-flow: follow the branch(es), first alternative first */
    case REGEX_PROGRAM_OPCODE_SPLIT:
        vm_add_thread(list, program, pc->u.c.first, string, sp, matches, nmatches);
        vm_add_thread(list, program, pc->u.c.second, string, sp, matches, nmatches);
        break;
    case REGEX_PROGRAM_OPCODE_JUMP:
        vm_add_thread(list, program, pc->u.d.target, string, sp, matches, nmatches);
        break;

    /* Assertions: continue only when the position satisfies them */
    case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
        if (sp == string)
            vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
        break;
    case REGEX_PROGRAM_OPCODE_ASSERT_END:
        if (!*sp)
            vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
        break;

    /* Saving */
    case REGEX_PROGRAM_OPCODE_SAVE:
        if (pc->u.e.save >= nmatches || pc->u.e.save >= REGEX_VM_MAX_MATCHES) {
            /* slot not tracked by the caller: skip the save itself */
            vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
            break;
        }
        /* record sp only for the threads spawned below, then restore */
        saved = matches[pc->u.e.save];
        matches[pc->u.e.save] = sp;
        vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
        matches[pc->u.e.save] = saved;
        break;
    }
}
||||
|
||||
/* Upper bound of number of threads required to run program */ |
||||
STATIC LONG vm_estimate_threads(const cregex_program_t *program) |
||||
{ |
||||
return program->ninstructions * 2; |
||||
} |
||||
|
||||
/* Run `program` on `string` with a temporary, internally allocated thread
 * buffer.
 *
 * Returns the result of vm_run_with_threads(), or -1 when the thread buffer
 * cannot be allocated.
 */
STATIC LONG vm_run(const cregex_program_t *program,
                   const char *string,
                   const char **matches,
                   LONG nmatches)
{
    vm_thread *threads = AllocVec(sizeof(vm_thread) * vm_estimate_threads(program),
                                  MEMF_CLEAR);
    LONG outcome = -1;

    if (threads != NULL) {
        outcome = vm_run_with_threads(program, string, matches, nmatches, threads);
        FreeVec(threads);
    }

    return outcome;
}
||||
|
||||
/* Run `program` on `string` using the caller-supplied `threads` buffer.
 *
 * `threads` must hold at least vm_estimate_threads(program) entries; the
 * first program->ninstructions entries back the "current" list and the
 * following program->ninstructions entries back the "next" list.
 *
 * The string is scanned one character at a time. Every thread in the current
 * list executes its instruction against *sp; threads whose instruction
 * accepts the character are queued (at pc + 1, sp + 1) on the next list.
 *
 * Returns 1 if a MATCH instruction was reached, in which case up to
 * min(nmatches, REGEX_VM_MAX_MATCHES) capture pointers of the matching thread
 * are copied into `matches`; returns 0 otherwise.
 */
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
                                const char *string,
                                const char **matches,
                                LONG nmatches,
                                vm_thread *threads)
{
    vm_thread_list currentList;
    vm_thread_list* current;
    vm_thread_list nextList;
    vm_thread_list* next;
    vm_thread_list* swap = NULL;
    LONG matched = 0;
    const char *sp = NULL;
    LONG i = 0;

    memset(&currentList, 0, sizeof(vm_thread_list));
    currentList.nthreads = 0;
    currentList.threads = threads;
    current = &currentList;

    memset(&nextList, 0, sizeof(vm_thread_list));
    nextList.nthreads = 0;
    nextList.threads = threads + program->ninstructions;
    next = &nextList;

    /* clear every `visited` stamp in both halves of the buffer */
    memset(threads, 0, sizeof(vm_thread) * program->ninstructions * 2);

    vm_add_thread(current, program, program->instructions, string, string,
                  matches, nmatches);

    for (sp = string;; ++sp) {
        for (i = 0; i < current->nthreads; ++i) {
            vm_thread *thread = current->threads + i;

            /* `break` = instruction accepted *sp, advance the thread;
             * `continue` = thread dies (or has matched).
             */
            switch (thread->pc->opcode) {
            case REGEX_PROGRAM_OPCODE_MATCH:
                matched = 1;
                /* emptying the list ends this pass: the threads queued
                 * after this one are discarded
                 */
                current->nthreads = 0;
                memcpy((char*)matches, (char*)thread->matches,
                       sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
                                                 ? nmatches
                                                 : REGEX_VM_MAX_MATCHES));
                continue;

            /* Characters */
            case REGEX_PROGRAM_OPCODE_CHARACTER:
                if (*sp == thread->pc->u.a.ch)
                    break;
                continue;
            case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
                if (*sp)
                    break;
                continue;
            case REGEX_PROGRAM_OPCODE_CHCLS:
                if (cregex_char_class_contains(thread->pc->u.b.klass, *sp))
                    break;
                continue;
            case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
                if (!cregex_char_class_contains(thread->pc->u.b.klass, *sp))
                    break;
                continue;

            /* Control-flow */
            case REGEX_PROGRAM_OPCODE_SPLIT:
            case REGEX_PROGRAM_OPCODE_JUMP:
                /* fall-through */

            /* Assertions */
            case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
            case REGEX_PROGRAM_OPCODE_ASSERT_END:
                /* fall-through */

            /* Saving */
            case REGEX_PROGRAM_OPCODE_SAVE:
                /* handled in vm_add_thread(); never queued on a list */
                abort();
            }

            vm_add_thread(next, program, thread->pc + 1, string, sp + 1,
                          thread->matches, nmatches);
        }

        /* swap current and next thread list */
        swap = current;
        current = next;
        next = swap;
        next->nthreads = 0;

        /* done if no more threads are running or end of string reached */
        if (current->nthreads == 0 || !*sp)
            break;
    }

    return matched;
}
||||
|
||||
/* Public entry point: run a compiled program against `string`.
 *
 * Forwards all arguments to vm_run() and returns its result unchanged.
 */
LONG cregex_program_run(const cregex_program_t *program,
                        const char *string,
                        const char **matches,
                        LONG nmatches)
{
    LONG outcome = vm_run(program, string, matches, nmatches);

    return outcome;
}
@ -0,0 +1,39 @@ |
||||
#include <exec/types.h> |
||||
#include <proto/exec.h> |
||||
#include <proto/containerkit.h> |
||||
#include "linearray.h" |
||||
|
||||
// -----------------------------
|
||||
|
||||
|
||||
// -----------------------------
|
||||
|
||||
|
||||
// Creates a new, empty array intended to hold LINEPTR values.
// NOTE(review): the 2 passed to NewArray is presumably containerkit's
// element-size code for pointer-sized entries - confirm against its docs.
LineArray LineArrayNew(VOID)
{
    #define SIZE_LINEPTR 2
    LineArray created = NewArray(SIZE_LINEPTR);

    return created;
}
||||
|
||||
// Appends one LINEPTR to the array via containerkit's AppendToArray.
VOID LineArrayAppend(LineArray array, LINEPTR value)
{
    AppendToArray(LINEPTR, array, value);
}
||||
|
||||
// Frees every line held in the array with LineFree(), then deletes the
// array itself. A NULL array is ignored, matching StringArrayFree().
VOID LineArrayFree(LineArray array)
{
    if( array == NULL )
    {
        return;
    }

    ArrayForEach(LINEPTR, aLine, array, LineFree(aLine););
    DeleteArray(array);
}
||||
|
||||
// Returns the array's element storage viewed as LINEPTR values.
LINEPTR* LineArrayValues(LineArray array)
{
    LINEPTR* values = ArrayValues(LINEPTR, array);

    return values;
}
||||
|
||||
|
||||
// -----------------------------------------------
|
||||
// -----------------------------------------------
|
||||
|
||||
|
||||
|
@ -0,0 +1,17 @@ |
||||
#ifndef __LINEARRAY_H |
||||
#define __LINEARRAY_H |
||||
|
||||
#include <exec/types.h> |
||||
#include <proto/containerkit.h> |
||||
#include <dos/dos.h> |
||||
#include "configmodel.h" |
||||
|
||||
#define LineArray Array |
||||
|
||||
LineArray LineArrayNew(VOID);
|
||||
VOID LineArrayAppend(LineArray array, LINEPTR value); |
||||
VOID LineArrayFree(LineArray array); |
||||
LINEPTR* LineArrayValues(LineArray array); |
||||
|
||||
#endif |
||||
|
@ -0,0 +1,85 @@ |
||||
// Printf("running (%ld bytes avail)\n", AvailMem(0));
|
||||
#define __CLIB_PRAGMA_LIBCALL |
||||
#include <proto/exec.h> |
||||
#include <proto/dos.h> |
||||
#define __NOLIBBASE__ |
||||
#include "stringarray.h" |
||||
#include "linearray.h" |
||||
#include "configmodel.h" |
||||
#include <proto/containerkit.h> |
||||
|
||||
#include "cregex.h" |
||||
|
||||
#define ZERO ((BPTR)0) |
||||
|
||||
WORD DoTheWork(STRPTR filename); |
||||
VOID ProcessFile(BPTR configFile); |
||||
|
||||
char *vers="\0$VER: ConfigReader (dd.mm.yyyy)"; |
||||
char *stacksize = "$STACK:8192"; // only works when started from CLI
|
||||
|
||||
struct Library *ContainerkitBase; |
||||
|
||||
// Reads every (continuation-joined) line from configFile into a line array,
// then frees the array again. The regex patterns are initialised before the
// first read and released afterwards.
VOID ProcessFile(BPTR configFile)
{
    Array lineArray = NULL;

    InitialisePatterns();

    lineArray = LineArrayNew();
    // NOTE(review): assumes LineArrayNew() reports failure as NULL - confirm
    // against containerkit's NewArray. Without an array there is nowhere to
    // keep the lines, so nothing is read.
    if( lineArray != NULL )
    {
        LINEPTR line = LineReadIncludingContinuation(configFile);
        while( line != NULL )
        {
            LineArrayAppend(lineArray, line);
            line = LineReadIncludingContinuation(configFile);
        }
        LineArrayFree(lineArray);
    }

    ReleasePatterns();
}
||||
|
||||
// Opens the named config file, processes it and closes it again.
// Returns RETURN_ERROR when the file cannot be opened, RETURN_OK otherwise
// (including when containerkit.library is not open, in which case nothing
// is done).
WORD DoTheWork(STRPTR filename)
{
    BPTR configFile = ZERO;

    if( !ContainerkitBase )
    {
        return RETURN_OK;
    }

    configFile = Open(filename, MODE_OLDFILE);
    if( configFile == ZERO )
    {
        Printf("file open failed!\n");
        return RETURN_ERROR;
    }

    ProcessFile(configFile);
    Close(configFile);
    return RETURN_OK;
}
||||
|
||||
// Program entry point: opens containerkit.library, processes the config file
// named by the first argument and reports free memory before and after.
// Returns RETURN_ERROR when no file name is given, when the library cannot
// be opened, or when DoTheWork() fails; RETURN_OK otherwise.
WORD main(WORD argc, STRPTR *argv)
{
    WORD result = RETURN_OK;

    // this does nothing but the first call to Print drops a bunch of memory,
    // I assume because of opening some resource so this means my start and
    // end markers are "clean" and I can ensure I'm not leaking.
    Printf("\n");

    // argv[1] is only valid when a file name was actually supplied
    if( argc < 2 )
    {
        Printf("usage: configreader <configfile>\n");
        return RETURN_ERROR;
    }

    ContainerkitBase = OpenLibrary("containerkit.library", 1);
    if( ContainerkitBase )
    {
        Printf("\n\nrunning (%ld bytes avail)\n\n", AvailMem(0));
        result = DoTheWork(argv[1]);
        Printf("\n\ndone (%ld bytes avail)\n\n", AvailMem(0));
        CloseLibrary(ContainerkitBase);
    }
    else
    {
        Printf("failed to open library\n");
        result = RETURN_ERROR;
    }
    return result;
}
@ -0,0 +1,34 @@ |
||||
#
# :ts=8
#
# Builds the configreader executable from main.o and configmodel.o plus two
# joined object libraries: cregex.lib (regex engine) and arraytypes.lib
# (typed array wrappers).
#

###############################################################################

NAME = configreader

LFLAGS = addsym smallcode smalldata noicons batch
LIBS = lib:sc.lib lib:amiga.lib lib:debug.lib

###############################################################################

$(NAME) : main.o configmodel.o cregex.lib arraytypes.lib
	slink lib:c.o main.o configmodel.o to $(NAME) noicons lib $(LIBS) cregex.lib arraytypes.lib $(LFLAGS)

cregex.lib : cregex_compile.o cregex_parse.o cregex_vm.o
	JOIN cregex_compile.o cregex_parse.o cregex_vm.o AS cregex.lib

arraytypes.lib : stringarray.o linearray.o
	JOIN stringarray.o linearray.o AS arraytypes.lib

clean:
	delete \#?.o \#?.lib $(NAME) ALL

###############################################################################

# Each object lists every project header its source includes (directly, or
# through linearray.h) so that a header change triggers a recompile.
main.o : main.c stringarray.h linearray.h configmodel.h cregex.h
stringarray.o : stringarray.c stringarray.h
linearray.o : linearray.c linearray.h configmodel.h
configmodel.o : configmodel.c configmodel.h cregex.h stringarray.h
cregex_compile.o : cregex_compile.c cregex.h
cregex_parse.o : cregex_parse.c cregex.h
cregex_vm.o : cregex_vm.c cregex.h
@ -0,0 +1,35 @@ |
||||
#include "stringarray.h" |
||||
#include <proto/exec.h> |
||||
#include <string.h> |
||||
|
||||
// Creates a new, empty array intended to hold string pointers.
// NOTE(review): the 2 passed to NewArray is presumably containerkit's
// element-size code for pointer-sized entries - confirm against its docs.
Array StringArrayNew(VOID)
{
    #define SIZE_STRPTR 2
    Array created = NewArray(SIZE_STRPTR);

    return created;
}
||||
|
||||
// Appends the pointer `value` itself to the array; no copy of the string
// is made here.
VOID StringArrayAppend(Array array, CONST_STRPTR value)
{
    AppendToArray(CONST_STRPTR, array, value);
}
||||
|
||||
// Appends a private, NUL-terminated copy of `value` to the array. The copy
// is allocated with AllocVec() and is released by StringArrayFree().
// Nothing is appended when `value` is NULL or the copy cannot be allocated.
VOID StringArrayAppendAndRetain(Array array, CONST_STRPTR value)
{
    ULONG length = 0;
    STRPTR localCopy = NULL;

    if( value == NULL )
    {
        return;
    }

    length = strlen(value);
    // MEMF_CLEAR zeroes the buffer, so the extra byte is the terminator
    localCopy = AllocVec(length + 1, MEMF_CLEAR);
    if( localCopy == NULL )
    {
        // out of memory: copying into a NULL buffer would trash memory
        return;
    }

    CopyMem(value, localCopy, length);
    StringArrayAppend(array, localCopy);
}
||||
|
||||
// Releases every string in the array with FreeVec() and then deletes the
// array. A NULL array is ignored.
VOID StringArrayFree(Array array)
{
    if( array == NULL )
    {
        return;
    }

    StringArrayForEach(array, FreeVec(aString););
    DeleteArray(array);
}
||||
|
||||
// Returns the array's element storage viewed as CONST_STRPTR values.
CONST_STRPTR* StringArrayValues(Array array)
{
    CONST_STRPTR* values = ArrayValues(CONST_STRPTR, array);

    return values;
}
@ -0,0 +1,18 @@ |
||||
#ifndef __STRINGARRAY_H |
||||
#define __STRINGARRAY_H |
||||
|
||||
#include <exec/types.h> |
||||
#include <proto/containerkit.h> |
||||
|
||||
#define StringArray Array |
||||
|
||||
StringArray StringArrayNew(VOID);
|
||||
VOID StringArrayAppend(StringArray array, CONST_STRPTR value); |
||||
VOID StringArrayAppendAndRetain(StringArray array, CONST_STRPTR value); |
||||
VOID StringArrayFree(StringArray array); |
||||
CONST_STRPTR* StringArrayValues(StringArray array); |
||||
|
||||
// Runs `block` once per element of `array`, with the element available as
// `STRPTR aString`. The array is read through its first two fields: the
// element storage pointer, then the element count.
// An element is only loaded while the count is non-zero, so an empty array
// (whose storage pointer may be NULL) is never dereferenced and nothing is
// read past the last element.
// The trailing ';' after while (0) is kept so existing call sites written
// without their own semicolon keep compiling.
#define StringArrayForEach(array, block) do { \
    STRPTR *afe_123_p = (*(STRPTR **)(array)); \
    ULONG afe_123_c = (((ULONG *)(array))[1]); \
    for (; afe_123_c != 0; --afe_123_c, ++afe_123_p) { \
        STRPTR aString = *afe_123_p; \
        block \
    } \
} while (0);
||||
|
||||
#endif |
@ -0,0 +1,22 @@ |
||||
#this is a comment |
||||
[core] |
||||
repositoryformatversion = 0 |
||||
filemode = true |
||||
bare = false |
||||
logallrefupdates = true |
||||
ignorecase = true |
||||
precomposeunicode = true |
||||
|
||||
[remote "origin.foo"] #this is also a comment |
||||
url = git@git.alancfrancis.com:acf/AmigaGit2.git |
||||
fetch = +refs/heads/*:refs/remotes/origin/* |
||||
|
||||
[branch "main"] |
||||
remote = origin |
||||
merge = refs/heads/main |
||||
|
||||
[branch "config-file-parsing-from-book"] |
||||
remote = origin |
||||
merge = refs/heads/config-file-parsing-from-book |
||||
somekey = Alan Francis |
||||
|
Loading…
Reference in new issue