Initial Commit

main
Alan Francis 1 year ago
commit 4efeabf70b
  1. 6
      .gitignore
  2. 234
      configmodel.c
  3. 16
      configmodel.h
  4. 147
      cregex.h
  5. 351
      cregex_compile.c
  6. 308
      cregex_parse.c
  7. 229
      cregex_vm.c
  8. 39
      linearray.c
  9. 17
      linearray.h
  10. 85
      main.c
  11. 34
      smakefile
  12. 35
      stringarray.c
  13. 18
      stringarray.h
  14. 22
      testconfig.cfg

6
.gitignore vendored

@ -0,0 +1,6 @@
*.o
*.lib
*.uaem
configreader
.DS_Store

@ -0,0 +1,234 @@
#include "configmodel.h"
#include "cregex.h"
#include "stringarray.h"
#include <proto/exec.h>
#include <proto/dos.h>
#include <string.h>
#include <stdio.h>
/* Forward declarations for the two helpers defined further down in this file. */
cregex_program_t* InitialisePattern(CONST_STRPTR pattern);
Array RunPattern(CONST_STRPTR text, cregex_program_t* patternProgram);
/* Whitespace character class. It is not referenced anywhere in the visible
 * code; the RX_ patterns below spell the class out inline instead.
 * NOTE(review): this literal contains backslash-letter pairs (a backslash
 * followed by 't', 'n', ...), whereas the RX_ patterns embed the real control
 * characters - confirm which form is intended before using it.
 */
#define WHITESPACE "[ \\t\\n\\r\\f\\v]"
/* Optional leading whitespace followed by end of text, '#' or ';'. */
#define RX_BLANK_LINE "^[ \t\n\r\f\v]*($|#|;)"
/* Section header: an opening bracket, a name made of lowercase letters and
 * digits, zero or more double-quoted texts, a closing bracket, then optional
 * whitespace and end of text, '#' or ';'.
 */
#define RX_SECTION_LINE "^[ \t\n\r\f\v]*\\[([a-z0-9]+)([ \t\n\r\f\v]*\"(.+)\")*\\][ \t\n\r\f\v]*($|#|;)"
/* Variable assignment: name '=' value. The name is a lowercase letter followed
 * by one or more lowercase letters or digits, so it is at least two characters
 * long.
 */
#define RX_VARIABLE_LINE "^[ \t\n\r\f\v]*([a-z][a-z0-9]+)[ \t\n\r\f\v]*=[ \t\n\r\f\v]*(.+)[ \t\n\r\f\v]*($|#|;)"
/* Optional '-', one digit 1-9, then any number of digits.
 * NOTE(review): as written a lone "0" does not match - confirm this is intended.
 */
#define RX_INTEGER "^-?[1-9][0-9]*$"
/* Compiled forms of the patterns above. They are assigned by
 * InitialisePatterns() and freed by ReleasePatterns(); a slot is NULL before
 * initialisation and when InitialisePattern() fails for its pattern.
 */
STATIC cregex_program_t* sectionPatternProgram = NULL;
STATIC cregex_program_t* variablePatternProgram = NULL;
STATIC cregex_program_t* blankPatternProgram = NULL;
STATIC cregex_program_t* integerPatternProgram = NULL;
/* A parsed section header.
 * NOTE(review): no visible code creates or reads this struct yet; names
 * presumably holds the section name and any quoted sub-names - confirm.
 */
struct Section
{
StringArray names;
};
/* Kinds of value a struct Variable can carry. */
enum VariableType
{
TypeBool=0,
TypeInteger=1,
TypeString=2,
};
/* A named configuration value.
 * NOTE(review): no visible code fills this struct in yet; type presumably
 * selects which member of value is meaningful - confirm when it is wired up.
 */
struct Variable
{
enum VariableType type;
CONST_STRPTR name;
union
{
CONST_STRPTR stringValue;
BOOL boolValue;
LONG longValue;
} value;
};
/* Concrete type behind the opaque LINEPTR handle.
 * rawText is allocated in LineReadIncludingContinuation() and released in
 * LineFree(). The object union is not assigned anywhere in the visible code.
 */
struct Line
{
STRPTR rawText;
union
{
struct Variable* variable;
struct Section* section;
}
object;
};
VOID InitialisePatterns(VOID)
{
sectionPatternProgram = InitialisePattern(RX_SECTION_LINE);
variablePatternProgram = InitialisePattern(RX_VARIABLE_LINE);
blankPatternProgram = InitialisePattern(RX_BLANK_LINE);
integerPatternProgram = InitialisePattern(RX_INTEGER);
}
/* Free every compiled pattern program and reset its slot to NULL.
 * Resetting the slots makes a repeated call harmless (no double free) and
 * keeps later users from following a dangling pointer.
 */
VOID ReleasePatterns(VOID)
{
    if( sectionPatternProgram != NULL )
    {
        cregex_compile_free( sectionPatternProgram );
        sectionPatternProgram = NULL;
    }
    if( variablePatternProgram != NULL )
    {
        cregex_compile_free( variablePatternProgram );
        variablePatternProgram = NULL;
    }
    if( blankPatternProgram != NULL )
    {
        cregex_compile_free( blankPatternProgram );
        blankPatternProgram = NULL;
    }
    if( integerPatternProgram != NULL )
    {
        cregex_compile_free( integerPatternProgram );
        integerPatternProgram = NULL;
    }
}
/* Parse and compile one regular expression.
 * Returns the compiled program, or NULL after printing a message when the
 * pattern cannot be parsed or compiled. The intermediate parse tree is
 * always released before returning.
 */
cregex_program_t* InitialisePattern(CONST_STRPTR pattern)
{
    cregex_program_t* program;
    cregex_node_t* tree = cregex_parse(pattern);

    if( tree == NULL )
    {
        Printf("could not parse %s\n", pattern);
        return NULL;
    }

    program = cregex_compile_node( tree );
    if( program == NULL )
    {
        Printf("failed to compile %s\n", pattern);
    }

    cregex_parse_free( tree );
    return program;
}
/* Run a compiled pattern against text and collect the captured substrings.
 *
 * text            NUL-terminated string to match.
 * patternProgram  program produced by InitialisePattern(); may be NULL.
 *
 * Returns a new string array holding one copy per capture that took part in
 * the match (index 0 is the whole match; captures that did not participate
 * are skipped, so later entries shift down). A single trailing newline is
 * removed from each copy. The caller releases the array with
 * StringArrayFree(). Returns NULL when there is no match, when either
 * argument is NULL, or when memory runs out.
 */
Array RunPattern(CONST_STRPTR text, cregex_program_t* patternProgram)
{
    enum { MATCH_SLOTS = 20 };
    const char* slots[MATCH_SLOTS];
    Array result = NULL;
    int lastUsed = 0;
    int j;

    /* a pattern that failed to compile leaves its program slot NULL */
    if( text == NULL || patternProgram == NULL )
    {
        return NULL;
    }

    for( j = 0; j < MATCH_SLOTS; ++j )
    {
        slots[j] = NULL;
    }

    if( cregex_program_run(patternProgram, text, slots, MATCH_SLOTS) <= 0 )
    {
        return NULL;
    }

    /* find the highest slot the matcher filled in */
    for( j = 0; j < MATCH_SLOTS; ++j )
    {
        if( slots[j] != NULL )
        {
            lastUsed = j;
        }
    }
    if( lastUsed == 0 )
    {
        return NULL;
    }

    result = StringArrayNew();
    if( result == NULL )
    {
        return NULL;
    }

    /* slots come in (start, end) pairs, one pair per capture */
    for( j = 0; j <= lastUsed; j += 2 )
    {
        const char* from = slots[j];
        const char* to = slots[j + 1];
        LONG len;
        STRPTR buffer;

        if( from == NULL || to == NULL || to < from )
        {
            continue;
        }

        len = (LONG)(to - from);
        buffer = AllocVec(len + 1, MEMF_CLEAR); // freed in the array
        if( buffer == NULL )
        {
            StringArrayFree(result);
            return NULL;
        }

        /* a capture may be empty (e.g. "$" matching at end of text), so the
         * last character is only inspected when there is one */
        if( len > 0 )
        {
            memcpy(buffer, from, len);
            if( buffer[len - 1] == '\n' )
            {
                buffer[len - 1] = '\0';
            }
        }
        StringArrayAppend(result, buffer);
    }
    return result;
}
LINEPTR LineReadIncludingContinuation(BPTR file)
{
UBYTE* buffer = AllocVec(512, MEMF_CLEAR);
ULONG bufLength = 512;
ULONG bytesReadTotal = 0;
UBYTE* read = NULL;
struct Line* result = NULL;
do
{
read = FGets(file, &(buffer[bytesReadTotal]), bufLength-bytesReadTotal);
bytesReadTotal = strlen(buffer);
}
while( read != NULL && bytesReadTotal >= 2 && bytesReadTotal < bufLength && buffer[bytesReadTotal-1] == '\n' && buffer[bytesReadTotal-2] == '\\' );
if( bytesReadTotal > 0 )
{
StringArray stringArray = NULL;
result = AllocVec(sizeof(struct Line), MEMF_CLEAR);
result->rawText = AllocVec(bytesReadTotal+1, MEMF_CLEAR);
CopyMem(buffer, result->rawText, bytesReadTotal-1);
// Printf("\n\nraw text = {{{%s}}}\n", result->rawText); //
stringArray = RunPattern(result->rawText, sectionPatternProgram);
if( stringArray )
{
if( SizeOfArray(stringArray) == 3 )
{
Printf("\nsection {%s}\n", StringArrayValues(stringArray)[1]);
}
else if( SizeOfArray(stringArray) == 5 )
{
Printf("\nsection {%s.%s}\n", StringArrayValues(stringArray)[1], StringArrayValues(stringArray)[3]);
}
// Printf("\nsection size=%ld\n", SizeOfArray(stringArray));
// StringArrayForEach(stringArray, Printf("{{%s}}",aString););
StringArrayFree(stringArray);
}
else
{
stringArray = RunPattern(result->rawText, variablePatternProgram);
if( stringArray )
{
Printf("\nvariable {%s} = {%s}\n", StringArrayValues(stringArray)[1], StringArrayValues(stringArray)[2]);
// StringArrayForEach(stringArray, Printf("{{%s}}",aString););
StringArrayFree(stringArray);
}
else
{
stringArray = RunPattern(result->rawText, blankPatternProgram);
if( stringArray )
{
Printf("\nYY %s\n", result->rawText);
StringArrayFree(stringArray);
}
else
{
Printf("\nXX %s\n", result->rawText);
}
}
}
}
FreeVec(buffer);
return result;
}
/* Release a line object and the raw text it owns. A NULL handle is ignored. */
VOID LineFree(LINEPTR abstractLine)
{
    struct Line* entry = (struct Line*)abstractLine;

    if( entry == NULL )
    {
        return;
    }
    if( entry->rawText != NULL )
    {
        FreeVec(entry->rawText);
    }
    FreeVec(entry);
}
/* Return the raw text stored in a line object, or NULL for a NULL handle.
 * The text remains owned by the line.
 */
CONST_STRPTR LineGetRawText(LINEPTR abstractLine)
{
    struct Line* entry = (struct Line*)abstractLine;

    return ( entry != NULL ) ? (CONST_STRPTR)entry->rawText : NULL;
}

@ -0,0 +1,16 @@
/* Public interface of the config model: pattern setup/teardown and reading
 * of logical lines from a config file.
 */
#ifndef __CONFIGMODEL_H
#define __CONFIGMODEL_H
#include <exec/types.h>
#include <dos/dos.h>
/* Opaque handles; the concrete structs are private to configmodel.c. */
typedef APTR LINEPTR;
typedef APTR SECTIONPTR;
typedef APTR VARIABLEPTR;
/* Compile the patterns used to classify lines. Call before reading lines. */
VOID InitialisePatterns(VOID);
/* Free the patterns compiled by InitialisePatterns(). */
VOID ReleasePatterns(VOID);
/* Read one logical line (continuation lines joined); NULL when nothing was read. */
LINEPTR LineReadIncludingContinuation(BPTR file);
/* Release a line returned by LineReadIncludingContinuation(). */
VOID LineFree(LINEPTR abstractLine);
/* Return the line's text, or NULL for a NULL handle. */
CONST_STRPTR LineGetRawText(LINEPTR line);
#endif

@ -0,0 +1,147 @@
#ifndef CREGEX_H
#define CREGEX_H
#include <exec/types.h>
/* INLINE expands to the inline keyword of the compiler in use (SAS/C or VBCC)
 * and to nothing for any other compiler.
 */
#if defined(__SASC)
#define INLINE __inline
#elif defined(__VBCC__)
#define INLINE inline
#else
#define INLINE
#endif
/* Kinds of node in a parsed pattern tree. EPSILON is the empty pattern. */
typedef enum {
REGEX_NODE_TYPE_EPSILON = 0,
/* Characters */
REGEX_NODE_TYPE_CHARACTER,
REGEX_NODE_TYPE_ANY_CHARACTER,
REGEX_NODE_TYPE_CHARACTER_CLASS,
REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED,
/* Composites */
REGEX_NODE_TYPE_CONCATENATION,
REGEX_NODE_TYPE_ALTERNATION,
/* Quantifiers */
REGEX_NODE_TYPE_QUANTIFIER,
/* Anchors */
REGEX_NODE_TYPE_ANCHOR_BEGIN,
REGEX_NODE_TYPE_ANCHOR_END,
/* Captures */
REGEX_NODE_TYPE_CAPTURE
} cregex_node_type;
/* One node of a parsed pattern tree. type selects which member of u is in
 * use, as noted on each member below.
 */
typedef struct cregex_node {
cregex_node_type type;
union {
/* REGEX_NODE_TYPE_CHARACTER */
struct {
LONG ch;
} a;
/* REGEX_NODE_TYPE_CHARACTER_CLASS,
* REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED
*/
/* from/to point into the pattern text: from at the first character of the
 * class body, to at its closing bracket (set in parse_char_class). */
struct {
const char *from, *to;
} b;
/* REGEX_NODE_TYPE_QUANTIFIER */
/* nmax of -1 means no upper bound; greedy is 1 for greedy, 0 for lazy. */
struct {
LONG nmin, nmax, greedy;
struct cregex_node *quantified;
} c;
/* REGEX_NODE_TYPE_CONCATENATION,
* REGEX_NODE_TYPE_ALTERNATION
*/
struct {
struct cregex_node *left, *right;
} d;
/* REGEX_NODE_TYPE_CAPTURE */
struct {
struct cregex_node *captured;
} e;
} u;
} cregex_node_t;
/* Instruction set of the compiled pattern program executed by the VM. */
typedef enum {
REGEX_PROGRAM_OPCODE_MATCH = 0,
/* Characters */
REGEX_PROGRAM_OPCODE_CHARACTER,
REGEX_PROGRAM_OPCODE_ANY_CHARACTER,
REGEX_PROGRAM_OPCODE_CHCLS,
REGEX_PROGRAM_OPCODE_CHCLS_NEGATED,
/* Control-flow */
REGEX_PROGRAM_OPCODE_SPLIT,
REGEX_PROGRAM_OPCODE_JUMP,
/* Assertions */
REGEX_PROGRAM_OPCODE_ASSERT_BEGIN,
REGEX_PROGRAM_OPCODE_ASSERT_END,
/* Saving */
REGEX_PROGRAM_OPCODE_SAVE
} cregex_program_opcode_t;
#include <limits.h>
/* Bit set with one bit per possible unsigned char value. */
typedef char cregex_char_class[(UCHAR_MAX + CHAR_BIT - 1) / CHAR_BIT];

/* Test whether ch is a member of klass; returns non-zero when it is.
 * Only the low byte of ch is used, so a character read through a signed
 * char (negative for codes 128 and above) still indexes inside the set.
 */
STATIC INLINE LONG cregex_char_class_contains(const cregex_char_class klass,
                                              LONG ch)
{
    unsigned int code = (unsigned char)ch;

    return klass[code / CHAR_BIT] & (1 << (code % CHAR_BIT));
}

/* Add ch to klass and return ch unchanged.
 * Uses the same low-byte mapping as cregex_char_class_contains().
 */
STATIC INLINE LONG cregex_char_class_add(cregex_char_class klass, LONG ch)
{
    unsigned int code = (unsigned char)ch;

    klass[code / CHAR_BIT] |= 1 << (code % CHAR_BIT);
    return ch;
}
/* One instruction of a compiled program. opcode selects which member of u is
 * in use, as noted on each member below.
 */
typedef struct cregex_program_instr {
cregex_program_opcode_t opcode;
union {
/* REGEX_PROGRAM_OPCODE_CHARACTER */
struct {
LONG ch;
} a;
/* REGEX_PROGRAM_OPCODE_CHCLS,
* REGEX_PROGRAM_OPCODE_CHCLS_NEGATED
*/
struct {
cregex_char_class klass;
} b;
/* REGEX_PROGRAM_OPCODE_SPLIT */
/* the VM tries first before second */
struct {
struct cregex_program_instr *first, *second;
} c;
/* REGEX_PROGRAM_OPCODE_JUMP */
struct {
struct cregex_program_instr *target;
} d;
/* REGEX_PROGRAM_OPCODE_SAVE */
/* index into the matches array where the current position is stored */
struct {
LONG save;
} e;
} u;
} cregex_program_instr_t;
/* A compiled program: an instruction count followed by the instructions.
 * The array is declared with one element; cregex_compile_node() allocates
 * extra room after the struct for the remaining instructions.
 */
typedef struct {
LONG ninstructions;
cregex_program_instr_t instructions[1];
} cregex_program_t;
/* Run program on string */
/* matches receives up to nmatches position pointers into string, stored as
 * (start, end) pairs with the whole match in slots 0 and 1. Returns 1 on a
 * match, 0 on no match and -1 when working memory cannot be allocated.
 */
LONG cregex_program_run(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches);
/* Compile a parsed pattern */
/* Returns NULL on failure; release the result with cregex_compile_free(). */
cregex_program_t *cregex_compile_node(const cregex_node_t *root);
/* Free a compiled program */
VOID cregex_compile_free(cregex_program_t *program);
/* Parse a pattern */
/* Returns NULL on failure; release the result with cregex_parse_free(). */
cregex_node_t *cregex_parse(const char *pattern);
/* Free a parsed pattern */
VOID cregex_parse_free(cregex_node_t *root);
#endif

@ -0,0 +1,351 @@
#include <proto/exec.h>
#include <stdlib.h>
#include "cregex.h"
#include <string.h>
/* Compiler state: pc is the next free instruction slot, ncaptures the number
 * of capture groups numbered so far.
 */
typedef struct {
cregex_program_instr_t *pc;
LONG ncaptures;
} regex_compile_context;
/* Count the instructions compile_context() emits for the tree rooted at
 * node. Used to size the program buffer before compiling.
 */
STATIC LONG count_instructions(const cregex_node_t *node)
{
    LONG total = 0;

    switch (node->type) {
    case REGEX_NODE_TYPE_EPSILON:
        total = 0;
        break;

    /* single-instruction nodes: characters and anchors */
    case REGEX_NODE_TYPE_CHARACTER:
    case REGEX_NODE_TYPE_ANY_CHARACTER:
    case REGEX_NODE_TYPE_CHARACTER_CLASS:
    case REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED:
    case REGEX_NODE_TYPE_ANCHOR_BEGIN:
    case REGEX_NODE_TYPE_ANCHOR_END:
        total = 1;
        break;

    case REGEX_NODE_TYPE_CONCATENATION:
        total = count_instructions(node->u.d.left);
        total += count_instructions(node->u.d.right);
        break;

    /* split + left + jump + right */
    case REGEX_NODE_TYPE_ALTERNATION:
        total = 2;
        total += count_instructions(node->u.d.left);
        total += count_instructions(node->u.d.right);
        break;

    case REGEX_NODE_TYPE_QUANTIFIER: {
        const LONG body = count_instructions(node->u.c.quantified);
        const LONG lowest = node->u.c.nmin;
        const LONG highest = node->u.c.nmax;

        if (highest >= lowest) {
            /* bounded: mandatory copies, then optional copies each with a split */
            total = lowest * body + (highest - lowest) * (body + 1);
        } else if (lowest != 0) {
            /* unbounded with a minimum: mandatory copies plus one split */
            total = 1 + lowest * body;
        } else {
            /* unbounded from zero: split, body, jump */
            total = 1 + (body + 1);
        }
        break;
    }

    /* save + body + save */
    case REGEX_NODE_TYPE_CAPTURE:
        total = 2 + count_instructions(node->u.e.captured);
        break;
    }

    /* an unknown node type leaves total at 0 */
    return total;
}
/* Report whether the tree rooted at node begins with a start-of-text anchor.
 * Follows the leftmost path through concatenations, quantifiers and captures;
 * an alternation is anchored only when both of its branches are.
 */
STATIC BOOL node_is_anchored(const cregex_node_t *node)
{
    for (;;) {
        switch (node->type) {
        case REGEX_NODE_TYPE_ANCHOR_BEGIN:
            return TRUE;

        case REGEX_NODE_TYPE_CONCATENATION:
            node = node->u.d.left;
            break;

        case REGEX_NODE_TYPE_QUANTIFIER:
            node = node->u.c.quantified;
            break;

        case REGEX_NODE_TYPE_CAPTURE:
            node = node->u.e.captured;
            break;

        case REGEX_NODE_TYPE_ALTERNATION:
            return (BOOL)(node_is_anchored(node->u.d.left) &&
                          node_is_anchored(node->u.d.right));

        /* epsilon, characters, classes, end anchor and anything unknown */
        default:
            return FALSE;
        }
    }
}
/* Copy instruction into the next free slot of the program, advance the
 * context past it, and return the slot that was written.
 */
STATIC INLINE cregex_program_instr_t *emit(
    regex_compile_context *context,
    const cregex_program_instr_t *instruction)
{
    cregex_program_instr_t *slot = context->pc;

    *slot = *instruction;
    context->pc = slot + 1;
    return slot;
}
/* Fill the character-class bit set of instruction from the class body that
 * node points at in the pattern text, and return instruction.
 * The body is scanned up to its closing bracket. A bracket in first position
 * is a literal member, a backslash makes the next character a literal
 * member, and "x-y" (where y is not the closing bracket) adds every
 * character from x to y.
 */
STATIC cregex_program_instr_t *compile_char_class(
const cregex_node_t *node,
cregex_program_instr_t *instruction)
{
const char *sp = node->u.b.from;
for (;;) {
LONG ch = *sp++;
switch (ch) {
case ']':
/* a closing bracket as the very first character is a literal */
if (sp - 1 == node->u.b.from)
goto CHARACTER;
return instruction;
case '\\':
ch = *sp++;
/* fall-through */
default:
CHARACTER:
if (*sp == '-' && sp[1] != ']') {
/* range: add every character from ch up to the range end */
for (; ch <= sp[1]; ++ch)
cregex_char_class_add(instruction->u.b.klass, ch);
sp += 2;
} else {
cregex_char_class_add(instruction->u.b.klass, ch);
}
break;
}
}
}
/* Emit the instructions for the tree rooted at node at the current program
 * position and return the address of the first instruction slot used (the
 * position on entry, even when nothing is emitted).
 */
STATIC cregex_program_instr_t *compile_context(regex_compile_context *context,
const cregex_node_t *node)
{
cregex_program_instr_t *bottom = context->pc, *split, *jump;
LONG ncaptures = context->ncaptures, capture;
cregex_program_instr_t newInstr;
memset(&newInstr, 0, sizeof(cregex_program_instr_t));
switch (node->type) {
case REGEX_NODE_TYPE_EPSILON:
break;
/* Characters */
case REGEX_NODE_TYPE_CHARACTER:
newInstr.opcode = REGEX_PROGRAM_OPCODE_CHARACTER;
newInstr.u.a.ch = node->u.a.ch;
emit(context, &newInstr);
break;
case REGEX_NODE_TYPE_ANY_CHARACTER:
newInstr.opcode = REGEX_PROGRAM_OPCODE_ANY_CHARACTER;
emit(context, &newInstr);
break;
case REGEX_NODE_TYPE_CHARACTER_CLASS:
newInstr.opcode = REGEX_PROGRAM_OPCODE_CHCLS;
/* the bit set is filled in place in the emitted instruction */
compile_char_class( node, emit(context, &newInstr));
break;
case REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED:
newInstr.opcode = REGEX_PROGRAM_OPCODE_CHCLS_NEGATED;
compile_char_class( node, emit(context, &newInstr));
break;
/* Composites */
case REGEX_NODE_TYPE_CONCATENATION:
compile_context(context, node->u.d.left);
compile_context(context, node->u.d.right);
break;
case REGEX_NODE_TYPE_ALTERNATION: {
/* layout: split, left branch, jump past right, right branch */
cregex_program_instr_t splitInstr;
cregex_program_instr_t jumpInstr;
memset(&splitInstr, 0, sizeof(cregex_program_instr_t));
memset(&jumpInstr, 0, sizeof(cregex_program_instr_t));
splitInstr.opcode = REGEX_PROGRAM_OPCODE_SPLIT;
jumpInstr.opcode = REGEX_PROGRAM_OPCODE_JUMP;
split = emit(context, &splitInstr);
split->u.c.first = compile_context(context, node->u.d.left);
jump = emit(context, &jumpInstr);
split->u.c.second = compile_context(context, node->u.d.right);
jump->u.d.target = context->pc;
}
break;
/* Quantifiers */
case REGEX_NODE_TYPE_QUANTIFIER: {
cregex_program_instr_t *last = NULL;
LONG i = 0;
/* mandatory copies; the capture counter is reset before each copy so every
 * copy numbers its capture groups identically */
for (i = 0; i < node->u.c.nmin; ++i) {
context->ncaptures = ncaptures;
last = compile_context(context, node->u.c.quantified);
}
if (node->u.c.nmax > node->u.c.nmin) {
/* bounded: each optional copy is guarded by its own split */
for (i = 0; i < node->u.c.nmax - node->u.c.nmin; ++i) {
memset(&newInstr, 0, sizeof(cregex_program_instr_t));
newInstr.opcode = REGEX_PROGRAM_OPCODE_SPLIT;
context->ncaptures = ncaptures;
split = emit(context, &newInstr);
split->u.c.first = compile_context(context, node->u.c.quantified);
split->u.c.second = context->pc;
/* a lazy quantifier prefers skipping the copy */
if (!node->u.c.greedy) {
cregex_program_instr_t *swap = split->u.c.first;
split->u.c.first = split->u.c.second;
split->u.c.second = swap;
}
}
} else if (node->u.c.nmax == -1) {
/* unbounded */
newInstr.opcode = REGEX_PROGRAM_OPCODE_SPLIT;
split = emit(context, &newInstr);
if (node->u.c.nmin == 0) {
/* zero or more: split, body, jump back to the split */
split->u.c.first = compile_context(context, node->u.c.quantified);
newInstr.opcode = REGEX_PROGRAM_OPCODE_JUMP;
jump = emit(context, &newInstr);
split->u.c.second = context->pc;
jump->u.d.target = split;
} else {
/* at least one: the split loops back to the last mandatory copy */
split->u.c.first = last;
split->u.c.second = context->pc;
}
if (!node->u.c.greedy) {
cregex_program_instr_t *swap = split->u.c.first;
split->u.c.first = split->u.c.second;
split->u.c.second = swap;
}
}
break;
}
/* Anchors */
case REGEX_NODE_TYPE_ANCHOR_BEGIN:
newInstr.opcode = REGEX_PROGRAM_OPCODE_ASSERT_BEGIN;
emit(context, &newInstr);
break;
case REGEX_NODE_TYPE_ANCHOR_END:
newInstr.opcode = REGEX_PROGRAM_OPCODE_ASSERT_END;
emit(context, &newInstr);
break;
/* Captures */
case REGEX_NODE_TYPE_CAPTURE:
/* capture n stores its start in slot 2n and its end in slot 2n+1 */
capture = context->ncaptures++ * 2;
newInstr.opcode = REGEX_PROGRAM_OPCODE_SAVE;
newInstr.u.e.save = capture;
emit(context,&newInstr);
compile_context(context, node->u.e.captured);
newInstr.u.e.save = capture + 1;
emit(context, &newInstr);
break;
}
return bottom;
}
/* Compile a parsed pattern (using a previously allocated program with at least
 * estimate_instructions(root) instructions).
 *
 * The pattern is wrapped in a capture for the whole match and, unless it is
 * anchored at the start, prefixed with a lazy "any character" loop so the
 * match may begin anywhere in the text. A final match instruction is
 * appended. Returns program.
 *
 * The wrapper nodes are declared at function scope because the tree built
 * from them is still in use when compile_context() runs; declaring them in
 * the inner block would leave root pointing at storage whose lifetime has
 * already ended.
 */
STATIC cregex_program_t *compile_node_with_program(const cregex_node_t *root,
                                                   cregex_program_t *program)
{
    regex_compile_context context;
    cregex_node_t rootNode;
    cregex_node_t concatNode;
    cregex_node_t quantifierNode;
    cregex_node_t anyCharNode;
    cregex_program_instr_t finalInstr;

    /* add capture node for entire match */
    memset(&rootNode, 0, sizeof(cregex_node_t));
    rootNode.type = REGEX_NODE_TYPE_CAPTURE;
    rootNode.u.e.captured = (cregex_node_t *)root;
    root = &rootNode;

    /* add .*? unless pattern starts with ^ */
    if (!node_is_anchored(root))
    {
        memset(&anyCharNode, 0, sizeof(cregex_node_t));
        anyCharNode.type = REGEX_NODE_TYPE_ANY_CHARACTER;

        memset(&quantifierNode, 0, sizeof(cregex_node_t));
        quantifierNode.type = REGEX_NODE_TYPE_QUANTIFIER;
        quantifierNode.u.c.nmin = 0;
        quantifierNode.u.c.nmax = -1;
        quantifierNode.u.c.greedy = 0;
        quantifierNode.u.c.quantified = &anyCharNode;

        memset(&concatNode, 0, sizeof(cregex_node_t));
        concatNode.type = REGEX_NODE_TYPE_CONCATENATION;
        concatNode.u.d.left = &quantifierNode;
        concatNode.u.d.right = (cregex_node_t*)root;
        root = &concatNode;
    }

    /* compile */
    memset(&context, 0, sizeof(regex_compile_context));
    context.pc = program->instructions;
    context.ncaptures = 0;
    compile_context(&context, root);

    /* emit final match instruction */
    memset(&finalInstr, 0, sizeof(cregex_program_instr_t));
    finalInstr.opcode = REGEX_PROGRAM_OPCODE_MATCH;
    emit(&context, &finalInstr);

    /* set total number of instructions */
    program->ninstructions = context.pc - program->instructions;
    return program;
}
/* Upper bound on the number of instructions needed to compile the parsed
 * pattern, including what compile_node_with_program() adds around it.
 */
STATIC LONG estimate_instructions(const cregex_node_t *root)
{
    LONG total = count_instructions(root);

    /* the lazy "any character" prefix added when the pattern is not anchored */
    if (!node_is_anchored(root)) {
        total += 3;
    }
    /* the two save instructions around the whole match */
    total += 2;
    /* the final match instruction */
    total += 1;

    return total;
}
/* Compile a parsed pattern into a newly allocated program.
 * Returns NULL when root is NULL, when memory cannot be allocated, or when
 * compilation fails. Release the result with cregex_compile_free().
 */
cregex_program_t *cregex_compile_node(const cregex_node_t *root)
{
    size_t size;
    cregex_program_t *program;

    if (root == NULL)
        return NULL;

    /* the program struct already contains room for one instruction */
    size = sizeof(cregex_program_t) +
           sizeof(cregex_program_instr_t) * (estimate_instructions(root) - 1);

    if (!(program = AllocVec(size, MEMF_CLEAR)))
        return NULL;

    if (!compile_node_with_program(root, program)) {
        /* allocated with AllocVec, so it must be released with FreeVec */
        FreeVec(program);
        return NULL;
    }
    return program;
}
/* Release a program returned by cregex_compile_node(). NULL is ignored. */
VOID cregex_compile_free(cregex_program_t *program)
{
    if (program == NULL) {
        return;
    }
    FreeVec(program);
}

@ -0,0 +1,308 @@
#include <proto/exec.h>
#include <proto/dos.h>
#include <stdlib.h>
#include <string.h>
#include "cregex.h"
/* Parser state. sp is the read position in the pattern text. stack and
 * output share one node buffer: stack grows upward from its start, output
 * grows downward from its end (see parse_with_nodes).
 */
typedef struct {
const char *sp;
cregex_node_t *stack, *output;
} regex_parse_context;
/* Shunting-yard algorithm
* See https://en.wikipedia.org/wiki/Shunting-yard_algorithm
*/
/* Copy node onto the top of the stack and return the stack slot it now
 * occupies. No check is made that the stack stays below the output region.
 */
STATIC INLINE cregex_node_t *push(regex_parse_context *context,
                                  const cregex_node_t *node)
{
    cregex_node_t *slot = context->stack;

    *slot = *node;
    context->stack = slot + 1;
    return slot;
}
/* Discard the top stack node and return the slot it occupied. */
STATIC INLINE cregex_node_t *drop(regex_parse_context *context)
{
    context->stack -= 1;
    return context->stack;
}
/* Move the top stack node into the output region and return its new,
 * permanent address there.
 */
STATIC INLINE cregex_node_t *consume(regex_parse_context *context)
{
    context->stack -= 1;
    context->output -= 1;
    *context->output = *context->stack;
    return context->output;
}
/* Reduce every node on the stack above bottom to a single node and return
 * it. An empty stack yields an epsilon node; otherwise the top two nodes are
 * repeatedly joined into a concatenation until one remains.
 */
STATIC INLINE cregex_node_t *concatenate(regex_parse_context *context,
                                         const cregex_node_t *bottom)
{
    cregex_node_t joined;

    memset(&joined, 0, sizeof(cregex_node_t));

    if (context->stack == bottom) {
        joined.type = REGEX_NODE_TYPE_EPSILON;
        push(context, &joined);
        return context->stack - 1;
    }

    joined.type = REGEX_NODE_TYPE_CONCATENATION;
    while (context->stack - 1 > bottom) {
        /* the top of the stack is the right operand */
        joined.u.d.right = consume(context);
        joined.u.d.left = consume(context);
        push(context, &joined);
    }
    return context->stack - 1;
}
/* Parse a character class; on entry sp is just past the opening bracket.
 * A leading '^' selects the negated class. The pushed node records pointers
 * to the start of the class body and to its closing bracket inside the
 * pattern text; nothing is copied. Returns the pushed node, or NULL when the
 * pattern ends before the class is closed or a range runs backwards.
 */
STATIC cregex_node_t *parse_char_class(regex_parse_context *context)
{
cregex_node_t newNode;
cregex_node_type type =
(*context->sp == '^')
? (++context->sp, REGEX_NODE_TYPE_CHARACTER_CLASS_NEGATED)
: REGEX_NODE_TYPE_CHARACTER_CLASS;
const char *from = context->sp;
for (;;) {
LONG ch = *context->sp++;
memset(&newNode, 0, sizeof(cregex_node_t));
switch (ch) {
case '\0':
/* premature end of character class */
return NULL;
case ']':
/* a closing bracket as the very first character is a literal */
if (context->sp - 1 == from) {
goto CHARACTER;
} else {
newNode.type = type;
newNode.u.b.from = from;
newNode.u.b.to = context->sp - 1;
return push(context, &newNode);
}
case '\\':
/* the escaped character is taken literally */
ch = *context->sp++;
/* fall-through */
default:
CHARACTER:
if (*context->sp == '-' && context->sp[1] != ']') {
if (context->sp[1] < ch)
/* empty range in character class */
return NULL;
/* skip the '-' and the range end */
context->sp += 2;
}
break;
}
}
}
/* Parse an interval quantifier; on entry sp is just past the opening brace.
 * Accepted forms are "n}", "n,}" (no upper bound, stored as nmax -1), "n,m}"
 * and ",m}" (minimum 0), each optionally followed by '?' for a lazy
 * quantifier. On success the top stack node becomes the quantified operand
 * and the new quantifier node is pushed and returned. On a malformed
 * interval sp is restored to its entry position and NULL is returned.
 */
STATIC cregex_node_t *parse_interval(regex_parse_context *context)
{
const char *from = context->sp;
LONG nmin, nmax;
cregex_node_t newNode;
memset(&newNode, 0, sizeof(cregex_node_t));
/* leading digits give the minimum (0 when there are none) */
for (nmin = 0; *context->sp >= '0' && *context->sp <= '9'; ++context->sp)
nmin = (nmin * 10) + (*context->sp - '0');
if (*context->sp == ',') {
++context->sp;
if (*from != ',' && *context->sp == '}')
nmax = -1;
else {
for (nmax = 0; *context->sp >= '0' && *context->sp <= '9';
++context->sp)
nmax = (nmax * 10) + (*context->sp - '0');
/* reject a missing maximum, a missing closing brace, or max < min */
if (*(context->sp - 1) == ',' || *context->sp != '}' ||
nmax < nmin) {
context->sp = from;
return NULL;
}
}
} else if (*from != '}' && *context->sp == '}') {
/* exact count */
nmax = nmin;
} else {
context->sp = from;
return NULL;
}
/* step over the closing brace */
++context->sp;
newNode.type = REGEX_NODE_TYPE_QUANTIFIER;
newNode.u.c.nmin = nmin;
newNode.u.c.nmax = nmax;
newNode.u.c.greedy = (*context->sp == '?') ? (++context->sp, 0) : 1;
newNode.u.c.quantified = consume(context);
return push(context, &newNode);
}
/* Parse pattern text from the current position until the end of the pattern
 * (depth 0), a closing parenthesis (depth above 0) or an alternation bar.
 * depth counts the open parentheses around the current position.
 * Returns a non-NULL node pointer on success, with the parsed result left as
 * the single node on the stack above the entry position. Returns NULL for
 * unbalanced parentheses or a malformed character class.
 */
STATIC cregex_node_t *parse_context(regex_parse_context *context, LONG depth)
{
cregex_node_t *bottom = context->stack;
cregex_node_t newNode;
for (;;) {
LONG ch = *context->sp++;
memset(&newNode, 0, sizeof(cregex_node_t));
switch (ch) {
/* Characters */
case '\\':
/* the escaped character is taken literally */
/* NOTE(review): a backslash as the last pattern character makes this read
 * the terminator and continue past it - confirm callers never pass that. */
ch = *context->sp++;
/* fall-through */
default:
CHARACTER:
newNode.type = REGEX_NODE_TYPE_CHARACTER;
newNode.u.a.ch = ch;
push(context, &newNode);
break;
case '.':
newNode.type = REGEX_NODE_TYPE_ANY_CHARACTER;
push(context, &newNode);
break;
case '[':
if (!parse_char_class(context))
return NULL;
break;
/* Composites */
case '|': {
/* everything parsed so far is the left branch; the rest of this context is
 * the right branch */
cregex_node_t *left = concatenate(context, bottom), *right;
if (!(right = parse_context(context, depth)))
return NULL;
if (left->type == REGEX_NODE_TYPE_EPSILON &&
right->type == left->type) {
/* both branches empty: keep a single epsilon */
drop(context);
} else if (left->type == REGEX_NODE_TYPE_EPSILON) {
/* empty left branch: the right branch becomes optional */
right = consume(context);
drop(context);
newNode.type = REGEX_NODE_TYPE_QUANTIFIER;
newNode.u.c.nmin = 0;
newNode.u.c.nmax = 1;
newNode.u.c.greedy = 1;
newNode.u.c.quantified = right;
push(context, &newNode);
} else if (right->type == REGEX_NODE_TYPE_EPSILON) {
/* empty right branch: the left branch becomes optional */
drop(context);
left = consume(context);
newNode.type = REGEX_NODE_TYPE_QUANTIFIER;
newNode.u.c.nmin = 0;
newNode.u.c.nmax = 1;
newNode.u.c.greedy = 1;
newNode.u.c.quantified = left;
push(context, &newNode);
} else {
right = consume(context);
left = consume(context);
newNode.type = REGEX_NODE_TYPE_ALTERNATION;
newNode.u.d.left = left;
newNode.u.d.right = right;
push(context, &newNode);
}
return bottom;
}
/* QUANTIFIER expands to a case for one quantifier character. With nothing to
 * quantify the character is treated as a literal; otherwise the top stack
 * node is wrapped in a quantifier, lazy when followed by '?'. */
#define QUANTIFIER(ch, min, max) \
case ch: \
if (context->stack == bottom) { \
goto CHARACTER; \
} else { \
newNode.type = REGEX_NODE_TYPE_QUANTIFIER; \
newNode.u.c.nmin = min; \
newNode.u.c.nmax = max; \
newNode.u.c.greedy = (*context->sp == '?') ? (++context->sp, 0) : 1; \
newNode.u.c.quantified = consume(context); \
push(context, &newNode); \
} \
break
// END-QUANTIFIER
/* clang-format off */
/* Quantifiers */
QUANTIFIER('?', 0, 1);
QUANTIFIER('*', 0, -1);
QUANTIFIER('+', 1, -1);
/* clang-format on */
#undef QUANTIFIER
case '{':
/* an opening brace that does not start a valid interval is a literal */
if ((context->stack == bottom) || !parse_interval(context))
goto CHARACTER;
break;
/* Anchors */
case '^':
newNode.type = REGEX_NODE_TYPE_ANCHOR_BEGIN;
push(context,
&newNode);
break;
case '$':
newNode.type = REGEX_NODE_TYPE_ANCHOR_END;
push(context,
&newNode);
break;
/* Captures */
case '(':
if (!parse_context(context, depth + 1)) {
return NULL;
} else {
newNode.type = REGEX_NODE_TYPE_CAPTURE;
newNode.u.e.captured = consume(context);
push(context, &newNode);
}
break;
case ')':
if (depth > 0)
return concatenate(context, bottom);
/* unmatched close parenthesis */
return NULL;
/* End of string */
case '\0':
if (depth == 0)
return concatenate(context, bottom);
/* unmatched open parenthesis */
return NULL;
}
}
}
/* Number of node slots to reserve for parsing pattern: two per pattern
 * character plus one, so that an empty pattern still has room for the
 * epsilon node the parser pushes for it.
 */
STATIC INLINE LONG estimate_nodes(const char *pattern)
{
    return (LONG)(strlen(pattern) * 2 + 1);
}
/* Parse pattern into the caller-supplied buffer nodes, which must hold at
 * least estimate_nodes(pattern) nodes. The stack starts at the beginning of
 * the buffer and the output region at its end. Returns NULL on a parse error.
 */
STATIC cregex_node_t *parse_with_nodes(const char *pattern,
                                       cregex_node_t *nodes)
{
    regex_parse_context state;

    state.sp = pattern;
    state.stack = nodes;
    state.output = nodes + estimate_nodes(pattern);

    return parse_context(&state, 0);
}
/* Parse pattern into a newly allocated node buffer.
 * The root of the parsed tree is the first node of the buffer. Returns NULL
 * when pattern is NULL, when memory cannot be allocated, or when the pattern
 * is malformed. Release the result with cregex_parse_free().
 */
cregex_node_t *cregex_parse(const char *pattern)
{
    size_t size;
    cregex_node_t *nodes;

    if (pattern == NULL)
        return NULL;

    size = sizeof(cregex_node_t) * estimate_nodes(pattern);
    nodes = AllocVec(size, MEMF_CLEAR);
    if (!nodes)
        return NULL;

    if (!parse_with_nodes(pattern, nodes)) {
        /* allocated with AllocVec, so it must be released with FreeVec */
        FreeVec(nodes);
        return NULL;
    }
    return nodes;
}
/* Release a node buffer returned by cregex_parse(). NULL is ignored. */
VOID cregex_parse_free(cregex_node_t *root)
{
    if (root == NULL) {
        return;
    }
    FreeVec(root);
}

@ -0,0 +1,229 @@
#include <proto/exec.h>
#include <stdlib.h>
#include <string.h>
#include "cregex.h"
/* Largest number of match slots a thread records; callers asking for more
 * only get this many filled in.
 */
#define REGEX_VM_MAX_MATCHES 20
/* The VM executes one or more threads, each running a regular expression
* program, which is just a list of regular expression instructions. Each
* thread maintains two registers while it runs: a program counter (PC) and
* a string pointer (SP).
*/
/* visited is used per instruction index rather than per thread: it holds the
 * string offset plus one at which that instruction was last added to the
 * list, with 0 meaning never (see vm_add_thread).
 */
typedef struct {
LONG visited;
const cregex_program_instr_t *pc;
const char *matches[REGEX_VM_MAX_MATCHES];
} vm_thread;
/* Forward declarations; both functions are defined further down. */
/* Run program on string */
STATIC LONG vm_run(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches);
/* Run program on string (using a previously allocated buffer of at least
* vm_estimate_threads(program) threads)
*/
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches,
vm_thread *threads);
/* A list of runnable threads: nthreads entries are in use at the start of
 * threads.
 */
typedef struct {
LONG nthreads;
vm_thread *threads;
} vm_thread_list;
/* Add a thread starting at instruction pc, with the text position sp, to
 * list. Control-flow, assertion and save instructions are followed
 * immediately, so the list only ever holds threads waiting on a
 * character-consuming instruction or on the final match. An instruction
 * already added for the same text position is skipped.
 * matches is the capture state to copy into any thread that gets added; it is
 * modified temporarily while following a save instruction and restored
 * afterwards.
 */
STATIC VOID vm_add_thread(vm_thread_list *list,
const cregex_program_t *program,
const cregex_program_instr_t *pc,
const char *string,
const char *sp,
const char **matches,
LONG nmatches)
{
/* offset + 1 so that the zero-initialised value means "never visited" */
if (list->threads[pc - program->instructions].visited == sp - string + 1)
return;
list->threads[pc - program->instructions].visited = sp - string + 1;
switch (pc->opcode) {
case REGEX_PROGRAM_OPCODE_MATCH:
/* fall-through */
/* Characters */
case REGEX_PROGRAM_OPCODE_CHARACTER:
case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
case REGEX_PROGRAM_OPCODE_CHCLS:
case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
/* queue the thread together with a snapshot of the capture state */
list->threads[list->nthreads].pc = pc;
memcpy((char*)list->threads[list->nthreads].matches,
(char*)matches,
sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
? nmatches
: REGEX_VM_MAX_MATCHES));
++list->nthreads;
break;
/* Control-flow */
case REGEX_PROGRAM_OPCODE_SPLIT:
/* first is added before second, which gives it priority */
vm_add_thread(list, program, pc->u.c.first, string, sp, matches, nmatches);
vm_add_thread(list, program, pc->u.c.second, string, sp, matches, nmatches);
break;
case REGEX_PROGRAM_OPCODE_JUMP:
vm_add_thread(list, program, pc->u.d.target, string, sp, matches, nmatches);
break;
/* Assertions */
case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
if (sp == string)
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
break;
case REGEX_PROGRAM_OPCODE_ASSERT_END:
if (!*sp)
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
break;
/* Saving */
case REGEX_PROGRAM_OPCODE_SAVE:
if (pc->u.e.save < nmatches && pc->u.e.save < REGEX_VM_MAX_MATCHES) {
/* record the position for the continuation only, then restore it */
const char *saved = matches[pc->u.e.save];
matches[pc->u.e.save] = sp;
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
matches[pc->u.e.save] = saved;
} else {
/* slot out of range: continue without recording */
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
}
break;
}
}
/* Number of thread slots needed to run program: one per instruction for each
 * of the two thread lists.
 */
STATIC LONG vm_estimate_threads(const cregex_program_t *program)
{
    const LONG perList = program->ninstructions;

    return perList * 2;
}
/* Allocate the thread buffer, run program on string, and release the buffer.
 * Returns the result of vm_run_with_threads(), or -1 when the buffer cannot
 * be allocated.
 */
STATIC LONG vm_run(const cregex_program_t *program,
                   const char *string,
                   const char **matches,
                   LONG nmatches)
{
    LONG outcome = -1;
    size_t bytes = sizeof(vm_thread) * vm_estimate_threads(program);
    vm_thread *pool = AllocVec(bytes, MEMF_CLEAR);

    if (pool != NULL) {
        outcome = vm_run_with_threads(program, string, matches, nmatches, pool);
        FreeVec(pool);
    }
    return outcome;
}
/* Run program on string using the caller-supplied thread buffer.
 * All threads advance through the text in step, one character per outer
 * loop pass. On a match the capture positions of the matching thread are
 * copied to matches (up to the smaller of nmatches and
 * REGEX_VM_MAX_MATCHES entries). Returns 1 when a match was found, else 0.
 */
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches,
vm_thread *threads)
{
vm_thread_list currentList;
vm_thread_list* current;
vm_thread_list nextList;
vm_thread_list* next;
vm_thread_list* swap = NULL;
LONG matched = 0;
const char *sp = NULL;
LONG i = 0;
/* the two lists split the buffer: the first ninstructions entries hold the
 * current list, the following ninstructions entries the next list */
memset(&currentList, 0, sizeof(vm_thread_list));
currentList.nthreads = 0;
currentList.threads = threads;
current = &currentList;
memset(&nextList, 0, sizeof(vm_thread_list));
nextList.nthreads = 0;
nextList.threads = threads + program->ninstructions;
next = &nextList;
memset(threads, 0, sizeof(vm_thread) * program->ninstructions * 2);
/* seed the current list with a thread at the first instruction */
vm_add_thread(current, program, program->instructions, string, string,
matches, nmatches);
for (sp = string;; ++sp) {
for (i = 0; i < current->nthreads; ++i) {
vm_thread *thread = current->threads + i;
/* inside this switch, break means "the character matched, advance the
 * thread" and continue means "this thread ends here" */
switch (thread->pc->opcode) {
case REGEX_PROGRAM_OPCODE_MATCH:
matched = 1;
/* emptying the list ends the inner loop, cutting off the remaining
 * (lower-priority) threads of this step */
current->nthreads = 0;
memcpy((char*)matches, (char*)thread->matches,
sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
? nmatches
: REGEX_VM_MAX_MATCHES));
continue;
/* Characters */
case REGEX_PROGRAM_OPCODE_CHARACTER:
if (*sp == thread->pc->u.a.ch)
break;
continue;
case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
if (*sp)
break;
continue;
case REGEX_PROGRAM_OPCODE_CHCLS:
if (cregex_char_class_contains(thread->pc->u.b.klass, *sp))
break;
continue;
case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
if (!cregex_char_class_contains(thread->pc->u.b.klass, *sp))
break;
continue;
/* Control-flow */
case REGEX_PROGRAM_OPCODE_SPLIT:
case REGEX_PROGRAM_OPCODE_JUMP:
/* fall-through */
/* Assertions */
case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
case REGEX_PROGRAM_OPCODE_ASSERT_END:
/* fall-through */
/* Saving */
case REGEX_PROGRAM_OPCODE_SAVE:
/* handled in vm_add_thread() */
abort();
}
/* the character was consumed: continue this thread at the next instruction
 * and the next text position */
vm_add_thread(next, program, thread->pc + 1, string, sp + 1,
thread->matches, nmatches);
}
/* swap current and next thread list */
swap = current;
current = next;
next = swap;
next->nthreads = 0;
/* done if no more threads are running or end of string reached */
if (current->nthreads == 0 || !*sp)
break;
}
return matched;
}
/* Run a compiled program on string.
 *
 * matches receives up to nmatches position pointers into string, stored as
 * (start, end) pairs with the whole match in slots 0 and 1; the caller
 * should clear it first, since slots of captures that did not take part
 * keep their previous contents.
 *
 * Returns 1 on a match, 0 on no match, and -1 on error: a NULL program or
 * string, a negative nmatches, a NULL matches array with nmatches above
 * zero, or failure to allocate working memory.
 */
LONG cregex_program_run(const cregex_program_t *program,
                        const char *string,
                        const char **matches,
                        LONG nmatches)
{
    /* a pattern that failed to compile reaches callers as a NULL program */
    if (program == NULL || string == NULL || nmatches < 0)
        return -1;
    if (matches == NULL && nmatches > 0)
        return -1;

    return vm_run(program, string, matches, nmatches);
}

@ -0,0 +1,39 @@
#include <exec/types.h>
#include <proto/exec.h>
#include <proto/containerkit.h>
#include "linearray.h"
// -----------------------------
// -----------------------------
/* Create an empty array of line handles.
 * NOTE(review): the meaning of the value 2 passed to NewArray is defined by
 * containerkit and is not visible here - confirm it suits LINEPTR elements.
 */
LineArray LineArrayNew(VOID)
{
#define SIZE_LINEPTR 2
    LineArray created = NewArray(SIZE_LINEPTR);

    return created;
}
/* Append one line handle to array, via containerkit's AppendToArray. */
VOID LineArrayAppend(LineArray array, LINEPTR value)
{
AppendToArray(LINEPTR, array, value);
}
/* Free every line stored in array with LineFree(), then delete the array. */
VOID LineArrayFree(LineArray array)
{
ArrayForEach(LINEPTR, aLine, array, LineFree(aLine););
DeleteArray(array);
}
/* Return the array's elements as a pointer to line handles, as provided by
 * containerkit's ArrayValues.
 */
LINEPTR* LineArrayValues(LineArray array)
{
    LINEPTR* elements = ArrayValues(LINEPTR, array);

    return elements;
}
// -----------------------------------------------
// -----------------------------------------------

@ -0,0 +1,17 @@
/* Typed wrappers around a containerkit Array that holds LINEPTR handles. */
#ifndef __LINEARRAY_H
#define __LINEARRAY_H
#include <exec/types.h>
#include <proto/containerkit.h>
#include <dos/dos.h>
#include "configmodel.h"
/* LineArray is an alias for the containerkit Array type. */
#define LineArray Array
/* Create an empty line array. */
LineArray LineArrayNew(VOID);
/* Append one line handle to the array. */
VOID LineArrayAppend(LineArray array, LINEPTR value);
/* Free every stored line with LineFree(), then the array itself. */
VOID LineArrayFree(LineArray array);
/* Access the stored handles as a LINEPTR pointer. */
LINEPTR* LineArrayValues(LineArray array);
#endif

@ -0,0 +1,85 @@
// Printf("running (%ld bytes avail)\n", AvailMem(0));
#define __CLIB_PRAGMA_LIBCALL
#include <proto/exec.h>
#include <proto/dos.h>
#define __NOLIBBASE__
#include "stringarray.h"
#include "linearray.h"
#include "configmodel.h"
#include <proto/containerkit.h>
#include "cregex.h"
/* Null file handle value, compared against the result of Open(). */
#define ZERO ((BPTR)0)
WORD DoTheWork(STRPTR filename);
VOID ProcessFile(BPTR configFile);
/* Embedded version string; the date part is still a placeholder. */
char *vers="\0$VER: ConfigReader (dd.mm.yyyy)";
char *stacksize = "$STACK:8192"; // only works when started from CLI
/* Library base for containerkit.library; opened and closed in main(). */
struct Library *ContainerkitBase;
// Reads every (continuation-joined) line from configFile into a line array,
// then releases the array again.
//
// The regex patterns are initialised before the first read and released after
// the array has been freed. Nothing is returned; the file handle is left open
// for the caller to close.
VOID ProcessFile(BPTR configFile)
{
    Array collected;
    LINEPTR current;

    InitialisePatterns();
    collected = LineArrayNew();

    // LineReadIncludingContinuation yields NULL when there is nothing more to read
    for( current = LineReadIncludingContinuation(configFile);
         current != NULL;
         current = LineReadIncludingContinuation(configFile) )
    {
        LineArrayAppend(collected, current);
    }

    LineArrayFree(collected);
    ReleasePatterns();
}
// Opens filename for reading, runs ProcessFile on it and closes it again.
//
// Returns RETURN_OK when the file was processed, and also when
// ContainerkitBase is not set (in which case nothing is attempted).
// Returns RETURN_ERROR, after printing a message, when the file cannot be
// opened.
WORD DoTheWork(STRPTR filename)
{
    BPTR handle;

    if( !ContainerkitBase )
    {
        return RETURN_OK;
    }

    handle = Open(filename, MODE_OLDFILE);
    if( handle == ZERO )
    {
        Printf("file open failed!\n");
        return RETURN_ERROR;
    }

    ProcessFile(handle);
    Close(handle);
    return RETURN_OK;
}
// Program entry point.
//
// Expects the config file name as the first argument. Opens
// containerkit.library, runs DoTheWork on the named file while printing the
// free-memory figure before and after (a cheap leak check), and closes the
// library again.
//
// Returns RETURN_OK on success, or RETURN_ERROR when no file name was given,
// the library could not be opened, or DoTheWork failed.
WORD main(WORD argc, STRPTR *argv)
{
    WORD result = RETURN_OK;

    // this does nothing but the first call to Print drops a bunch of memory,
    // I assume because of opening some resource so this means my start and
    // end markers are "clean" and I can ensure I'm not leaking.
    Printf("\n");

    // argv[1] is only valid when an argument was actually supplied; without
    // this check a bare invocation would hand an invalid pointer to Open().
    if( argc < 2 )
    {
        Printf("usage: configreader <configfile>\n");
        return RETURN_ERROR;
    }

    ContainerkitBase = OpenLibrary("containerkit.library", 1);
    if( ContainerkitBase )
    {
        Printf("\n\nrunning (%ld bytes avail)\n\n", AvailMem(0));
        result = DoTheWork(argv[1]);
        Printf("\n\ndone (%ld bytes avail)\n\n", AvailMem(0));
        CloseLibrary(ContainerkitBase);
    }
    else
    {
        Printf("failed to open library\n");
        result = RETURN_ERROR;
    }
    return result;
}

@ -0,0 +1,34 @@
#
# :ts=8
#
###############################################################################
# Name of the executable produced by the link step.
NAME = configreader
# Options appended to the slink command line.
LFLAGS = addsym smallcode smalldata noicons batch
# Libraries passed to slink after the "lib" keyword.
LIBS = lib:sc.lib lib:amiga.lib lib:debug.lib
###############################################################################
# Link the executable from the two main objects plus the two local libraries.
# NOTE(review): "noicons" appears both literally here and inside $(LFLAGS).
$(NAME) : main.o configmodel.o cregex.lib arraytypes.lib
slink lib:c.o main.o configmodel.o to $(NAME) noicons lib $(LIBS) cregex.lib arraytypes.lib $(LFLAGS)
# Bundle the three regex objects into one library file.
cregex.lib : cregex_compile.o cregex_parse.o cregex_vm.o
JOIN cregex_compile.o cregex_parse.o cregex_vm.o AS cregex.lib
# Bundle the typed array wrappers into one library file.
arraytypes.lib : stringarray.o linearray.o
JOIN stringarray.o linearray.o AS arraytypes.lib
# Remove all objects, libraries and the executable.
clean:
delete \#?.o \#?.lib $(NAME) ALL
###############################################################################
# Header prerequisites mirror the #include "..." lines of each source file
# (and of the local headers they pull in), so that editing a shared header
# rebuilds every object that depends on it.
main.o : main.c stringarray.h linearray.h configmodel.h cregex.h
stringarray.o : stringarray.c stringarray.h
linearray.o : linearray.c linearray.h configmodel.h
configmodel.o : configmodel.c configmodel.h cregex.h stringarray.h
cregex_compile.o : cregex_compile.c cregex.h
cregex_parse.o : cregex_parse.c cregex.h
cregex_vm.o : cregex_vm.c cregex.h

@ -0,0 +1,35 @@
#include "stringarray.h"
#include <proto/exec.h>
#include <string.h>
// Size argument handed to NewArray for arrays of string pointers.
// NOTE(review): presumably a size code rather than a byte count - confirm
// against the containerkit NewArray documentation.
#define SIZE_STRPTR 2

// Creates an empty string array and returns whatever NewArray hands back.
Array StringArrayNew(VOID)
{
    Array created = NewArray(SIZE_STRPTR);

    return created;
}
// Adds the pointer value to the array via containerkit's AppendToArray macro.
// No copy of the text is made here; StringArrayAppendAndRetain is the variant
// that duplicates the string first.
// NOTE(review): StringArrayFree calls FreeVec on every element, so pointers
// stored through this function presumably must come from AllocVec - confirm.
VOID StringArrayAppend(Array array, CONST_STRPTR value)
{
AppendToArray(CONST_STRPTR, array, value);
}
// Appends a private copy of value to the array.
//
// The copy is allocated with AllocVec (zero-filled, so the terminating NUL is
// already in place) and is later released by StringArrayFree.
//
// Nothing is appended when value is NULL or when the allocation fails; the
// function returns VOID, so the caller is not told about either case.
VOID StringArrayAppendAndRetain(Array array, CONST_STRPTR value)
{
    ULONG length;
    STRPTR localCopy;

    if( value == NULL )
    {
        return;
    }

    // measure once and reuse for both the allocation and the copy
    length = strlen(value);

    localCopy = AllocVec(length + 1, MEMF_CLEAR);
    if( localCopy == NULL )
    {
        // out of memory: copying into a NULL destination would trash memory
        return;
    }

    CopyMem(value, localCopy, length);
    StringArrayAppend(array, localCopy);
}
// Frees every string stored in the array with FreeVec and then deletes the
// array itself. A NULL array is ignored.
VOID StringArrayFree(Array array)
{
    if( array == NULL )
    {
        return;
    }

    // the macro exposes each element as aString
    StringArrayForEach(array, FreeVec(aString););
    DeleteArray(array);
}
// Returns the result of containerkit's ArrayValues for this array, typed as a
// pointer to CONST_STRPTR.
// NOTE(review): presumably this points at the array's own element storage
// rather than a copy - confirm before holding on to it across appends.
CONST_STRPTR* StringArrayValues(Array array)
{
return ArrayValues(CONST_STRPTR, array);
}

@ -0,0 +1,18 @@
#ifndef __STRINGARRAY_H
#define __STRINGARRAY_H
#include <exec/types.h>
#include <proto/containerkit.h>
// StringArray is a textual alias for the containerkit Array type; it exists
// only to make signatures read as "array of strings".
#define StringArray Array
// Creates a new, empty string array.
StringArray StringArrayNew(VOID);
// Adds the pointer value to the array without copying the text.
VOID StringArrayAppend(StringArray array, CONST_STRPTR value);
// Adds an AllocVec'ed copy of value to the array.
VOID StringArrayAppendAndRetain(StringArray array, CONST_STRPTR value);
// Calls FreeVec on every stored string, then deletes the array; NULL is ignored.
VOID StringArrayFree(StringArray array);
// Returns the array contents as a pointer to CONST_STRPTR.
CONST_STRPTR* StringArrayValues(StringArray array);
// Runs block once for every element of array, with the current element
// available inside block as the STRPTR variable aString.
//
// The macro reads the element pointer from the first pointer-sized field of
// the array object and the element count from its second ULONG, so it depends
// on that layout.
//
// Each element is loaded inside the loop body, only after the remaining count
// has been checked. An empty array is therefore never dereferenced and no slot
// past the last element is read.
//
// The expansion already ends in ';' (kept for compatibility with existing
// call sites), and array is evaluated twice, so pass a plain variable.
#define StringArrayForEach(array, block) do {STRPTR *afe_123_p = (*(STRPTR **)(array)); int afe_123_c = (((ULONG *)(array))[1]);\
for (; afe_123_c--; ++afe_123_p) {STRPTR aString = *afe_123_p; block}} while (0);
#endif

@ -0,0 +1,22 @@
#this is a comment
[core]
repositoryformatversion = 0
filemode = true
bare = false
logallrefupdates = true
ignorecase = true
precomposeunicode = true
[remote "origin.foo"] #this is also a comment
url = git@git.alancfrancis.com:acf/AmigaGit2.git
fetch = +refs/heads/*:refs/remotes/origin/*
[branch "main"]
remote = origin
merge = refs/heads/main
[branch "config-file-parsing-from-book"]
remote = origin
merge = refs/heads/config-file-parsing-from-book
somekey = Alan Francis
Loading…
Cancel
Save