You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
configreader/cregex/cregex_vm.c

229 lines
7.4 KiB

#include <proto/exec.h>
#include <stdlib.h>
#include <string.h>
#include "cregex.h"
#define REGEX_VM_MAX_MATCHES 20
/* The VM executes one or more threads, each running a regular expression
* program, which is just a list of regular expression instructions. Each
* thread maintains two registers while it runs: a program counter (PC) and
* a string pointer (SP).
*/
typedef struct {
LONG visited;
const cregex_program_instr_t *pc;
const char *matches[REGEX_VM_MAX_MATCHES];
} vm_thread;
/* Run program on string */
STATIC LONG vm_run(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches);
/* Run program on string (using a previously allocated buffer of at least
* vm_estimate_threads(program) threads)
*/
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches,
vm_thread *threads);
typedef struct {
LONG nthreads;
vm_thread *threads;
} vm_thread_list;
STATIC VOID vm_add_thread(vm_thread_list *list,
const cregex_program_t *program,
const cregex_program_instr_t *pc,
const char *string,
const char *sp,
const char **matches,
LONG nmatches)
{
if (list->threads[pc - program->instructions].visited == sp - string + 1)
return;
list->threads[pc - program->instructions].visited = sp - string + 1;
switch (pc->opcode) {
case REGEX_PROGRAM_OPCODE_MATCH:
/* fall-through */
/* Characters */
case REGEX_PROGRAM_OPCODE_CHARACTER:
case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
case REGEX_PROGRAM_OPCODE_CHCLS:
case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
list->threads[list->nthreads].pc = pc;
memcpy((char*)list->threads[list->nthreads].matches,
(char*)matches,
sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
? nmatches
: REGEX_VM_MAX_MATCHES));
++list->nthreads;
break;
/* Control-flow */
case REGEX_PROGRAM_OPCODE_SPLIT:
vm_add_thread(list, program, pc->u.c.first, string, sp, matches, nmatches);
vm_add_thread(list, program, pc->u.c.second, string, sp, matches, nmatches);
break;
case REGEX_PROGRAM_OPCODE_JUMP:
vm_add_thread(list, program, pc->u.d.target, string, sp, matches, nmatches);
break;
/* Assertions */
case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
if (sp == string)
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
break;
case REGEX_PROGRAM_OPCODE_ASSERT_END:
if (!*sp)
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
break;
/* Saving */
case REGEX_PROGRAM_OPCODE_SAVE:
if (pc->u.e.save < nmatches && pc->u.e.save < REGEX_VM_MAX_MATCHES) {
const char *saved = matches[pc->u.e.save];
matches[pc->u.e.save] = sp;
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
matches[pc->u.e.save] = saved;
} else {
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
}
break;
}
}
/* Upper bound of number of threads required to run program */
STATIC LONG vm_estimate_threads(const cregex_program_t *program)
{
return program->ninstructions * 2;
}
STATIC LONG vm_run(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches)
{
size_t size = sizeof(vm_thread) * vm_estimate_threads(program);
vm_thread *threads;
LONG matched;
if (!(threads = AllocVec(size, MEMF_CLEAR)))
{
return -1;
}
matched = vm_run_with_threads(program, string, matches, nmatches, threads);
FreeVec(threads);
return matched;
}
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches,
vm_thread *threads)
{
vm_thread_list currentList;
vm_thread_list* current;
vm_thread_list nextList;
vm_thread_list* next;
vm_thread_list* swap = NULL;
LONG matched = 0;
const char *sp = NULL;
LONG i = 0;
memset(&currentList, 0, sizeof(vm_thread_list));
currentList.nthreads = 0;
currentList.threads = threads;
current = &currentList;
memset(&nextList, 0, sizeof(vm_thread_list));
nextList.nthreads = 0;
nextList.threads = threads + program->ninstructions;
next = &nextList;
memset(threads, 0, sizeof(vm_thread) * program->ninstructions * 2);
vm_add_thread(current, program, program->instructions, string, string,
matches, nmatches);
for (sp = string;; ++sp) {
for (i = 0; i < current->nthreads; ++i) {
vm_thread *thread = current->threads + i;
switch (thread->pc->opcode) {
case REGEX_PROGRAM_OPCODE_MATCH:
matched = 1;
current->nthreads = 0;
memcpy((char*)matches, (char*)thread->matches,
sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
? nmatches
: REGEX_VM_MAX_MATCHES));
continue;
/* Characters */
case REGEX_PROGRAM_OPCODE_CHARACTER:
if (*sp == thread->pc->u.a.ch)
break;
continue;
case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
if (*sp)
break;
continue;
case REGEX_PROGRAM_OPCODE_CHCLS:
if (cregex_char_class_contains(thread->pc->u.b.klass, *sp))
break;
continue;
case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
if (!cregex_char_class_contains(thread->pc->u.b.klass, *sp))
break;
continue;
/* Control-flow */
case REGEX_PROGRAM_OPCODE_SPLIT:
case REGEX_PROGRAM_OPCODE_JUMP:
/* fall-through */
/* Assertions */
case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
case REGEX_PROGRAM_OPCODE_ASSERT_END:
/* fall-through */
/* Saving */
case REGEX_PROGRAM_OPCODE_SAVE:
/* handled in vm_add_thread() */
abort();
}
vm_add_thread(next, program, thread->pc + 1, string, sp + 1,
thread->matches, nmatches);
}
/* swap current and next thread list */
swap = current;
current = next;
next = swap;
next->nthreads = 0;
/* done if no more threads are running or end of string reached */
if (current->nthreads == 0 || !*sp)
break;
}
return matched;
}
LONG cregex_program_run(const cregex_program_t *program,
const char *string,
const char **matches,
LONG nmatches)
{
return vm_run(program, string, matches, nmatches);
}