You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
229 lines
7.4 KiB
229 lines
7.4 KiB
#include <proto/exec.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "cregex.h"
|
|
|
|
#define REGEX_VM_MAX_MATCHES 20
|
|
|
|
/* The VM executes one or more threads, each running a regular expression
|
|
* program, which is just a list of regular expression instructions. Each
|
|
* thread maintains two registers while it runs: a program counter (PC) and
|
|
* a string pointer (SP).
|
|
*/
|
|
typedef struct {
|
|
LONG visited;
|
|
const cregex_program_instr_t *pc;
|
|
const char *matches[REGEX_VM_MAX_MATCHES];
|
|
} vm_thread;
|
|
|
|
/* Run program on string */
|
|
STATIC LONG vm_run(const cregex_program_t *program,
|
|
const char *string,
|
|
const char **matches,
|
|
LONG nmatches);
|
|
|
|
/* Run program on string (using a previously allocated buffer of at least
|
|
* vm_estimate_threads(program) threads)
|
|
*/
|
|
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
|
|
const char *string,
|
|
const char **matches,
|
|
LONG nmatches,
|
|
vm_thread *threads);
|
|
|
|
typedef struct {
|
|
LONG nthreads;
|
|
vm_thread *threads;
|
|
} vm_thread_list;
|
|
|
|
STATIC VOID vm_add_thread(vm_thread_list *list,
|
|
const cregex_program_t *program,
|
|
const cregex_program_instr_t *pc,
|
|
const char *string,
|
|
const char *sp,
|
|
const char **matches,
|
|
LONG nmatches)
|
|
{
|
|
if (list->threads[pc - program->instructions].visited == sp - string + 1)
|
|
return;
|
|
list->threads[pc - program->instructions].visited = sp - string + 1;
|
|
|
|
switch (pc->opcode) {
|
|
case REGEX_PROGRAM_OPCODE_MATCH:
|
|
/* fall-through */
|
|
|
|
/* Characters */
|
|
case REGEX_PROGRAM_OPCODE_CHARACTER:
|
|
case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
|
|
case REGEX_PROGRAM_OPCODE_CHCLS:
|
|
case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
|
|
list->threads[list->nthreads].pc = pc;
|
|
memcpy((char*)list->threads[list->nthreads].matches,
|
|
(char*)matches,
|
|
sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
|
|
? nmatches
|
|
: REGEX_VM_MAX_MATCHES));
|
|
++list->nthreads;
|
|
break;
|
|
|
|
/* Control-flow */
|
|
case REGEX_PROGRAM_OPCODE_SPLIT:
|
|
vm_add_thread(list, program, pc->u.c.first, string, sp, matches, nmatches);
|
|
vm_add_thread(list, program, pc->u.c.second, string, sp, matches, nmatches);
|
|
break;
|
|
case REGEX_PROGRAM_OPCODE_JUMP:
|
|
vm_add_thread(list, program, pc->u.d.target, string, sp, matches, nmatches);
|
|
break;
|
|
|
|
/* Assertions */
|
|
case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
|
|
if (sp == string)
|
|
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
|
|
break;
|
|
case REGEX_PROGRAM_OPCODE_ASSERT_END:
|
|
if (!*sp)
|
|
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
|
|
break;
|
|
|
|
/* Saving */
|
|
case REGEX_PROGRAM_OPCODE_SAVE:
|
|
if (pc->u.e.save < nmatches && pc->u.e.save < REGEX_VM_MAX_MATCHES) {
|
|
const char *saved = matches[pc->u.e.save];
|
|
matches[pc->u.e.save] = sp;
|
|
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
|
|
matches[pc->u.e.save] = saved;
|
|
} else {
|
|
vm_add_thread(list, program, pc + 1, string, sp, matches, nmatches);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Upper bound of number of threads required to run program */
|
|
STATIC LONG vm_estimate_threads(const cregex_program_t *program)
|
|
{
|
|
return program->ninstructions * 2;
|
|
}
|
|
|
|
STATIC LONG vm_run(const cregex_program_t *program,
|
|
const char *string,
|
|
const char **matches,
|
|
LONG nmatches)
|
|
{
|
|
size_t size = sizeof(vm_thread) * vm_estimate_threads(program);
|
|
vm_thread *threads;
|
|
LONG matched;
|
|
|
|
if (!(threads = AllocVec(size, MEMF_CLEAR)))
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
matched = vm_run_with_threads(program, string, matches, nmatches, threads);
|
|
FreeVec(threads);
|
|
return matched;
|
|
}
|
|
|
|
STATIC LONG vm_run_with_threads(const cregex_program_t *program,
|
|
const char *string,
|
|
const char **matches,
|
|
LONG nmatches,
|
|
vm_thread *threads)
|
|
{
|
|
vm_thread_list currentList;
|
|
vm_thread_list* current;
|
|
vm_thread_list nextList;
|
|
vm_thread_list* next;
|
|
vm_thread_list* swap = NULL;
|
|
LONG matched = 0;
|
|
const char *sp = NULL;
|
|
LONG i = 0;
|
|
|
|
memset(¤tList, 0, sizeof(vm_thread_list));
|
|
currentList.nthreads = 0;
|
|
currentList.threads = threads;
|
|
current = ¤tList;
|
|
|
|
memset(&nextList, 0, sizeof(vm_thread_list));
|
|
nextList.nthreads = 0;
|
|
nextList.threads = threads + program->ninstructions;
|
|
next = &nextList;
|
|
|
|
|
|
memset(threads, 0, sizeof(vm_thread) * program->ninstructions * 2);
|
|
|
|
vm_add_thread(current, program, program->instructions, string, string,
|
|
matches, nmatches);
|
|
for (sp = string;; ++sp) {
|
|
for (i = 0; i < current->nthreads; ++i) {
|
|
vm_thread *thread = current->threads + i;
|
|
switch (thread->pc->opcode) {
|
|
case REGEX_PROGRAM_OPCODE_MATCH:
|
|
matched = 1;
|
|
current->nthreads = 0;
|
|
memcpy((char*)matches, (char*)thread->matches,
|
|
sizeof(matches[0]) * ((nmatches <= REGEX_VM_MAX_MATCHES)
|
|
? nmatches
|
|
: REGEX_VM_MAX_MATCHES));
|
|
continue;
|
|
|
|
/* Characters */
|
|
case REGEX_PROGRAM_OPCODE_CHARACTER:
|
|
if (*sp == thread->pc->u.a.ch)
|
|
break;
|
|
continue;
|
|
case REGEX_PROGRAM_OPCODE_ANY_CHARACTER:
|
|
if (*sp)
|
|
break;
|
|
continue;
|
|
case REGEX_PROGRAM_OPCODE_CHCLS:
|
|
if (cregex_char_class_contains(thread->pc->u.b.klass, *sp))
|
|
break;
|
|
continue;
|
|
case REGEX_PROGRAM_OPCODE_CHCLS_NEGATED:
|
|
if (!cregex_char_class_contains(thread->pc->u.b.klass, *sp))
|
|
break;
|
|
continue;
|
|
|
|
/* Control-flow */
|
|
case REGEX_PROGRAM_OPCODE_SPLIT:
|
|
case REGEX_PROGRAM_OPCODE_JUMP:
|
|
/* fall-through */
|
|
|
|
/* Assertions */
|
|
case REGEX_PROGRAM_OPCODE_ASSERT_BEGIN:
|
|
case REGEX_PROGRAM_OPCODE_ASSERT_END:
|
|
/* fall-through */
|
|
|
|
/* Saving */
|
|
case REGEX_PROGRAM_OPCODE_SAVE:
|
|
/* handled in vm_add_thread() */
|
|
abort();
|
|
}
|
|
|
|
vm_add_thread(next, program, thread->pc + 1, string, sp + 1,
|
|
thread->matches, nmatches);
|
|
}
|
|
|
|
/* swap current and next thread list */
|
|
swap = current;
|
|
current = next;
|
|
next = swap;
|
|
next->nthreads = 0;
|
|
|
|
/* done if no more threads are running or end of string reached */
|
|
if (current->nthreads == 0 || !*sp)
|
|
break;
|
|
}
|
|
|
|
return matched;
|
|
}
|
|
|
|
LONG cregex_program_run(const cregex_program_t *program,
|
|
const char *string,
|
|
const char **matches,
|
|
LONG nmatches)
|
|
{
|
|
return vm_run(program, string, matches, nmatches);
|
|
}
|
|
|