// Regular Expressions
// Copyright (c) 2000-2001 by Digital Mars
// All Rights Reserved
// Written by Walter Bright
|
||
|
|
||
|
#ifndef REGEXP_H
|
||
|
#define REGEXP_H
|
||
|
|
||
|
#include <tchar.h>
|
||
|
|
||
|
/*
    Escape sequences:

    \nnn starts out a 1, 2 or 3 digit octal sequence,
    where n is an octal digit. If nnn is larger than
    0377, then the 3rd digit is not part of the sequence
    and is not consumed.
    For maximal portability, use exactly 3 digits.

    \xXX starts out a 1 or 2 digit hex sequence. X
    is a hex character. If the first character after the \x
    is not a hex character, the value of the sequence is 'x'
    and the XX are not consumed.
    For maximal portability, use exactly 2 digits.

    \uUUUU is a unicode sequence. There must be exactly
    4 hex characters after the \u; if any of them is not a
    hex character, then the value of the sequence is 'u',
    and the UUUU are not consumed.

    Character classes:

    [a-b], where a is greater than b, will produce
    an error.
*/
|
||
|
|
||
|
// Position of one match, expressed as a half-open index range
// [rm_so, rm_eo): rm_so is the first matched element, rm_eo is one past
// the last. (Field names follow the POSIX regmatch_t convention.)
// NOTE(review): the indices presumably refer to RegExp::input -- confirm
// against the implementation.
typedef struct regmatch
{
    int rm_so;      // index of start of match
    int rm_eo;      // index past end of match
} regmatch_t;
|
||
|
|
||
|
// Forward declarations: RegExp only refers to these types through
// pointers, so their full definitions are not needed in this header.
struct Range;
struct RegBuffer;
|
||
|
|
||
|
struct RegExp
|
||
|
{
|
||
|
RegExp();
|
||
|
~RegExp();
|
||
|
|
||
|
unsigned re_nsub; // number of parenthesized subexpression matches
|
||
|
regmatch_t *pmatch; // array [re_nsub + 1]
|
||
|
|
||
|
TCHAR *input; // the string to search
|
||
|
|
||
|
// per instance:
|
||
|
|
||
|
int ref; // !=0 means don't make our own copy of pattern
|
||
|
TCHAR *pattern; // source text of the regular expression
|
||
|
|
||
|
TCHAR flags[3 + 1]; // source text of the attributes parameter
|
||
|
// (3 TCHARs max plus terminating 0)
|
||
|
int errors;
|
||
|
|
||
|
unsigned attributes;
|
||
|
|
||
|
#define REAglobal 1 // has the g attribute
|
||
|
#define REAignoreCase 2 // has the i attribute
|
||
|
#define REAmultiline 4 // if treat as multiple lines separated
|
||
|
// by newlines, or as a single line
|
||
|
#define REAdotmatchlf 8 // if . matches \n
|
||
|
|
||
|
int compile(TCHAR *pattern, TCHAR *attributes, int ref);
|
||
|
int test(TCHAR *string, int startindex = 0);
|
||
|
|
||
|
TCHAR *replace(TCHAR *format);
|
||
|
TCHAR *replace2(TCHAR *format);
|
||
|
static TCHAR *replace3(TCHAR *format, TCHAR *input,
|
||
|
unsigned re_nsub, regmatch_t *pmatch);
|
||
|
static TCHAR *replace4(TCHAR *input, regmatch_t *match, TCHAR *replacement);
|
||
|
|
||
|
private:
|
||
|
TCHAR *src; // current source pointer
|
||
|
TCHAR *src_start; // starting position for match
|
||
|
TCHAR *p; // position of parser in pattern
|
||
|
regmatch_t match; // match for the entire regular expression
|
||
|
// (serves as storage for pmatch[0])
|
||
|
|
||
|
char *program;
|
||
|
RegBuffer *buf;
|
||
|
|
||
|
void printProgram(char *prog);
|
||
|
int trymatch(char *prog, char *progend);
|
||
|
int parseRegexp();
|
||
|
int parsePiece();
|
||
|
int parseAtom();
|
||
|
int parseRange();
|
||
|
int escape();
|
||
|
void error(char *msg);
|
||
|
void optimize();
|
||
|
int startchars(Range *r, char *prog, char *progend);
|
||
|
};
|
||
|
|
||
|
#endif
|
||
|
|