160 lines
3.2 KiB
Plaintext
160 lines
3.2 KiB
Plaintext
|
package urn
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
)
|
||
|
|
||
|
var (
|
||
|
errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]"
|
||
|
errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its start) [col %d]"
|
||
|
errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]"
|
||
|
errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]"
|
||
|
errHex = "expecting the specific string hex chars to be well-formed (%%alnum{2}) [col %d]"
|
||
|
errParse = "parsing error [col %d]"
|
||
|
)
|
||
|
|
||
|
%%{
|
||
|
machine urn;
|
||
|
|
||
|
# unsigned alphabet
|
||
|
alphtype uint8;
|
||
|
|
||
|
action mark {
|
||
|
m.pb = m.p
|
||
|
}
|
||
|
|
||
|
action tolower {
|
||
|
m.tolower = append(m.tolower, m.p - m.pb)
|
||
|
}
|
||
|
|
||
|
action set_pre {
|
||
|
output.prefix = string(m.text())
|
||
|
}
|
||
|
|
||
|
action set_nid {
|
||
|
output.ID = string(m.text())
|
||
|
}
|
||
|
|
||
|
action set_nss {
|
||
|
raw := m.text()
|
||
|
output.SS = string(raw)
|
||
|
// Iterate upper letters lowering them
|
||
|
for _, i := range m.tolower {
|
||
|
raw[i] = raw[i] + 32
|
||
|
}
|
||
|
output.norm = string(raw)
|
||
|
}
|
||
|
|
||
|
action err_pre {
|
||
|
m.err = fmt.Errorf(errPrefix, m.p)
|
||
|
fhold;
|
||
|
fgoto fail;
|
||
|
}
|
||
|
|
||
|
action err_nid {
|
||
|
m.err = fmt.Errorf(errIdentifier, m.p)
|
||
|
fhold;
|
||
|
fgoto fail;
|
||
|
}
|
||
|
|
||
|
action err_nss {
|
||
|
m.err = fmt.Errorf(errSpecificString, m.p)
|
||
|
fhold;
|
||
|
fgoto fail;
|
||
|
}
|
||
|
|
||
|
action err_urn {
|
||
|
m.err = fmt.Errorf(errNoUrnWithinID, m.p)
|
||
|
fhold;
|
||
|
fgoto fail;
|
||
|
}
|
||
|
|
||
|
action err_hex {
|
||
|
m.err = fmt.Errorf(errHex, m.p)
|
||
|
fhold;
|
||
|
fgoto fail;
|
||
|
}
|
||
|
|
||
|
action err_parse {
|
||
|
m.err = fmt.Errorf(errParse, m.p)
|
||
|
fhold;
|
||
|
fgoto fail;
|
||
|
}
|
||
|
|
||
|
pre = ([uU][rR][nN] @err(err_pre)) >mark %set_pre;
|
||
|
|
||
|
nid = (alnum >mark (alnum | '-'){0,31}) %set_nid;
|
||
|
|
||
|
hex = '%' (digit | lower | upper >tolower){2} $err(err_hex);
|
||
|
|
||
|
sss = (alnum | [()+,\-.:=@;$_!*']);
|
||
|
|
||
|
nss = (sss | hex)+ $err(err_nss);
|
||
|
|
||
|
fail := (any - [\n\r])* @err{ fgoto main; };
|
||
|
|
||
|
main := (pre ':' (nid - pre %err(err_urn)) $err(err_nid) ':' nss >mark %set_nss) $err(err_parse);
|
||
|
|
||
|
}%%
|
||
|
|
||
|
%% write data noerror noprefix;
|
||
|
|
||
|
// Machine is the interface representing the FSM
|
||
|
type Machine interface {
|
||
|
Error() error
|
||
|
Parse(input []byte) (*URN, error)
|
||
|
}
|
||
|
|
||
|
type machine struct {
|
||
|
data []byte
|
||
|
cs int
|
||
|
p, pe, eof, pb int
|
||
|
err error
|
||
|
tolower []int
|
||
|
}
|
||
|
|
||
|
// NewMachine creates a new FSM able to parse RFC 2141 strings.
|
||
|
func NewMachine() Machine {
|
||
|
m := &machine{}
|
||
|
|
||
|
%% access m.;
|
||
|
%% variable p m.p;
|
||
|
%% variable pe m.pe;
|
||
|
%% variable eof m.eof;
|
||
|
%% variable data m.data;
|
||
|
|
||
|
return m
|
||
|
}
|
||
|
|
||
|
// Err returns the error that occurred on the last call to Parse.
|
||
|
//
|
||
|
// If the result is nil, then the line was parsed successfully.
|
||
|
func (m *machine) Error() error {
|
||
|
return m.err
|
||
|
}
|
||
|
|
||
|
func (m *machine) text() []byte {
|
||
|
return m.data[m.pb:m.p]
|
||
|
}
|
||
|
|
||
|
// Parse parses the input byte array as a RFC 2141 string.
|
||
|
func (m *machine) Parse(input []byte) (*URN, error) {
|
||
|
m.data = input
|
||
|
m.p = 0
|
||
|
m.pb = 0
|
||
|
m.pe = len(input)
|
||
|
m.eof = len(input)
|
||
|
m.err = nil
|
||
|
m.tolower = []int{}
|
||
|
output := &URN{}
|
||
|
|
||
|
%% write init;
|
||
|
%% write exec;
|
||
|
|
||
|
if m.cs < first_final || m.cs == en_fail {
|
||
|
return nil, m.err
|
||
|
}
|
||
|
|
||
|
return output, nil
|
||
|
}
|