Lexer fsm style

This commit is contained in:
madmaurice 2016-12-16 17:33:54 +01:00
parent 0f9d904c71
commit f72c297c95

52
lexer-fsm.py Normal file
View file

@ -0,0 +1,52 @@
#!/usr/bin/env python
import string
# Token kind tags: the first element of every (kind, value) tuple
# that the lexer yields.
TOKEN_ID = "IDENT"    # identifier: a letter followed by letters/digits
TOKEN_NUM = "NUMBER"  # number: a run of decimal digits
def lex(s):
    """Split *s* into tokens using a small table-driven state machine.

    States:
        0 -- start (between tokens)
        1 -- inside an identifier (ASCII letter, then letters/digits)
        2 -- inside a run of whitespace
        3 -- inside a run of ASCII digits

    The longest possible run is consumed for each token. Whitespace runs
    are consumed but produce no token.

    Args:
        s: iterable of single-character strings (typically a ``str``).

    Yields:
        ``(TOKEN_ID, text)`` for identifiers and ``(TOKEN_NUM, value)``
        for numbers, where ``value`` is the digit run converted by ``int``.

    Raises:
        ValueError: if a character cannot start any token, i.e. there is
            no transition for it out of the start state.
    """
    # table[state] maps a set of characters (given as a string) to the
    # state entered after consuming one of them.
    table = [
        {string.ascii_letters: 1, string.whitespace: 2, string.digits: 3},
        {string.ascii_letters + string.digits: 1},
        {string.whitespace: 2},
        {string.digits: 3},
    ]
    # finish[state] turns the accumulated lexeme into a token, or returns
    # None to discard it. A None entry marks a non-accepting state.
    finish = [
        None,
        lambda text: (TOKEN_ID, text),
        lambda text: None,
        lambda text: (TOKEN_NUM, int(text)),
    ]

    end = object()  # unique end-of-input sentinel
    chars = iter(s)
    c = next(chars, end)
    state = 0
    lexeme = []
    while c is not end:
        target = next(
            (nxt for charset, nxt in table[state].items() if c in charset),
            None,
        )
        if target is not None:
            # Consume the character and follow the transition.
            lexeme.append(c)
            state = target
            c = next(chars, end)
            if c is not end:
                continue
            # Input exhausted: fall through to flush the pending lexeme.

        # Either no transition exists for `c` or the input ended, so the
        # current lexeme is complete.
        emit = finish[state]
        if emit is None:
            # Only the start state is non-accepting, so `c` is a real
            # character that cannot begin any token.
            raise ValueError("Unknown character %r" % (c,))
        token = emit("".join(lexeme))
        if token is not None:
            yield token
        state = 0
        lexeme = []
def main():
    """Lex a fixed sample string and print each token on its own line."""
    sample = "Hallo 2 Welt"
    for tok in lex(sample):
        print(tok)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()