#!/usr/bin/env python
"""Minimal table-driven lexer for identifiers and unsigned integers.

The lexer is a small finite-state machine: state 0 is the start state,
state 1 collects an identifier, state 2 skips whitespace and state 3
collects a number.
"""

import string

TOKEN_ID = "IDENT"
TOKEN_NUM = "NUMBER"

# Sentinel handed to next() so that end of input is distinguishable from
# any element the input could produce.
_END = object()

# Transition table indexed by state.  Each entry maps a set of accepted
# characters (as a string) to the state entered after consuming one of them.
_TRANSITIONS = (
    {string.ascii_letters: 1, string.whitespace: 2, string.digits: 3},
    {string.ascii_letters + string.digits: 1},
    {string.whitespace: 2},
    {string.digits: 3},
)

# Finishers indexed by state.  Each one turns a completed lexeme into a
# token, or into None when the lexeme produces no token (whitespace).
# The start state has no finisher: failing to leave it is an error.
_FINISHERS = (
    None,
    lambda text: (TOKEN_ID, text),
    lambda text: None,
    lambda text: (TOKEN_NUM, int(text)),
)


def lex(s):
    """Lazily split *s* into ``(kind, value)`` tokens.

    Identifiers (an ASCII letter followed by ASCII letters or digits) are
    yielded as ``(TOKEN_ID, text)``; runs of ASCII digits are yielded as
    ``(TOKEN_NUM, int_value)``; runs of whitespace separate tokens and are
    not yielded.

    Args:
        s: Iterable of single characters, typically a ``str``.

    Yields:
        Token tuples in input order.

    Raises:
        ValueError: When a character cannot start any token.  Because this
            is a generator, the error surfaces while iterating, after all
            preceding tokens have been yielded.
    """
    state = 0
    chars = iter(s)
    c = next(chars, _END)
    lexeme = []
    while c is not _END:
        advanced = False
        for charset, target in _TRANSITIONS[state].items():
            if c in charset:
                lexeme.append(c)
                c = next(chars, _END)
                state = target
                # When the input is exhausted, fall through to the emit
                # branch below so the final lexeme is still flushed.
                advanced = c is not _END
                break
        if not advanced:
            finisher = _FINISHERS[state]
            if finisher is None:
                # Only reachable in the start state with an unmatched
                # character; the lexeme is empty here, so report the
                # offending input character itself.
                raise ValueError("Unknown character %r" % (c,))
            token = finisher("".join(lexeme))
            if token is not None:
                yield token
            state = 0
            lexeme = []


def main():
    """Lex a fixed sample string and print each token on its own line."""
    s = "Hallo 2 Welt"
    for token in lex(s):
        print(token)


if __name__ == '__main__':
    main()