one-file-projects/markovfun.py
madmaurice b8b6518057 3 more files
Signed-off-by: Valentin Gehrke <madmaurice@zom.bi>
2016-11-09 02:02:03 +01:00

79 lines
2.1 KiB
Python

#!/usr/bin/env python
from sys import argv
from random import randint, choice
class MarkovGenerator:
    """First-order Markov chain text generator working on words.

    ``distribution`` maps every word that has at least one recorded follower
    to a dict of ``follower -> occurrence count``.  Each of those inner dicts
    additionally stores the sum of all its counts under the sentinel key
    ``MarkovGenerator.overall``.
    """

    # Sentinel key for the per-word total.  A bare object() can never be
    # equal to a real token, so it cannot collide with a follower entry.
    overall = object()

    # Characters trimmed from both ends of every token while scanning.
    _PUNCTUATION = ".,!?\"()[]{}\n"

    def __init__(self):
        self.distribution = {}

    def add(self, first, second):
        """Record one observation of ``second`` directly following ``first``."""
        followers = self.distribution.setdefault(
            first, {MarkovGenerator.overall: 0}
        )
        followers[second] = followers.get(second, 0) + 1
        followers[MarkovGenerator.overall] += 1

    def getrandomfollower(self, word):
        """Return a random follower of ``word``, weighted by observed count.

        Returns None when ``word`` has no recorded followers (for example the
        last word of the scanned text, or a word that was never scanned), so
        that callers such as ``generate`` can start over with a fresh word
        instead of failing with a KeyError.
        """
        followers = self.distribution.get(word)
        if not followers:
            return None
        total = followers.get(MarkovGenerator.overall, 0)
        if total <= 0:
            return None
        # Pick a position in [0, total) and walk the counts until the
        # position falls inside one follower's share.
        i = randint(0, total - 1)
        for follower, occurrences in followers.items():
            if follower is MarkovGenerator.overall:
                continue
            if i < occurrences:
                return follower
            i -= occurrences
        return None

    def scantext(self, text):
        """Feed every pair of adjacent words in ``text`` into the model.

        Words are separated by any run of whitespace (spaces, tabs,
        newlines).  Surrounding punctuation is stripped from each word, and
        words that are empty after stripping are skipped so they never show
        up as tokens.  Each call starts a fresh chain: the last word of a
        previous call is not linked to the first word of this one.
        """
        prevtoken = None
        # split() without arguments collapses whitespace runs and does a
        # single pass, unlike repeatedly splitting the remaining text.
        for raw in text.split():
            token = raw.strip(self._PUNCTUATION)
            if not token:
                continue
            if prevtoken is not None:
                self.add(prevtoken, token)
            prevtoken = token

    def getrandomword(self):
        """Return a uniformly random word that has at least one follower.

        Raises IndexError when nothing has been scanned yet.
        """
        return choice(list(self.distribution))

    def generate(self, n):
        """Return a text of ``n`` words produced by walking the chain.

        When the current word has no follower, a new random start word is
        chosen and joined with ". " instead of a plain space.  Returns an
        empty string when ``n`` is not positive or the model is empty.
        """
        if n <= 0 or not self.distribution:
            return ""
        word = self.getrandomword()
        pieces = [word]
        for _ in range(1, n):
            word = self.getrandomfollower(word)
            if word is None:
                # Dead end: begin a new "sentence" from a random word.
                word = self.getrandomword()
                pieces.append(". ")
            else:
                pieces.append(" ")
            pieces.append(word)
        return "".join(pieces)

    def debug(self):
        """Print every word that has recorded followers, one per line."""
        print("\n".join(self.distribution))
def main():
    """Build a Markov model from a text file and print 100 generated words.

    The file path is taken from the first command-line argument and falls
    back to "test.txt" when no argument is given.
    """
    filename = argv[1] if len(argv) > 1 else "test.txt"
    # The context manager closes the handle even if read() raises.
    with open(filename, "r") as source:
        text = source.read()
    mg = MarkovGenerator()
    mg.scantext(text)
    print(mg.generate(100))
# Only run the demo when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()