#!/usr/bin/env python
"""Generate random text from a first-order Markov chain built over words.

Usage: run the script with an optional path to a text file (defaults to
``test.txt``); it prints 100 generated words.
"""

from sys import argv
from random import randint, choice


class MarkovGenerator:
    """Word-level Markov chain: learns word pairs and emits random text.

    ``distribution`` maps each word to a dict of ``follower -> count``.
    Each inner dict also holds the sentinel key ``MarkovGenerator.overall``
    whose value is the total number of followers recorded for that word.
    """

    # Sentinel key for the per-word total; an ``object()`` can never
    # collide with a real (string) token.
    overall = object()

    # Characters removed from both ends of every token by ``scantext``.
    _STRIP_CHARS = ".,!?\"()[]{}\n"

    def __init__(self):
        self.distribution = {}

    def add(self, first, second):
        """Record one occurrence of ``second`` directly following ``first``."""
        followers = self.distribution.setdefault(
            first, {MarkovGenerator.overall: 0}
        )
        followers[MarkovGenerator.overall] += 1
        followers[second] = followers.get(second, 0) + 1

    def getrandomfollower(self, word):
        """Return a random follower of ``word``, weighted by observed counts.

        Returns ``None`` when ``word`` has no recorded followers (for
        example, a word that only ever appeared as the last token), so that
        callers can treat it as a dead end instead of getting a ``KeyError``.
        """
        followers = self.distribution.get(word)
        if not followers:
            return None

        # Pick a position in [0, total) and walk the counts until the
        # position falls inside one follower's share.
        remaining = randint(0, followers[MarkovGenerator.overall] - 1)
        for follower, occurrences in followers.items():
            if follower is MarkovGenerator.overall:
                continue
            if remaining < occurrences:
                return follower
            remaining -= occurrences
        return None

    def scantext(self, text):
        """Learn every adjacent word pair found in ``text``.

        The text is split on any run of whitespace (spaces, tabs, newlines),
        surrounding punctuation is stripped from each token, and tokens that
        become empty after stripping are skipped.
        """
        prevtoken = None
        # A single split() avoids re-copying the remaining text per token.
        for raw in text.split():
            token = raw.strip(MarkovGenerator._STRIP_CHARS)
            if not token:
                # Punctuation-only token such as "..." carries no word.
                continue
            if prevtoken is not None:
                self.add(prevtoken, token)
            prevtoken = token

    def getrandomword(self):
        """Return a uniformly random word that has at least one follower.

        Raises ``IndexError`` if nothing has been learned yet.
        """
        return choice(list(self.distribution))

    def generate(self, n):
        """Return ``n`` generated words joined into one string.

        Words are separated by a space; when the chain hits a dead end a new
        random start word is chosen and joined with ``". "`` instead. At
        least one word is produced even when ``n`` is below 1. An empty
        model yields the empty string.
        """
        if not self.distribution:
            return ""

        word = self.getrandomword()
        pieces = [word]
        for _ in range(1, n):
            word = self.getrandomfollower(word)
            if word is None:
                # Dead end: restart the chain as a new "sentence".
                word = self.getrandomword()
                pieces.append(". " + word)
            else:
                pieces.append(" " + word)
        return "".join(pieces)

    def debug(self):
        """Print every word that has at least one follower, one per line."""
        print("\n".join(self.distribution))


def main():
    """Read the input file, train a generator on it and print 100 words."""
    if len(argv) > 1:
        filename = argv[1]
    else:
        filename = "test.txt"

    # Context manager guarantees the file handle is closed.
    with open(filename, "r") as handle:
        text = handle.read()

    mg = MarkovGenerator()
    mg.scantext(text)
    print(mg.generate(100))


if __name__ == '__main__':
    main()