"""Hide a message in word sequences generated from a bigram Markov model.

A corpus is tokenised at import time and turned into a conditional frequency
distribution (word -> words observed directly after it).  ``markov_encode``
walks that model and lets the bits of the message decide which successor is
taken at every step; ``markov_decode`` replays the walk with the same model
and recovers the bits from the successor that was taken.
"""
import random

import nltk

# Training corpus for the bigram model; read once when the module is loaded.
with open("/root/experiment/crying/crying") as f:
    cry = f.read()

tokens = nltk.word_tokenize(cry)
text = nltk.Text(tokens)
words = [w.lower() for w in text]
fdist1 = nltk.FreqDist(words)
# Materialised as a list because random.choice() needs an indexable sequence
# and dict key views are not indexable on Python 3.
vocab = list(fdist1.keys())
bigrams = nltk.bigrams(words)
cfd = nltk.ConditionalFreqDist(bigrams)
# TODO: save the frequency distribution


def _successors(cfdist, word):
    """Return the words recorded after *word*, in the model's item order."""
    # The position of a word in this list is the value that gets encoded, so
    # encoder and decoder must both derive it from ``.items()`` order.
    return [key for key, _count in cfdist[word].items()]


def _capacity(choices):
    """Return how many message bits one pick among *choices* words carries."""
    # ``2 << bits`` is 2 ** (bits + 1): 2-4 choices carry 1 bit, 5-8 carry 2,
    # 9-16 carry 3, ...  For choices >= 2 this keeps 2 ** bits <= choices, so
    # every encodable index addresses an existing successor.  The formula is
    # part of the encoding format and must stay identical on both sides.
    bits = 1
    while (2 << bits) < choices:
        bits += 1
    return bits


def markov_encode(cfdist, word, encodedword):
    """Encode *encodedword* as a sequence of words drawn from *cfdist*.

    Parameters:
        cfdist: mapping from a word to a mapping of its successors (for
            example the module-level ``cfd``).
        word: the word the generated text starts with.
        encodedword: the message to hide.  Only the low 8 bits of each
            character's code point are encoded, least significant bit first.

    Returns:
        The generated words, each one preceded by a single space.

    A sentinel character ``"x"`` is appended to the message before encoding.
    The word selected by the very last group of bits is not part of the
    returned text.

    Whenever the current word has at most one successor, a random word from
    the module-level ``vocab`` is emitted instead; this repeats until a word
    with several successors is drawn (and therefore never terminates if the
    model contains no such word).
    """
    payload = encodedword + "x"
    total_bits = len(payload) * 8
    emitted = []
    bit_pos = 0  # index of the next payload bit to encode

    while bit_pos < total_bits:
        emitted.append(word)
        successors = _successors(cfdist, word)
        while len(successors) <= 1:
            # No choice to make here, so no bits can be stored: jump to a
            # random word.  The decoder skips such words as well.
            word = random.choice(vocab)
            successors = _successors(cfdist, word)
            emitted.append(word)

        index = 0
        for j in range(_capacity(len(successors))):
            char_code = ord(payload[bit_pos // 8])
            index += ((char_code >> (bit_pos % 8)) & 1) << j
            bit_pos += 1
            if bit_pos == total_bits:
                break
        word = successors[index]

    return "".join(" " + item for item in emitted)


def markov_decode(cfdist, text):
    """Recover the message hidden in *text* by ``markov_encode``.

    Parameters:
        cfdist: the same model that was used for encoding.
        text: whitespace-separated words as returned by ``markov_encode``.

    Returns:
        The decoded characters as a string.  A character is emitted once the
        first bit of the following character has been read, so a trailing,
        incomplete character is not part of the result.
    """
    sequence = text.split()
    decoded = []
    current = 0  # character being assembled, least significant bit first
    bit_pos = 0  # number of bits decoded so far
    index = 0

    for here, following in zip(sequence, sequence[1:]):
        successors = _successors(cfdist, here)
        if len(successors) <= 1:
            # The encoder stores nothing at words without a real choice.
            continue

        try:
            index = successors.index(following)
        except ValueError:
            # NOTE(review): *following* is not a known successor of *here*
            # (text not produced with this model?).  The previous index is
            # reused, which matches the historical behaviour of this function.
            pass

        for j in range(_capacity(len(successors))):
            if bit_pos % 8 == 0:
                if bit_pos > 0:
                    decoded.append(chr(current))
                current = 0
            current += ((index >> j) & 1) << (bit_pos % 8)
            bit_pos += 1

    return "".join(decoded)