Haiku Twitter Bot

Over summer 2016, I decided to make a Twitter bot just for fun. Here, I'll describe the bot's programming at a high level. But first, a little background...

I started undergrad as an English major... then graduated with a degree in neuroscience. Although I have pretty firmly switched my career plans toward math and science, I still appreciate and respect art. As a nod to my artistic pipe dream, I designed my bot to write haikus. If you don't remember haikus from high school, follow the link in the previous sentence.

To connect to Twitter, I used the tweepy library. I wrote a subclass of StreamListener that processes incoming tweets and writes them to a file called tweets.txt. You can read more about this process in the tweepy docs (linked above).

from tweepy.streaming import StreamListener

class StdOutListener(StreamListener):
    """ A listener handles tweets that are received from the stream.
    This is a basic listener that just prints received tweets to stdout.
    """
    def __init__(self):
        self.dat = list()
        self.i = 0

    def on_data(self, data):
        # Pull the tweet text out of the raw JSON string (Python 2's unicode)
        sdat = unicode(data.split('"text":')[1].split(',')[0].replace('"', ''))
        # Drop tokens that look like URLs or escape sequences
        ssdat = [sw for sw in sdat.split(' ') if not any([na for na in ['/', '\\'] if na in sw])]
        with open('tweets.txt', 'a+') as fw:
            for ss in ssdat:
                if ss not in self.dat:
                    print(ss)
                    fw.write(ss + '\n')
        self.dat.extend(ssdat)
        self.i += 1
        return True

    def on_error(self, status):
        print(status)
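
For completeness, here's roughly how a listener like this gets attached to a live stream with tweepy; the credential variables and filter terms below are placeholders, not the bot's actual configuration:

from tweepy import OAuthHandler, Stream

# Hypothetical credential variables -- substitute your own keys
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Attach the listener and track some example terms
stream = Stream(auth, StdOutListener())
stream.filter(track=['haiku', 'poetry'])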

Most of the bot's high-level functionality lives in a single function, do_tweet(), which runs each time I start the bot.

import time
from tweepy import API

def do_tweet(auth, tttw=10):
    api = API(auth)                        # connect to Twitter using my authorization info
    words = get_words('tweets.txt')        # Get words from general tweets
    words.extend(get_haikus('haiku.txt'))  # Get previously written but un-tweeted haikus
    words.extend(get_best('best.txt'))     # Get list of "best" tweets
    words.extend(get_words('search.txt'))  # Get list of specifically searched terms
    sdict = make_sdict(words)              # Make a dictionary that maps words to their number of syllables
    # Make a list of haikus and tweet them every 15 minutes
    haik = list()
    j = 0
    while j < tttw:
        h = haiku(sdict).lower()
        haik.append(h)
        print(h)
        api.update_status(h)
        print("Tweeted! at %s" % time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
        time.sleep(900)
        j += 1
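The helpers get_words() and make_sdict() aren't shown in the post. Here's a minimal sketch of what they might look like, assuming each corpus file holds one token per line (the format the stream listener writes) and using the nsyl() syllable counter described below; the real implementations may differ:

def get_words(file):
    # Hypothetical reader: one token per line in the corpus file
    with open(file, 'r') as f:
        return [line.strip() for line in f if line.strip()]

def make_sdict(words):
    # Map each word to its syllable count, skipping words the CMU
    # pronouncing dictionary doesn't know about
    sdict = dict()
    for w in words:
        try:
            sdict[w] = nsyl(w)[0]  # take the first listed pronunciation
        except KeyError:
            continue
    return sdict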

For counting syllables and determining parts of speech, I used NLTK.

Counting syllables:

from nltk.corpus import cmudict

d = cmudict.dict()

def nsyl(word):
    # One count per pronunciation: count the phonemes that carry a stress digit
    return [len(list(y for y in x if y[-1].isdigit())) for x in d[word.lower()]]
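nsyl() returns one count per pronunciation listed in the CMU dictionary, and it raises a KeyError for words the dictionary doesn't contain, which is why unknown words have to be skipped when building the syllable dictionary. For example:

nsyl('twitter')   # [2]
nsyl('syllable')  # [3]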

Choosing words for the haiku:
import numpy as np
from nltk.tokenize import word_tokenize
from nltk import pos_tag

def pick_syl(target):
    # Randomly split `target` syllables into a list of word-sized chunks
    si = 0
    choices = list()
    while si != target:
        if target - si != 1:
            cr = np.random.randint(1, target - si)
            if cr > 3: cr = 1
            choices.append(cr)
            si += cr
        else:
            choices.append(1)
            return choices
    return choices

def syl_of_size(sdict, size):
    return [w for w in sdict.keys() if sdict[w] == size]

def choose_words(sdict, nr_syl, line):
    words = list()
    ix = 0
    for sw in pick_syl(nr_syl):
        if ix == 0 and line == 0:
            # First word of the first line: choose a noun
            sdict1 = dict([(sk, sv) for sk, sv in sdict.items() if pos_tag(word_tokenize(sk), tagset='universal')[0][1] == 'NOUN'])
            gsyls = syl_of_size(sdict1, sw)
        elif ix == 1 and line == 0:
            # Second word of the first line: choose a verb
            sdict2 = dict([(sk, sv) for sk, sv in sdict.items() if pos_tag(word_tokenize(sk), tagset='universal')[0][1] == 'VERB'])
            gsyls = syl_of_size(sdict2, sw)
        elif ix == 2 and line == 0:
            # Third word of the first line: anything but a noun or verb
            sdict3 = dict([(sk, sv) for sk, sv in sdict.items() if pos_tag(word_tokenize(sk), tagset='universal')[0][1] not in ['NOUN', 'VERB']])
            gsyls = syl_of_size(sdict3, sw)
        else:
            gsyls = syl_of_size(sdict, sw)
        cwr = np.random.randint(len(gsyls))
        chosen = gsyls[cwr]
        words.append(chosen)
        ix += 1
    return words

As you can see, the algorithm randomly chooses a word whose syllable count matches sw, one of the chunk sizes generated by pick_syl(). If this is hard to interpret, don't worry; it took me a while to come up with an algorithm that chooses words randomly but still adheres to the 5/7/5 haiku format. Conditions like if ix == 0 and line == 0 determine which word of which line is being written. In that case, the first word of the first line is being chosen. Then the lines:

sdict1 = dict([(sk, sv) for sk, sv in sdict.items()
               if pos_tag(word_tokenize(sk), tagset='universal')[0][1] == 'NOUN'])  # Keep only the nouns
gsyls = syl_of_size(sdict1, sw)  # Then list the nouns with exactly sw syllables
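
To make the syllable bookkeeping concrete, pick_syl(n) returns a random partition of n syllables into word-sized chunks of at most three syllables each (actual output varies from run to run):

pick_syl(5)  # e.g. [2, 1, 1, 1]
pick_syl(7)  # e.g. [3, 2, 1, 1]

The last chunk is always 1, because np.random.randint(1, target - si) never returns target - si itself, so the loop always finishes through the else branch.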

Then, assembling the haiku's lines in order and writing the result to the file haiku.txt:

def haiku(sdict):
    sdict = dict([(sk, sv) for sk, sv in sdict.items() if len(sk) >= 3])
    first_5 = choose_words(sdict, 5, 0)
    seven = choose_words(sdict, 7, 1)
    sec_5 = choose_words(sdict, 5, 2)
    first_line = ' '.join(first_5)
    second_line = ' '.join(seven)
    third_line = ' '.join(sec_5)
    hai = ', '.join([first_line, second_line, third_line])
    with open('haiku.txt', 'a+') as h:
        h.write(hai + '\n')
    return hai
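Because the three lines are joined with commas, each finished haiku is tweeted as a single comma-separated line. A made-up example of the format (not an actual tweet from the bot):

window genius sleep, remember horizon blue, coffee quiet rain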

So far, this bot writes haikus from randomly chosen words, which is cool... but we can do better! ;^)
Eventually, when I have a little more time, I'll give meta ai ai haiku more smarts; for now, the bot uses a simple algorithm to determine its "best" tweets and then recycles words from those tweets. Dumb, I know... it's a work in progress.

import numpy as np
from tweepy import API, OAuthHandler
import json

auth = OAuthHandler(consumer_key, consumer_secret)  # Connect to Twitter via OAuth
auth.set_access_token(access_token, access_token_secret)
api = API(auth)

def get_haikus(api):
    return api.user_timeline('@meta_aiai_haiku')  # Get haikus from my timeline

def get_search(api):
    return api.search('genius')  # Search for a term (e.g. 'genius')

counts = dict()
rs = list()
fs = list()
for h in get_haikus(api):
    rs.append(h.retweet_count)  # Count retweets and favorites for each tweet in the list
    fs.append(h.favorite_count)
    stext = [ss for ss in h.text.split(' ') if '/' not in ss]
    counts[' '.join(stext)] = (h.retweet_count, h.favorite_count)  # Store in a dict
for st in get_search(api):  # Do the same for any specific search terms
    rs.append(st.retweet_count)
    fs.append(st.favorite_count)
    stext = [ss for ss in st.text.split(' ') if '/' not in ss]
    counts[' '.join(stext)] = (st.retweet_count, st.favorite_count)
rs = np.array(rs)  # Make an array with the retweet counts
if any([rr for rr in rs if rr != 0.0]):
    norm_r = (rs - np.min(rs)) * 100.00 / ((np.max(rs) - np.min(rs)) * 100.00)  # Normalize the counts
else:
    norm_r = rs
fs = np.array(fs)
if any([ff for ff in fs if ff != 0.0]):  # Same for the favorites (guard against dividing by zero)
    norm_f = (fs - np.min(fs)) * 100.00 / ((np.max(fs) - np.min(fs)) * 100.00)
else:
    norm_f = fs
for ix, k in enumerate(counts.keys()):
    # Weight each tweet by its normalized counts times a random factor
    weight = 0.01 * np.random.randint(55, 77) * (norm_r[ix] + norm_f[ix])
    counts[k] = weight
with open('best.txt', 'w+') as bf:  # dump to best.txt
    json.dump(counts, bf)

The function get_best() chooses tweets with the highest "weight" to include in the bot's corpus for writing future haikus.
import json
from operator import itemgetter

def get_best(file):
    with open(file, 'r') as f:
        bf = json.load(f)
    # Sort tweets by weight, highest first, and keep the top half
    sbf = sorted(bf.items(), key=itemgetter(1), reverse=True)
    sbf = [bb[0] for bb in sbf]
    return sbf[0:int(len(sbf) / 2.0)]

Here's a link to my Twitter bot meta ai ai haiku: robcapps.com/docs/haiku. Please feel free to ask questions in the comments section!
