Figure out what this uncommented code is doing.
from __future__ import division
from time import sleep
import random
from twitter import *
# OAuth credentials handed to the Twitter client further down.
# These are placeholder strings; substitute real values before running.
consumer_key = 'your-consumer-key'
consumer_secret = 'your-consumer-secret'
access_token = 'your-access-token'
access_token_secret = 'your-token-secret'
# Upper bound of the ID range that random candidate account IDs are drawn from;
# also the scale factor for the population estimate near the end.
max_id = 2327203915
# OAuth credentials this script uses to authenticate with Twitter.
# These are placeholder strings; substitute real values before running.
consumer_key = 'your-consumer-key'
consumer_secret = 'your-consumer-secret'
access_token = 'your-access-token'
access_token_secret = 'your-token-secret'
# Highest observed Twitter account number. Random candidate IDs are drawn
# from 0..max_id, and the final population estimate is scaled by this value.
max_id = 2327203915
def make_random_id_numbers(list_size, max_id):
    '''
    Return a comma-separated string of random candidate Twitter IDs.

    list_size -- how many IDs to draw (0 yields an empty string)
    max_id    -- largest ID that may be drawn; IDs range from 0 to max_id,
                 both ends inclusive
    '''
    # The loop counter is deliberately a throwaway name so that the builtin
    # ``id`` is not shadowed inside this function.
    return ','.join(str(random.randint(0, max_id)) for _ in range(list_size))
def make_random_id_numbers(list_size, max_id):
    '''
    Returns a comma-separated string of list_size random possible twitter ids.
    IDs could range from 0 to max_id, both ends inclusive.
    An empty string is returned when list_size is 0.
    '''
    # The loop counter is deliberately a throwaway name so that the builtin
    # ``id`` is not shadowed inside this function.
    return ','.join(str(random.randint(0, max_id)) for _ in range(list_size))
# Build the API client used for every request below. OAuth receives the access
# token pair first and the consumer key pair second.
# NOTE(review): Twitter and OAuth presumably come from `from twitter import *`;
# confirm the argument order against the installed package version.
twitter_client = Twitter(auth=OAuth(access_token,
access_token_secret,
consumer_key,
consumer_secret))
# Sample 99 batches of 100 random candidate IDs and keep every user record the
# API returns for them. The loop variable name matters: the estimate further
# down reads the final value of sample_number to work out how many IDs were
# tried, so neither the name nor the range should change independently of it.
user_database = []
for sample_number in range(1, 100):
    numbers = make_random_id_numbers(100, max_id)
    # NOTE(review): this call was cut off after `twitter_client.` in the
    # source. users.lookup is the endpoint that fits a comma-separated batch
    # of up to 100 user IDs -- confirm this is the intended request.
    sample_user_information = twitter_client.users.lookup(user_id=numbers)
    # Extend in place instead of rebuilding the whole list on every batch.
    user_database.extend(sample_user_information)
    # Progress indicator: prints the batch number just fetched.
    print(sample_number)
    # Pause between requests, presumably to stay under the API rate limit.
    sleep(15)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
Hint: Twitter calls a tweet a status
# Every record in user_database is an ID that resolved to a registered account.
registered_user_count = len(user_database)

# Accounts carrying a 'status' entry (Twitter's name for a tweet) count as
# having tweeted at some point.
users_with_status = [user for user in user_database if 'status' in user]
ever_tweeted_count = len(users_with_status)

# Of those, count the ones whose status timestamp mentions 2014.
this_year_count = sum(
    1 for user in users_with_status
    if '2014' in user['status']['created_at']
)
# Number of sampled IDs that resolved to a registered account.
print registered_user_count
4042
# Number of those accounts that carry a 'status' entry (counted as ever tweeted).
print ever_tweeted_count
2029
# Number of those whose status 'created_at' contains '2014'.
print this_year_count
494
# Share of tried IDs that resolved to an account. Despite the name this is a
# fraction between 0 and 1, not a percentage. sample_number still holds its
# final value from the sampling loop and each batch contained 100 IDs, so the
# denominator is the total number of IDs tried. True division is in effect via
# `from __future__ import division`.
percent_valid = registered_user_count / (sample_number * 100)
print percent_valid
0.408282828283
# Scale the hit rate up to the whole candidate ID range (0..max_id).
estimated_user_total = percent_valid * max_id
print 'How many accounts have been set up?'
print estimated_user_total
How many accounts have been set up? 950157396.407
# Apply the sampled "ever tweeted" share to the estimated account total.
print 'How many people have ever tweeted?'
print ever_tweeted_count / registered_user_count * estimated_user_total
How many people have ever tweeted? 476959267.024
# Apply the sampled "status dated 2014" share to the estimated account total.
print 'How many people have tweeted in 2014?'
print this_year_count / registered_user_count * estimated_user_total
How many people have tweeted in 2014? 116125124.647
# Same estimate again, formatted with thousands separators and no decimals.
print 'How many people have tweeted in 2014?'
print '{:,.0f}'.format(this_year_count / registered_user_count * estimated_user_total)
How many people have tweeted in 2014? 116,125,125
import matplotlib.pyplot as plt
%matplotlib inline
# Year taken from the last whitespace-separated token of each profile's
# 'created_at', for sampled users that have a status. Entries from 2014 are
# left out (presumably because that year was still in progress -- confirm).
years = [
    int(year)
    for year in (user['created_at'].split()[-1]
                 for user in user_database if 'status' in user)
    if year != '2014'
]
# One bin per distinct year present in the data.
plt.hist(years, bins=len(set(years)))
# Show plain year numbers on the axis instead of an offset notation.
plt.ticklabel_format(useOffset=False)

# Same series restricted to users whose 'lang' value contains 'en'.
years_eng = [
    int(year)
    for year in (user['created_at'].split()[-1]
                 for user in user_database
                 if 'en' in user['lang'] and 'status' in user)
    if year != '2014'
]
english_bin_count = len(set(years_eng))
# Draw the all-language series and the English-only series with the same
# number of bins so the two histograms can be compared.
plt.hist(years, bins=english_bin_count)
plt.hist(years_eng, bins=english_bin_count)
plt.ticklabel_format(useOffset=False)
Should we go back and see how many people have tweeted in February? Or look at weekly usage?