Your mission:

Figure out what this uncommented code is doing.

In [1]:
from __future__ import division
In [2]:
from time import sleep
import random

from twitter import *
In [2]:
# OAuth credentials for the Twitter API. The values below are placeholders:
# substitute the four strings issued for your own Twitter application.
consumer_key = 'your-consumer-key'
consumer_secret = 'your-consumer-secret'
access_token = 'your-access-token'
access_token_secret = 'your-token-secret'
In [4]:
# Highest Twitter account ID observed; used as the inclusive upper bound when
# drawing random candidate IDs.
max_id = 2327203915
In [1]:
# Credentials this notebook uses to authenticate against Twitter via OAuth.
# Every value is a placeholder string to be replaced with your own.
consumer_key = 'your-consumer-key'
consumer_secret = 'your-consumer-secret'
access_token = 'your-access-token'
access_token_secret = 'your-token-secret'

# Largest Twitter account number seen so far; random candidate IDs are drawn
# from the range 0..max_id.
max_id = 2327203915
In [6]:
def make_random_id_numbers(list_size, max_id):
    '''
    Build a comma-separated string of list_size random candidate account IDs.

    Each ID is drawn independently with random.randint from 0 to max_id
    (both ends inclusive), so the same ID may appear more than once.
    '''
    return ','.join(
        str(random.randint(0, max_id)) for _ in range(0, list_size)
    )
In [7]:
def make_random_id_numbers(list_size, max_id):
    '''
    Returns a single comma-separated string holding list_size random
    integers, each one a possible Twitter ID.
    IDs can range from 0 to max_id (inclusive); repeats are possible.
    '''
    candidate_ids = []
    for _ in range(list_size):
        candidate_ids.append(str(random.randint(0, max_id)))
    return ','.join(candidate_ids)
In [6]:
# Authenticated Twitter client. OAuth receives the access token pair first,
# followed by the consumer key pair.
twitter_client = Twitter(
    auth=OAuth(
        access_token,
        access_token_secret,
        consumer_key,
        consumer_secret,
    )
)
In [7]:
# Accumulator for the user records collected by the sampling loop.
user_database = []
In [8]:
# Request 99 batches of 100 random candidate IDs and collect whatever user
# records Twitter returns for them.
# NOTE: a later cell computes the total number of IDs sampled as
# sample_number * 100, so it relies on sample_number ending at the batch count.
for sample_number in range(1,100):

    numbers = make_random_id_numbers(100, max_id)

    # The original line stopped at `twitter_client.` (a syntax error).
    # users/lookup takes a comma-separated list of up to 100 IDs and returns
    # records only for IDs that belong to existing accounts.
    sample_user_information = twitter_client.users.lookup(user_id=numbers)
    # extend() appends in place instead of copying the whole list every batch.
    user_database.extend(sample_user_information)

    print(sample_number)
    # Throttle requests (presumably to stay under Twitter's rate limit).
    sleep(15)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

Hint: Twitter calls a tweet a status

In [9]:
# Every record in user_database is an account that exists.
registered_user_count = len(user_database)

# A record carries a 'status' entry only when the account has tweeted.
ever_tweeted_count = sum(1 for account in user_database if 'status' in account)

# Of those, count accounts whose status timestamp mentions 2014.
this_year_count = sum(
    1
    for account in user_database
    if 'status' in account and '2014' in account['status']['created_at']
)
In [10]:
# Number of sampled IDs that turned out to be registered accounts.
print(registered_user_count)
4042

In [11]:
# Number of sampled accounts that have a status (have tweeted at least once).
print(ever_tweeted_count)
2029

In [12]:
# Number of sampled accounts whose status timestamp contains 2014.
print(this_year_count)
494

In [13]:
# Fraction (0..1) of sampled IDs that resolved to a real account. Each of the
# sample_number batches requested 100 IDs; the quotient is a float because of
# `from __future__ import division`.
percent_valid = registered_user_count / (100 * sample_number)

print(percent_valid)
0.408282828283

In [14]:
# Scale the sampled hit rate up to the full range of possible account IDs.
estimated_user_total = max_id * percent_valid
In [15]:
# Report the extrapolated number of registered accounts.
print('How many accounts have been set up?')
print(estimated_user_total)
How many accounts have been set up?
950157396.407

In [16]:
# Share of sampled accounts with a status, applied to the estimated total.
print('How many people have ever tweeted?')
print(ever_tweeted_count / registered_user_count * estimated_user_total)
How many people have ever tweeted?
476959267.024

In [17]:
# Share of sampled accounts with a 2014 status, applied to the estimated total.
print('How many people have tweeted in 2014?')
print(this_year_count / registered_user_count * estimated_user_total)
How many people have tweeted in 2014?
116125124.647

In [18]:
# Same estimate as the previous cell, rounded to a whole number and printed
# with thousands separators.
print('How many people have tweeted in 2014?')
print('{:,.0f}'.format(this_year_count / registered_user_count * estimated_user_total))
How many people have tweeted in 2014?
116,125,125

In [19]:
import matplotlib.pyplot as plt
%matplotlib inline
In [20]:
# Account-creation year (the last whitespace-separated token of created_at)
# for every user that has a status, with 2014 left out.
years = [
    int(signup_year)
    for signup_year in (
        account['created_at'].split()[-1]
        for account in user_database
        if 'status' in account
    )
    if signup_year != '2014'
]

# One bin per distinct year; useOffset=False keeps the tick labels as plain
# year numbers.
plt.hist(years, bins=len(set(years)))
plt.ticklabel_format(useOffset=False)
In [21]:
# Same extraction as for all users, limited to accounts whose lang value
# contains 'en'; 2014 is again left out.
years_eng = [
    int(signup_year)
    for signup_year in (
        account['created_at'].split()[-1]
        for account in user_database
        if 'en' in account['lang'] and 'status' in account
    )
    if signup_year != '2014'
]

# Draw both histograms on the same axes (all users first, then the 'en'
# subset), using the bin count derived from years_eng for both.
plt.hist(years, bins=len(set(years_eng)))
plt.hist(years_eng, bins=len(set(years_eng)))
plt.ticklabel_format(useOffset=False)

Should we go back and see how many people have tweeted in February? Or look at weekly usage?