Lotto Top Winning Numbers

Notebook by Pablo

In [2]:
import pandas as pd
In [3]:
# Load the raw draw history.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
df = pd.read_csv(filepath_or_buffer='C:/Users/pablo/Downloads/Loto.csv')
In [5]:
df.head()
Out[5]:
Draw Date Winning Numbers Multiplier
0 02/03/2010 17 22 36 37 52 24 2
1 02/06/2010 14 22 52 54 59 04 3
2 02/10/2010 05 08 29 37 38 34 5
3 02/13/2010 10 14 30 40 51 01 4
4 02/17/2010 07 08 19 26 36 15 3
In [140]:
df.tail()
Out[140]:
Draw Date Winning Numbers Multiplier
663 06/15/2016 04 22 24 31 33 10 2
664 06/18/2016 02 23 41 53 63 11 2
665 06/22/2016 14 40 42 43 52 17 3
666 06/25/2016 03 27 36 56 69 25 2
667 06/29/2016 23 29 37 60 64 06 2
In [123]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 668 entries, 0 to 667
Data columns (total 3 columns):
Draw Date          668 non-null object
Winning Numbers    668 non-null object
Multiplier         458 non-null float64
dtypes: float64(1), object(2)
memory usage: 20.9+ KB
In [6]:
# Keep only the space-separated winning-number strings (one string per draw).
df1 = df.loc[:, 'Winning Numbers']
In [9]:
df1.head()
Out[9]:
0    17 22 36 37 52 24
1    14 22 52 54 59 04
2    05 08 29 37 38 34
3    10 14 30 40 51 01
4    07 08 19 26 36 15
Name: Winning Numbers, dtype: object
In [13]:
# Split each draw string on spaces into one column per drawn number; the
# resulting columns are labelled 0, 1, 2, ... by position.
# `expand=True` does this in a single vectorized call instead of building a
# separate Series for every row.
s = df1.str.split(' ', expand=True)
In [14]:
s.head()
Out[14]:
0 1 2 3 4 5
0 17 22 36 37 52 24
1 14 22 52 54 59 04
2 05 08 29 37 38 34
3 10 14 30 40 51 01
4 07 08 19 26 36 15
In [142]:
len(s.index)
Out[142]:
668

Function to get the top winning numbers

In [172]:
def get_top_numbers(df, columns=None):
    """Return the most frequent value of each column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are analysed independently (e.g. one column per
        drawn-number position).
    columns : iterable of column labels, optional
        Columns to analyse, in the order the results should be returned.
        Defaults to every column of ``df``.

    Returns
    -------
    list of dict
        One dict per analysed column, with the keys:

        * ``'Number'``  -- the most frequent non-null value in the column, or
          ``None`` when the column has no non-null values;
        * ``'count'``   -- how many times that value occurs in the column;
        * ``'out of '`` -- the total number of rows in ``df`` (the trailing
          space in the key is kept for compatibility with existing callers).

        When several values share the highest count, the smallest one (in
        sort order) is reported so the result is deterministic.
    """
    if columns is None:
        columns = df.columns
    total_rows = len(df.index)

    pick_list = []
    for column in columns:
        # Count the column's own values, so that missing entries in *other*
        # columns cannot distort the tally. Sorting by value first makes
        # idxmax() (which returns the first maximum) break ties predictably.
        counts = df[column].value_counts(sort=False).sort_index()

        if counts.empty:
            number, count = None, 0
        else:
            number = counts.idxmax()
            count = int(counts.loc[number])

        pick_list.append({
            'Number': number,
            'count': count,
            'out of ': total_rows,
        })

    return pick_list
In [173]:
numbers = get_top_numbers(s)

Winning Numbers

Most frequent winning number in each draw position, 2010 to 2016

In [174]:
numbers
Out[174]:
[{'Number': '02', 'count': 51, 'out of ': 668},
 {'Number': '12', 'count': 35, 'out of ': 668},
 {'Number': '25', 'count': 27, 'out of ': 668},
 {'Number': '52', 'count': 32, 'out of ': 668},
 {'Number': '59', 'count': 54, 'out of ': 668},
 {'Number': '29', 'count': 27, 'out of ': 668}]

Sanity check: recompute the top number for column 2 by hand

In [166]:
# For every distinct value in column 2, count the non-null entries of each
# remaining column.
g1 = s.groupby(by=2).count()
In [167]:
# Rank the groups by column 0's count (highest first), keep the top two, and
# move the group key (column 2) back into a regular column.
g1 = g1.sort_values(by=0, ascending=False).iloc[:2].reset_index()
In [168]:
g1
Out[168]:
2 0 1 3 4 5
0 25 27 27 27 27 27
1 40 25 25 25 25 25
In [169]:
g1[1][0]
Out[169]:
27
In [ ]:
 
In [ ]:
 
    
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: