Basic Pandas Data Frames

In [1]:
import pandas as pd
import numpy as np
In [7]:
# Show the installed pandas version so readers know which release produced the outputs below.
pd.__version__
Out[7]:
u'0.17.0'

Pandas Series

In [9]:
# Build a Series from a plain list; with no index given, pandas labels the
# entries with the default integers 0..4.
data = pd.Series(data=[3, 6, 9, 11, 45])
In [10]:
# Display the Series: index labels on the left, values on the right.
data
Out[10]:
0     3
1     6
2     9
3    11
4    45
dtype: int64
In [11]:
# .values exposes the underlying data as a NumPy array.
data.values
Out[11]:
array([ 3,  6,  9, 11, 45])
In [12]:
# .index holds the labels; here the default integer labels 0-4.
data.index
Out[12]:
Int64Index([0, 1, 2, 3, 4], dtype='int64')
In [13]:
# Select the single element stored at index 1.
data[1]
Out[13]:
6
In [15]:
# Slicing returns a sub-Series; 2:5 yields the entries at 2, 3 and 4.
data[2:5]
Out[15]:
2     9
3    11
4    45
dtype: int64
In [18]:
# Same idea as a plain Series, but with explicit string labels supplied
# through `index` instead of the default integers.
data1 = pd.Series(
    data=[12, 4, 67, 34, 23],
    index=['a', 'b', 'c', 'd', 'f'],
)
In [19]:
# Display the Series with its custom string labels.
data1
Out[19]:
a    12
b     4
c    67
d    34
f    23
dtype: int64
In [20]:
# Elements can be fetched by their string label, dictionary-style.
data1['a']
Out[20]:
12

Dictionary to Pandas Series

In [21]:
# Sample country -> population mapping used to build a Series below.
popula_dict = {
    'Argentina': 123456,
    'USA': 435454353,
    'Canada': 45453334,
    'Mexico': 7644565,
}
In [23]:
# Build a Series straight from the dict: the keys become the index labels
# and the dict values become the data.
population = pd.Series(data=popula_dict)

population

In [25]:
# Look up a value by its label (the original dict key).
population['Argentina']
Out[25]:
123456
In [27]:
# Positional access to the first element. Use .iloc so the lookup is
# explicitly by position: on a string-labelled Series, plain `population[0]`
# depends on an integer-as-position fallback that newer pandas deprecates.
population.iloc[0]
Out[27]:
123456

Scalar fill

In [29]:
# A scalar is broadcast to every label listed in `index`.
pd.Series(data=5, index=[123, 234, 567])
Out[29]:
123    5
234    5
567    5
dtype: int64

Pandas Data Frame

In [30]:
# City populations keyed by city name; the keys become the Series index.
# Note: this rebinds `population`, replacing the country Series built earlier.
population = pd.Series(
    data={
        'Los Angeles': 34567,
        'New York': 76443,
        'Chicago': 43534,
    },
)
In [33]:
# Area figures for the same three cities, again indexed by city name.
area = pd.Series(
    data={
        'Los Angeles': 345,
        'New York': 143,
        'Chicago': 876,
    },
)
In [39]:
# Combine the two Series into one DataFrame: each dict key names a column,
# and the rows are aligned on the city labels the Series share.
States = pd.DataFrame(
    data={
        'Population': population,
        'Area': area,
    },
)
In [40]:
# Display the DataFrame: one row per city, one column per input Series.
States
Out[40]:
Area Population
Chicago 876 43534
Los Angeles 345 34567
New York 143 76443
In [41]:
# Row labels of the DataFrame (the city names).
States.index
Out[41]:
Index([u'Chicago', u'Los Angeles', u'New York'], dtype='object')
In [42]:
# Column labels of the DataFrame.
States.columns
Out[42]:
Index([u'Area', u'Population'], dtype='object')
In [48]:
# Selecting a column by name returns it as a Series named after the column.
States['Population']
Out[48]:
Chicago        43534
Los Angeles    34567
New York       76443
Name: Population, dtype: int64
In [48]:
 
In [50]:
# A single Series can also seed a DataFrame; `columns` names the one
# resulting column.
pd.DataFrame(data=population, columns=['Population'])
Out[50]:
Population
Chicago 43534
Los Angeles 34567
New York 76443
In [3]:
# A list of dicts becomes one row per dict; keys missing from a given row
# show up as NaN in that row.
pd.DataFrame(
    data=[
        {'a': 2, 'b': 5},
        {'b': 7, 'c': 1},
    ],
)
Out[3]:
a b c
0 2 5 NaN
1 NaN 7 1
In [3]:
 # 3x2 frame of random floats with explicit row and column labels.
 # NOTE(review): no seed is set, so the values differ on every run; the column
 # label 'altitutud' looks like a typo for 'altitude' - confirm before renaming.
 pd.DataFrame(
     data=np.random.rand(3, 2),
     index=['a', 'b', 'c'],
     columns=['temp', 'altitutud'],
 )
Out[3]:
temp altitutud
a 0.598884 0.905368
b 0.789763 0.083621
c 0.106822 0.527391
In []: