# import numpy as np
import pandas as pd
# Report which pandas build produced the outputs recorded below.
# The call form with one argument prints the same text on Python 2 and 3,
# whereas the bare `print "..."` statement is a SyntaxError on Python 3.
print("Using {} , version {}".format(pd.__name__, pd.__version__))
Using pandas , version 0.23.4

DataFrame

# Calling the constructor with no arguments yields a frame that has neither
# rows nor columns; printing it shows the "Empty DataFrame" summary.
df = pd.DataFrame()
print(df)
Empty DataFrame
Columns: []
Index: []
# Column-oriented input: each key becomes a column label and each list supplies
# that column's values, row by row.
# Named `people` rather than `dict` so this cell does not shadow the builtin
# `dict` type.
people = {
    'name': ["Tom", "Bob", "Mary", "James"],
    'age': [18, 30, 25, 40],
    'city': ["Beijing", "ShangHai", "GuangZhou", "ShenZhen"],
}

df = pd.DataFrame(people)
df
age city name
0 18 Beijing Tom
1 30 ShangHai Bob
2 25 GuangZhou Mary
3 40 ShenZhen James
# Build the table from row-wise records, labelling the rows with a named index
# ('person') instead of the default integer positions.
cols = ['age', 'city']
data = [
    [18, 'Beijing'],
    [30, 'ShangHai'],
    [25, 'GuangZhou'],
    [40, 'ShenZhen'],
]
index = pd.Index(["Tom", "Bob", "Mary", "James"], name='person')

df = pd.DataFrame(data, index=index, columns=cols)
df
age city
person
Tom 18 Beijing
Bob 30 ShangHai
Mary 25 GuangZhou
James 40 ShenZhen

2. DataFrame

2.1 Columns

Add column

# Rebuild the sample frame used by the column examples that follow.
# Named `people` rather than `dict` so this cell does not shadow the builtin
# `dict` type.
people = {
    'name': ["Tom", "Bob", "Mary", "James"],
    'age': [18, 30, 25, 40],
    'city': ["Beijing", "ShangHai", "GuangZhou", "ShenZhen"],
}

df = pd.DataFrame(people)
df
age city name
0 18 Beijing Tom
1 30 ShangHai Bob
2 25 GuangZhou Mary
3 40 ShenZhen James
# Assigning a scalar to a new label adds that column and repeats the value on
# every row.
df['country'] = 'USA'
df
age city name country
0 18 Beijing Tom USA
1 30 ShangHai Bob USA
2 25 GuangZhou Mary USA
3 40 ShenZhen James USA
# A new column can also be created from an existing one; 'adress' starts out
# with the same values as 'country'.
# NOTE(review): 'adress' is a misspelling of 'address'; it is kept because the
# later cells and the recorded outputs all use this label.
df['adress'] = df['country']
df
age city name country adress
0 18 Beijing Tom USA USA
1 30 ShangHai Bob USA USA
2 25 GuangZhou Mary USA USA
3 40 ShenZhen James USA USA

Change column values

# Assigning to an existing label overwrites that column. 'adress' keeps its
# earlier 'USA' values: it does not follow later changes to 'country'.
df['country'] = 'China'
df
age city name country adress
0 18 Beijing Tom China USA
1 30 ShangHai Bob China USA
2 25 GuangZhou Mary China USA
3 40 ShenZhen James China USA
# Element-wise string concatenation: each row becomes "<city>,<country>".
df['adress'] = df['city']+','+ df['country']
df
age city name country adress
0 18 Beijing Tom China Beijing,China
1 30 ShangHai Bob China ShangHai,China
2 25 GuangZhou Mary China GuangZhou,China
3 40 ShenZhen James China ShenZhen,China

Delete columns

# Two ways to remove a column, both modifying df itself:
#   - drop() with axis=1 (columns) and inplace=True
#   - the `del` statement on the column label
df.drop('country',axis=1, inplace=True)
del df['city']
df
age name adress
0 18 Tom Beijing,China
1 30 Bob ShangHai,China
2 25 Mary GuangZhou,China
3 40 James ShenZhen,China

Select columns

# Bracket lookup with a single label returns that column as a Series.
df['age']
0    18
1    30
2    25
3    40
Name: age, dtype: int64
# Attribute-style access to the 'name' column; it yields a Series just like
# the bracket form. Per the pandas indexing docs this shortcut only works when
# the label is a valid Python identifier that does not collide with an
# existing DataFrame attribute or method.
df.name
0      Tom
1      Bob
2     Mary
3    James
Name: name, dtype: object
# Passing a list of labels returns a DataFrame with just those columns, in the
# order listed.
df[['age','name']]
age name
0 18 Tom
1 30 Bob
2 25 Mary
3 40 James
# The column labels are exposed as an Index object.
df.columns
Index([u'age', u'name', u'adress'], dtype='object')
# Alternative ways to relabel the columns (left disabled):
#   1) assign the full list of new labels directly:
# df.columns = ['Age','Name','Adress']
# df
#   2) map old labels to new ones; index=str would also apply str() to the
#      row labels, and without inplace=True the result is a new frame:
# df.rename(index = str, columns = {'age':'Age','name':'Name','adress':'Adress'})
# Used here: apply str.capitalize to every column label, modifying df itself.
df.rename(str.capitalize, axis='columns',inplace =True)
df
Age Name Adress
0 18 Tom Beijing,China
1 30 Bob ShangHai,China
2 25 Mary GuangZhou,China
3 40 James ShenZhen,China

Set column value with conditions

# Bucket every person by age: 'elderly' is the fallback label, then the two
# younger brackets overwrite it on the rows whose Age falls in their range
# (Age <= 18 -> 'young', 18 < Age <= 30 -> 'middle_aged').
df['Group'] = 'elderly'
df.loc[df['Age'].le(18), 'Group'] = 'young'
df.loc[df['Age'].gt(18) & df['Age'].le(30), 'Group'] = 'middle_aged'
df
Age Name Adress Group
0 18 Tom Beijing,China young
1 30 Bob ShangHai,China middle_aged
2 25 Mary GuangZhou,China middle_aged
3 40 James ShenZhen,China elderly