Subset/select rows and columns in a pandas dataframe

import pandas as pd

df = pd.DataFrame({'Date':['10/2/2011', '10/2/2011', '11/2/2011', '12/2/2011', '13/2/2011'],
                   'Product':['umbrella', 'matress', 'badminton', 'shuttle', 'jacket'],
                   'Updated_Price':[880, 1250, 1450, 1550, 400],
                   'Discount':[10, 8, 20, 15, 10]})
df

	Date	Product	Updated_Price	Discount
0	10/2/2011	umbrella	880	10
1	10/2/2011	matress	1250	8
2	11/2/2011	badminton	1450	20
3	12/2/2011	shuttle	1550	15
4	13/2/2011	jacket	400	10

subset columns with column names

df[['Product', 'Discount']]

	Product	Discount
0	umbrella	10
1	matress	8
2	badminton	20
3	shuttle	15
4	jacket	10

subset columns with column indexes

df.iloc[:, [1,3]]

	Product	Discount
0	umbrella	10
1	matress	8
2	badminton	20
3	shuttle	15
4	jacket	10

subset rows with row indexes

df.iloc[[1,3], :]

	Date	Product	Updated_Price	Discount
1	10/2/2011	matress	1250	8
3	12/2/2011	shuttle	1550	15

select rows that contain a particular value

#perfect match
df[df['Product'] == 'shuttle']

	Date	Product	Updated_Price	Discount
3	12/2/2011	shuttle	1550	15

#partial match
df[df['Product'].str.contains("ll", na=False)]

	Date	Product	Updated_Price	Discount
0	10/2/2011	umbrella	880	10

select rows based on a condition

df[df['Updated_Price'] > 1000]

	Date	Product	Updated_Price	Discount
1	10/2/2011	matress	1250	8
2	11/2/2011	badminton	1450	20
3	12/2/2011	shuttle	1550	15

select both rows and columns with indexes

df.iloc[0:2, 1:3]

	Product	Updated_Price
0	umbrella	880
1	matress	1250

select every nth row

df.iloc[::2, :] #every 2nd row

	Date	Product	Updated_Price	Discount
0	10/2/2011	umbrella	880	10
2	11/2/2011	badminton	1450	20
4	13/2/2011	jacket	400	10

select columns of a certain data type

df.select_dtypes(include = "number")

	Updated_Price	Discount
0	880	10
1	1250	8
2	1450	20
3	1550	15
4	400	10

select rows that do (not) exist in another dataset

df2 = pd.DataFrame({'Date':['10/2/2011', '9/2/2011', '11/2/2011'],
                   'Product':['umbrella', 'couch', 'badminton'],
                   'Updated_Price':[880, 1750, 1450],
                   'Discount':[10, 9, 20]})

df[~df.isin(df2).all(1)] #remove ~ if you want to select rows that do exist in df2

	Date	Product	Updated_Price	Discount
1	10/2/2011	matress	1250	8
3	12/2/2011	shuttle	1550	15
4	13/2/2011	jacket	400	10