Linguistics PhD
import pandas as pd
df = pd.DataFrame({'Date':['10/2/2011', '10/2/2011', '11/2/2011', '12/2/2011', '13/2/2011'],
'Product':['umbrella', 'matress', 'badminton', 'shuttle', 'jacket'],
'Updated_Price':[880, 1250, 1450, 1550, 400],
'Discount':[10, 8, 20, 15, 10]})
df
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
0 | 10/2/2011 | umbrella | 880 | 10 |
1 | 10/2/2011 | matress | 1250 | 8 |
2 | 11/2/2011 | badminton | 1450 | 20 |
3 | 12/2/2011 | shuttle | 1550 | 15 |
4 | 13/2/2011 | jacket | 400 | 10 |
df[['Product', 'Discount']]
Product | Discount | |
---|---|---|
0 | umbrella | 10 |
1 | matress | 8 |
2 | badminton | 20 |
3 | shuttle | 15 |
4 | jacket | 10 |
df.iloc[:, [1,3]]
Product | Discount | |
---|---|---|
0 | umbrella | 10 |
1 | matress | 8 |
2 | badminton | 20 |
3 | shuttle | 15 |
4 | jacket | 10 |
df.iloc[[1,3], :]
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
1 | 10/2/2011 | matress | 1250 | 8 |
3 | 12/2/2011 | shuttle | 1550 | 15 |
#perfect match
df[df['Product'] == 'shuttle']
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
3 | 12/2/2011 | shuttle | 1550 | 15 |
#partial match
df[df['Product'].str.contains("ll", na=False)]
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
0 | 10/2/2011 | umbrella | 880 | 10 |
df[df['Updated_Price'] > 1000]
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
1 | 10/2/2011 | matress | 1250 | 8 |
2 | 11/2/2011 | badminton | 1450 | 20 |
3 | 12/2/2011 | shuttle | 1550 | 15 |
df.iloc[0:2, 1:3]
Product | Updated_Price | |
---|---|---|
0 | umbrella | 880 |
1 | matress | 1250 |
df.iloc[::2, :] #every 2nd row
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
0 | 10/2/2011 | umbrella | 880 | 10 |
2 | 11/2/2011 | badminton | 1450 | 20 |
4 | 13/2/2011 | jacket | 400 | 10 |
df.select_dtypes(include = "number")
Updated_Price | Discount | |
---|---|---|
0 | 880 | 10 |
1 | 1250 | 8 |
2 | 1450 | 20 |
3 | 1550 | 15 |
4 | 400 | 10 |
df2 = pd.DataFrame({'Date':['10/2/2011', '9/2/2011', '11/2/2011'],
'Product':['umbrella', 'couch', 'badminton'],
'Updated_Price':[880, 1750, 1450],
'Discount':[10, 9, 20]})
df[~df.isin(df2).all(1)] #remove ~ if you want to select rows that do exist in df2
Date | Product | Updated_Price | Discount | |
---|---|---|---|---|
1 | 10/2/2011 | matress | 1250 | 8 |
3 | 12/2/2011 | shuttle | 1550 | 15 |
4 | 13/2/2011 | jacket | 400 | 10 |