1、创建 DataFrame
import pandas as pd

# Build a DataFrame from a list of rows; each inner list is one record and
# `columns` names the fields in the same order as the row values.
data = [['张三', 21, '男'], ['李四', 26, '女'], ['王五', 33, '男']]
df = pd.DataFrame(data, columns=['姓名', '年龄', '性别'])
print(df)
2、DataFrame中添加数据
1)添加一列
import pandas as pd

data = [['张三', 21, '男'], ['李四', 26, '女'], ['王五', 33, '男']]
df = pd.DataFrame(data, columns=['姓名', '年龄', '性别'])

# Method 1: assign to a new column label; this modifies df in place.
# Add an empty column (every row gets None).
df['住址'] = None
# Add a populated column; the list supplies one value per existing row.
df['体重'] = ['60kg', '50kg', '66kg']

# Method 2: assign() returns a new DataFrame with the extra column and
# leaves df itself unchanged.
df2 = df.assign(address=['地址1', '地址2', '地址3'])
print(df2)
2)添加一行
import pandas as pd

data = [['张三', 21, '男'], ['李四', 26, '女'], ['王五', 33, '男']]
df = pd.DataFrame(data, columns=['姓名', '年龄', '性别'])

# Method 1: assign to a row label that does not exist yet. With the default
# 0..n-1 integer index, len(df.index) is the next free label, so this appends
# the row to df in place.
df.loc[len(df.index)] = ['亮亮', 30, '男']
print(df)

# Method 2: build a one-row DataFrame and concatenate it.
dfi = pd.DataFrame([['小红', 18, '女']], columns=['姓名', '年龄', '性别'])
# ignore_index=True renumbers the result 0..n-1. A bare `df2.reset_index()`
# has no effect here because it returns a new frame instead of modifying df2,
# which would leave the duplicated label 0 from dfi in the index.
df2 = pd.concat([df, dfi], ignore_index=True)
print(df2)
3、删除DataFrame中数据
import pandas as pd

data = [['张三', 21, '男'], ['李四', 26, '女'], ['王五', 33, '男']]
df = pd.DataFrame(data, columns=['姓名', '年龄', '性别'], index=['a', 'b', 'c'])
print(df)

# Drop a column. drop() returns a new DataFrame; df itself is not modified.
print(df.drop('性别', axis=1))

# inplace=True modifies the original object instead of creating a new one:
#     df.drop('column_name', axis=1, inplace=True)
# A column can also be removed with del:
#     del df['column_name']
# Drop a single row by its label:
#     df.drop('c')

# Drop several rows by label.
df1 = df.drop(index=['a', 'c'])
print(df1)
4、修改DataFrame中数据
import pandas as pd

data = [['张三', 21, '男'], ['李四', 26, '女'], ['王五', 33, '男']]
df = pd.DataFrame(data, columns=['姓名', '年龄', '性别'])
print(df)

# Rename row labels; rename() returns a new DataFrame and leaves df unchanged.
# NOTE(review): df only has labels 0, 1, 2, so the `3: "c"` entry matches
# nothing and is silently ignored -- only row 2 becomes "b". Confirm whether
# {1: "b", 2: "c"} was intended.
df1 = df.rename(index={2: "b", 3: "c"})
print(df1)

# Rename a column (again returns a new DataFrame).
df2 = df.rename(columns={"年龄": "[年龄]"})
print(df2)

# Overwrite a whole row, selected by its row label (modifies df in place).
df.loc[1, :] = ["小李", 22, "女"]
print(df)

# Overwrite a whole column, selected by its column name (modifies df in place).
df.loc[:, "性别"] = ["男", "男", "男"]
print(df)
5、查询DataFrame中数据
1)要查询列值等于some_value
# Template (column_name / some_value are placeholders): keep the rows whose
# value in 'column_name' equals some_value.
df.loc[df['column_name'] == some_value]
2)要查询列值包含在可迭代对象some_values中的行
# Template (placeholders): keep the rows whose value in 'column_name' is one
# of the items in the iterable some_values.
df.loc[df['column_name'].isin(some_values)]
3)查询多个条件可以使用&(每个条件需要用括号括起来)
# Template (A / B are placeholders): keep the rows whose value lies between A
# and B inclusive. Each comparison is parenthesized because & binds more
# tightly than >= and <=.
df.loc[(df['column_name'] >= A) & (df['column_name'] <= B)]
4)要查询列值不等于some_value
# Template (placeholders): keep the rows whose value in 'column_name' differs
# from some_value.
df.loc[df['column_name'] != some_value]
5)isin返回的是表示值是否在其中的布尔序列,要查询不在其中的行可以使用~取反
# Template (placeholders): ~ inverts the boolean mask returned by isin(), so
# this keeps the rows whose value is NOT in some_values.
df.loc[~df['column_name'].isin(some_values)]
例如,
import pandas as pd

data = [['张三', 21, '男'], ['李四', 26, '女'], ['王五', 33, '男']]
df = pd.DataFrame(data, columns=['姓名', '年龄', '性别'])

# Boolean-mask selection: the comparison yields one True/False per row and
# .loc keeps only the rows where it is True.
print(df.loc[df['性别'] == '男'])