微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

pandas tricks

# The snippets in this file use pandas as `pd` and NumPy as `np`; import them
# before the first trick so the script runs on its own.
import numpy as np
import pandas as pd

# Check for equality
# Build a frame whose two columns hold the same values, including one NaN.
df = pd.DataFrame({'a': [10, 40, np.nan], 'b': [10, 40, np.nan]})
print('data:\n', df)
print()
# Element-wise comparison of column a with column b; the NaN row compares
# as False even though both cells are missing.
print('df.a == df.b:')
print(df.a == df.b)
print()
# Two NaN values never compare equal, so this prints False.
print('np.nan == np.nan:')
print(np.nan == np.nan)
print()
# Series.equals() treats NaNs in the same position as equal.
print('df.a.equals(df.b):')
print(df.a.equals(df.b))
print()
# assert_series_equal() returns None on success and raises AssertionError on
# a mismatch, so call it for its effect instead of printing its return value.
print('pd.testing.assert_series_equal(df.a, df.b, check_names=False, check_dtype=False):')
pd.testing.assert_series_equal(df.a, df.b, check_names=False, check_dtype=False)
print('passed')
print()
# assert_frame_equal() is the whole-frame counterpart: silent when the frames
# match, raises otherwise.
df_new = df.copy()
pd.testing.assert_frame_equal(df, df_new)
# Use NumPy without importing NumPy
# The `pd.np` alias this trick relied on was deprecated in pandas 1.0 and
# removed in pandas 2.0, so NumPy has to be imported directly.
import numpy as np

np.random.seed(0)  # fixed seed so the printed frame is reproducible
d1 = pd.DataFrame(np.random.rand(2, 4))
print('d1:\n', d1)
# Overwrite the top-left cell with a missing value.
d1.loc[0, 0] = np.nan
print('d1:\n', d1)
# Calculate memory usage
# `df` is the frame built in the equality example earlier in this script.
# info() writes its report to stdout itself; memory_usage='deep' asks pandas
# to inspect the actual values rather than estimate.
df.info(memory_usage='deep')
# Memory used by each column. Printed explicitly because a bare expression
# shows nothing when this file is run as a script.
print(df.memory_usage(deep=True))
# Convert one set of values to another
# factorize() returns (codes, uniques); only the integer codes are kept.
df['c'] = df.a.factorize()[0]
print(df)
# Create a datetime column from several columns
# Each row is [month, day, year, holiday name].
holiday_rows = [
    [12, 25, 2019, 'christmas'],
    [11, 28, 2019, 'thanksgiving'],
]
df = pd.DataFrame(holiday_rows, columns=['month', 'day', 'year', 'holiday'])
print(df)
# to_datetime() assembles one timestamp per row from the month/day/year columns.
date_parts = df[['month', 'day', 'year']]
df['date'] = pd.to_datetime(date_parts)
print(df)
# Create an example DataFrame
# pd.util.testing (makeDataFrame, makeMissingDataframe, makeTimeDataFrame) was
# deprecated in pandas 1.0 and removed in pandas 2.0, so equivalent example
# frames are built here from public API only.
def _example_rng(seed):
    """Return a seeded NumPy random generator for reproducible example data."""
    import numpy as np  # function-scope import keeps this snippet self-contained

    return np.random.default_rng(seed)


def make_data_frame(n_rows=30, columns='ABCD', seed=0):
    """Return an n_rows x len(columns) float frame with string row labels."""
    labels = ['row_{:02d}'.format(i) for i in range(n_rows)]
    values = _example_rng(seed).standard_normal((n_rows, len(columns)))
    return pd.DataFrame(values, index=labels, columns=list(columns))


def make_missing_data_frame(n_rows=30, columns='ABCD', seed=0, missing_fraction=0.1):
    """Return make_data_frame() with about missing_fraction of the cells set to NaN."""
    frame = make_data_frame(n_rows, columns, seed)
    # A different seed is used so the holes are independent of the values.
    holes = _example_rng(seed + 1).random(frame.shape) < missing_fraction
    return frame.mask(holes)


def make_time_data_frame(n_rows=30, columns='ABCD', seed=0, start='2000-01-01'):
    """Return a float frame indexed by consecutive business days from start."""
    stamps = pd.bdate_range(start, periods=n_rows)
    values = _example_rng(seed).standard_normal((n_rows, len(columns)))
    return pd.DataFrame(values, index=stamps, columns=list(columns))


# Results are printed explicitly: bare expressions show nothing in a script.
print(make_data_frame().head())
print(make_missing_data_frame().head())
df = make_time_data_frame().head()
# Month-end resampling on the DatetimeIndex. An offset object is used instead
# of the 'M' string alias, which newer pandas deprecates in favour of 'ME'.
monthly_mean = df.resample(pd.offsets.MonthEnd()).A.mean()
print(monthly_mean)
df = make_time_data_frame().head()
# After reset_index() the timestamps live in a regular column named 'index',
# so resample() is pointed at that column with on=.
df.reset_index(inplace=True)
daily_mean = df.resample('D', on='index').A.mean()
print(daily_mean)
# Save a CSV in compressed form
# No `compression` argument is passed; the four targets differ only in their
# file extension. `df` is whichever frame is bound at this point of the script.
for target in ('dataframe.csv.zip', 'dataframe.csv.gz', 'dataframe.csv.bz2', 'dataframe.csv.xz'):
    df.to_csv(target)
# Fill missing values using interpolation
# Two numeric columns with one gap each, indexed by month.
df = pd.DataFrame({'a': [100, 120, 130, np.nan, 140], 'b': [9, 9, np.nan, 7.5, 6.5]})
df.index = pd.to_datetime(['2019-01', '2019-02', '2019-03', '2019-04', '2019-05'])
# Printed explicitly: a bare expression shows nothing when run as a script.
print(df)
# interpolate() returns a new frame with the gaps filled; `df` itself is
# left unchanged.
df_interpolated = df.interpolate()
print(df_interpolated)
# Check for duplicate merge keys
# Frames are printed explicitly: bare expressions show nothing in a script.
left = pd.DataFrame({'color': ['green', 'yellow', 'red'], 'num': [1, 2, 3]})
print(left)
right = pd.DataFrame({'color': ['green', 'yellow', 'pink', 'green'], 'size': ['S', 'M', 'L', 'XL']})
print(right)
# validate='one_to_many' makes merge() check that the join key ('color', the
# only shared column) is unique in `left`; pandas raises MergeError when it is
# not. 'green' repeats only in `right`, so this merge passes.
merged = pd.merge(left, right, how='inner', validate='one_to_many')
print(merged)
# Other helpers for creating example datasets
# pd.util.testing was deprecated in pandas 1.0 and removed in pandas 2.0, so
# look it up defensively: on versions that no longer ship it the listing is
# simply empty instead of raising AttributeError.
_legacy_testing = getattr(pd.util, 'testing', None)
if _legacy_testing is None:
    make_helpers = []
else:
    make_helpers = [x for x in dir(_legacy_testing) if x.startswith('make')]
print(make_helpers)

版权声明：本文内容由互联网用户自发贡献，该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容，请发送邮件至 dio@foxmail.com 举报，一经查实，本站将立刻删除。

相关推荐