Python pandas 模块,IndexSlice() 实例源码
我们从Python开源项目中,提取了以下23个代码示例,用于说明如何使用pandas.IndexSlice()。
def calc_shared(self, label):
    """
    Store the rows of the full shared-score table that are complete for at
    least one top-level column group.

    Reads ``/main/<label>/scores_shared_full`` from ``self.store``, keeps
    every row for which all columns under at least one first-level column
    label are non-null, and writes the result to
    ``/main/<label>/scores_shared`` in table format. Does nothing when
    ``self.check_store`` reports that the destination already exists.
    """
    destination = "/main/{}/scores_shared".format(label)
    if self.check_store(destination):
        return

    idx = pd.IndexSlice

    logging.info(
        "Identifying subset shared across all Selections ({})".format(label),
        extra={'oname': self.name})

    full_scores = self.store.select(
        "/main/{}/scores_shared_full".format(label))

    # A row is kept when it has no nulls within at least one group of
    # columns sharing the same first-level label.
    keep = np.zeros(len(full_scores.index), dtype=bool)
    for condition in full_scores.columns.levels[0]:
        condition_block = full_scores.loc[:, idx[condition, :, :]]
        row_is_complete = condition_block.notnull().all(axis='columns')
        keep = np.logical_or(keep, row_is_complete)

    self.store.put(destination, full_scores.loc[keep], format="table")
def applymap(self, func, subset=None, **kwargs):
    """
    Queue an elementwise function for later application.

    .. versionadded:: 0.17.1

    Nothing is computed here: an entry is appended to ``self._todo``
    consisting of a getter for the instance's ``_applymap`` method, the
    positional arguments ``(func, subset)`` and the keyword arguments.

    Parameters
    ----------
    func : function
    subset : IndexSlice
        a valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice
    kwargs : dict
        pass along to ``func``

    Returns
    -------
    self : Styler
    """
    def _resolve(instance):
        # Looked up lazily so the method is taken from whichever instance
        # eventually processes the queue.
        return getattr(instance, '_applymap')

    queued = (_resolve, (func, subset), kwargs)
    self._todo.append(queued)
    return self
def highlight_max(self, color='yellow', axis=0, subset=None):
    """
    Highlight the maximum by shading the background

    .. versionadded:: 0.17.1

    Parameters
    ----------
    color: str, default 'yellow'
    axis: int, str, or None; default 0
        0 or 'index' for columnwise, 1 or 'columns' for rowwise
        or ``None`` for tablewise
    subset: IndexSlice, default None
        a valid slice for ``data`` to limit the style application to

    Returns
    -------
    The result of ``self._highlight_handler`` called with ``max_=True``
    (described by the original docstring as ``self : Styler``).
    """
    # ``subset`` is accepted as a trailing optional parameter: the body
    # always forwarded it, but it was missing from the signature, so every
    # call raised NameError. Appending it keeps existing positional use of
    # ``color`` and ``axis`` working.
    return self._highlight_handler(subset=subset, color=color, axis=axis,
                                   max_=True)
def highlight_min(self, axis=0, color='yellow', subset=None):
    """
    Highlight the minimum by shading the background

    .. versionadded:: 0.17.1

    Parameters
    ----------
    axis: int, str, or None; default 0
        0 or 'index' for columnwise, 1 or 'columns' for rowwise
        or ``None`` for tablewise
    color: str, default 'yellow'
    subset: IndexSlice, default None
        a valid slice for ``data`` to limit the style application to

    Returns
    -------
    The result of ``self._highlight_handler`` called with ``max_=False``.
    """
    # NOTE(review): the parameter list and call are rebuilt to mirror
    # ``highlight_max``; ``color`` and ``subset`` are appended after the
    # surviving ``axis`` parameter so positional ``axis`` callers keep
    # working. Confirm the parameter order against the upstream API.
    return self._highlight_handler(subset=subset, color=color, axis=axis,
                                   max_=False)
def test_slice_with_negative_step(self):
    """
    Label slices with a negative step on a monthly PeriodIndex must select
    the same rows as the equivalent positional slices.
    """
    ts = Series(np.arange(20),
                period_range('2014-01', periods=20, freq='M'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # Compare plain ``[]``, ``.loc`` and ``.ix`` label access against
        # positional ``.iloc`` access.
        positional = ts.iloc[i_slc]
        assert_series_equal(ts[l_slc], positional)
        assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
        assert_series_equal(ts.ix[l_slc], ts.iloc[i_slc])

    # (label-based slice, expected positional slice); bounds are given both
    # as strings and as Period objects, alone and mixed.
    cases = [
        (SLC[Period('2014-10')::-1], SLC[9::-1]),
        (SLC['2014-10'::-1], SLC[9::-1]),
        (SLC[:Period('2014-10'):-1], SLC[:8:-1]),
        (SLC[:'2014-10':-1], SLC[:8:-1]),
        (SLC['2015-02':'2014-10':-1], SLC[13:8:-1]),
        (SLC[Period('2015-02'):Period('2014-10'):-1], SLC[13:8:-1]),
        (SLC['2015-02':Period('2014-10'):-1], SLC[13:8:-1]),
        (SLC[Period('2015-02'):'2014-10':-1], SLC[13:8:-1]),
        # start before stop with a negative step selects nothing
        (SLC['2014-10':'2015-02':-1], SLC[:0]),
    ]
    for label_slice, positional_slice in cases:
        assert_slices_equivalent(label_slice, positional_slice)
def test_slice_with_negative_step(self):
    """
    Label slices with a negative step on an hourly TimedeltaIndex must
    select the same rows as the equivalent positional slices.
    """
    # ``periods=20`` is required: ``timedelta_range`` cannot build an index
    # from ``start`` and ``freq`` alone, and the index must match the 20
    # data values.
    ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # Same three label-access checks as the sibling Period-index test.
        assert_series_equal(ts[l_slc], ts.iloc[i_slc])
        assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
        assert_series_equal(ts.ix[l_slc], ts.iloc[i_slc])

    assert_slices_equivalent(SLC[timedelta(hours=7)::-1], SLC[7::-1])
    assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1])

    assert_slices_equivalent(SLC[:timedelta(hours=7):-1], SLC[:6:-1])
    assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1])

    assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1])
    assert_slices_equivalent(
        SLC[timedelta(hours=15):timedelta(hours=7):-1], SLC[15:6:-1])
    assert_slices_equivalent(SLC['15 hours':timedelta(hours=7):-1],
                             SLC[15:6:-1])
    assert_slices_equivalent(SLC[timedelta(hours=15):'7 hours':-1],
                             SLC[15:6:-1])

    # start before stop with a negative step selects nothing
    assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0])
def test_applymap_subset(self):
    """
    ``Styler.applymap`` with ``subset`` must style exactly the cells that
    ``self.df.loc[subset]`` selects.
    """
    def f(x):
        return 'foo: bar'

    slices = [pd.IndexSlice[:], pd.IndexSlice[:, ['A']],
              pd.IndexSlice[[1], :], pd.IndexSlice[[1], ['A']],
              pd.IndexSlice[:2, ['A', 'B']]]

    for slice_ in slices:
        result = self.df.style.applymap(f, subset=slice_)._compute().ctx
        # Expected context: one entry per (row position, column position)
        # whose labels both survive the ``.loc`` selection.
        expected = dict(((r, c), ['foo: bar'])
                        for r, row in enumerate(self.df.index)
                        for c, col in enumerate(self.df.columns)
                        if row in self.df.loc[slice_].index and
                        col in self.df.loc[slice_].columns)
        self.assertEqual(result, expected)
def _member_to_beacon_proximity(m2badge, beacons):
"""Creates a member-to-beacon proximity DataFrame from member-to-badge proximity data.
Parameters
----------
m2badge : pd.DataFrame
The member-to-badge proximity data,as returned by `member_to_badge_proximity`.
beacons : list of str
A list of beacon ids.
Returns
-------
pd.DataFrame :
The member-to-member proximity data.
"""
df = m2badge.copy()
# Rename 'observed_id' to 'beacon'
df = df.rename_axis(['datetime', 'member', 'beacon'])
# Filter out ids that are not in `beacons`
return df.loc[pd.IndexSlice[:, beacons],:]
def apply(self, func, axis=0, subset=None, **kwargs):
    """
    Apply a function column-wise, row-wise, or table-wise,
    updating the HTML representation with the result.

    .. versionadded:: 0.17.1

    Nothing is computed here: an entry is appended to ``self._todo``
    consisting of a getter for the instance's ``_apply`` method, the
    positional arguments ``(func, axis, subset)`` and the keyword
    arguments.

    Parameters
    ----------
    func: function
    axis: int, str or None
        apply to each column (``axis=0`` or ``'index'``)
        or to each row (``axis=1`` or ``'columns'``) or
        to the entire DataFrame at once with ``axis=None``.
    subset: IndexSlice
        a valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice
    kwargs: dict
        pass along to ``func``

    Returns
    -------
    self : Styler

    Notes
    -----
    This is similar to ``DataFrame.apply``, except that ``axis=None``
    applies the function to the entire DataFrame at once,
    rather than column-wise or row-wise.
    """
    # The queued entry has the same (getter, args, kwargs) layout that
    # ``applymap`` uses, so ``func`` and ``subset`` must travel in the
    # positional-args tuple together with ``axis``.
    self._todo.append((lambda instance: getattr(instance, '_apply'),
                       (func, axis, subset), kwargs))
    return self
def background_gradient(self, cmap='PuBu', low=0, high=0,
                        subset=None, axis=0):
    """
    Color the background in a gradient according to
    the data in each column (optionally row).
    Requires matplotlib.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    cmap: str or colormap
        matplotlib colormap
    low, high: float
        compress the range by these values.
    subset: IndexSlice
        a valid slice for ``data`` to limit the style application to
    axis: int or str, default 0
        0 or 'index' for columnwise, 1 or 'columns' for rowwise

    Returns
    -------
    self : Styler

    Notes
    -----
    Tune ``low`` and ``high`` to keep the text legible by
    not using the entire range of the color map. These extend
    the range of the data by ``low * (x.max() - x.min())``
    and ``high * (x.max() - x.min())`` before normalizing.
    """
    # ``axis`` was forwarded to ``self.apply`` but missing from the
    # signature (NameError on every call). It is appended after ``subset``
    # so existing positional callers are unaffected.
    # NOTE(review): upstream pandas may order ``axis`` before ``subset``;
    # pass both by keyword to be safe.
    subset = _maybe_numeric_slice(self.data, subset)
    subset = _non_reducing_slice(subset)
    self.apply(self._background_gradient, cmap=cmap, subset=subset,
               axis=axis, low=low, high=high)
    return self
def set_properties(self, subset=None, **kwargs):
    """
    Convenience method for setting one or more non-data dependent
    properties on each cell.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    subset: IndexSlice
        a valid slice for ``data`` to limit the style application to
    kwargs: dict
        property: value pairs to be set for each cell

    Returns
    -------
    The result of ``self.applymap`` (described by the original docstring
    as ``self : Styler``).

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(10, 4))
    >>> df.style.set_properties(color="white", align="right")
    """
    # ``subset`` must be a named parameter: left inside ``**kwargs`` it
    # would be rendered as a CSS property, and the ``subset=subset`` below
    # would raise NameError.
    values = ';'.join('{p}: {v}'.format(p=p, v=v)
                      for p, v in kwargs.items())

    def f(x):
        # Every cell gets the same, data-independent declaration string.
        return values

    return self.applymap(f, subset=subset)
def test_slice_with_negative_step(self):
    """
    Label slices with a negative step on a month-start DatetimeIndex must
    select the same rows as the equivalent positional slices.
    """
    # ``periods=20`` is required: ``date_range`` cannot build an index from
    # ``start`` and ``freq`` alone, and the index must match the 20 data
    # values.
    ts = Series(np.arange(20),
                date_range('2014-01-01', periods=20, freq='MS'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # Same three label-access checks as the sibling Period-index test.
        assert_series_equal(ts[l_slc], ts.iloc[i_slc])
        assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
        assert_series_equal(ts.ix[l_slc], ts.iloc[i_slc])

    assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1])
    assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1])

    assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1])
    assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1])

    assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1],
                             SLC[13:8:-1])
    assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp(
        '2014-10-01'):-1], SLC[13:8:-1])
    assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1],
                             SLC[13:8:-1])
    assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1],
                             SLC[13:8:-1])

    # start before stop with a negative step selects nothing
    assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0])
def test_set_properties_subset(self):
    """
    ``set_properties`` restricted to a single cell must produce a style
    context containing only that cell.
    """
    frame = pd.DataFrame({'A': [0, 1]})
    only_first_cell = pd.IndexSlice[0, 'A']
    styler = frame.style.set_properties(subset=only_first_cell,
                                        color='white')
    result = styler._compute().ctx
    # Keys of the context are (row position, column position).
    self.assertEqual(result, {(0, 0): ['color: white']})
def test_apply_subset(self):
    """
    ``Styler.apply`` with ``subset`` must style exactly the cells that
    ``self.df.loc[subset]`` selects, for both axes.
    """
    axes = [0, 1]
    # Same selectors as ``test_applymap_subset``.
    slices = [pd.IndexSlice[:], pd.IndexSlice[:, ['A']],
              pd.IndexSlice[[1], :], pd.IndexSlice[[1], ['A']],
              pd.IndexSlice[:2, ['A', 'B']]]
    for ax in axes:
        for slice_ in slices:
            result = self.df.style.apply(self.h, axis=ax, subset=slice_,
                                         foo='baz')._compute().ctx
            # Expected context: one entry per (row position, column
            # position) whose labels both survive the ``.loc`` selection.
            expected = dict(((r, c), ['color: baz'])
                            for r, row in enumerate(self.df.index)
                            for c, col in enumerate(self.df.columns)
                            if row in self.df.loc[slice_].index and
                            col in self.df.loc[slice_].columns)
            self.assertEqual(result, expected)
def test_display_subset(self):
    """
    ``Styler.format`` with ``subset`` must format only the selected cells
    and leave the display value of every other cell untouched.

    ``ctx['body'][r][c + 1]`` is the cell at row ``r``, data column ``c``
    (index 0 of each body row is indexed past by every assertion here).
    """
    df = pd.DataFrame([[.1234, .1234], [1.1234, 1.1234]],
                      columns=['a', 'b'])

    # Per-column formatters, first row only.
    ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"},
                          subset=pd.IndexSlice[0, :])._translate()
    expected = '0.1'
    self.assertEqual(ctx['body'][0][1]['display_value'], expected)
    self.assertEqual(ctx['body'][1][1]['display_value'], '1.1234')
    self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%')

    # Single formatter, first row only: second row keeps its raw value.
    raw_11 = '1.1234'
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice[0, :])._translate()
    self.assertEqual(ctx['body'][0][1]['display_value'], expected)
    self.assertEqual(ctx['body'][1][1]['display_value'], raw_11)

    # Column 'a' only: column 'b' keeps its raw value.
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice['a'])._translate()
    self.assertEqual(ctx['body'][0][1]['display_value'], expected)
    self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234')

    # Single cell (row 0, column 'a').
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice[0, 'a'])._translate()
    self.assertEqual(ctx['body'][0][1]['display_value'], expected)
    self.assertEqual(ctx['body'][1][1]['display_value'], raw_11)

    # Both rows of column 'a', given as lists.
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice[[0, 1], ['a']])._translate()
    self.assertEqual(ctx['body'][0][1]['display_value'], expected)
    self.assertEqual(ctx['body'][1][1]['display_value'], '1.1')
    self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234')
    self.assertEqual(ctx['body'][1][2]['display_value'], '1.1234')
def test_background_gradient(self):
    """
    ``background_gradient`` must emit a hex colour for every cell, give
    equal colours to cells holding the same relative position within their
    column, and honour ``subset``.
    """
    df = pd.DataFrame([[1, 2], [2, 4]], columns=['A', 'B'])

    # NOTE(review): ``axis`` is iterated but not passed to
    # ``background_gradient``, so every iteration exercises the default
    # axis. The row-equality assertions below only hold for column-wise
    # gradients, so forwarding ``axis`` would need different expectations.
    for axis in [0, 1, 'index', 'columns']:
        for cmap in [None, 'YlOrRd']:
            result = df.style.background_gradient(cmap=cmap)._compute().ctx
            self.assertTrue(all("#" in x[0] for x in result.values()))
            self.assertEqual(result[(0, 0)], result[(0, 1)])
            self.assertEqual(result[(1, 0)], result[(1, 1)])

    # ``IndexSlice[1, 'A']`` selects the single cell at position (1, 0).
    result = (df.style.background_gradient(subset=pd.IndexSlice[1, 'A'])
              ._compute().ctx)
    self.assertEqual(result[(1, 0)], ['background-color: #fff7fb'])
def calc_pvalues_wt(self, label):
    """
    Calculate uncorrected pvalue for each variant compared to wild type.

    Reads ``/main/<label>/scores`` from ``self.store`` and, for each
    first-level column label (condition), computes
    ``z = |score_wt - score| / sqrt(SE_wt**2 + SE**2)`` and the two-sided
    normal p-value ``2 * sf(z)``. The result is written to
    ``/main/<label>/scores_pvalues_wt`` in table format.

    Does nothing when the destination already exists or when the store
    holds no wild type row.
    """
    if self.check_store("/main/{}/scores_pvalues_wt".format(label)):
        return

    idx = pd.IndexSlice

    wt = self.store.select("/main/{}/scores".format(label),
                           "index=WILD_TYPE_VARIANT")
    if len(wt) == 0:  # no wild type score
        logging.info("Failed to find wild type score,skipping wild type "
                     "p-value calculations", extra={'oname': self.name})
        return
    data = self.store.select("/main/{}/scores".format(label),
                             "index!=WILD_TYPE_VARIANT")

    columns = pd.MultiIndex.from_product([sorted(self.child_names()),
                                          sorted(["z", "pvalue_raw"])],
                                         names=["condition", "value"])
    result_df = pd.DataFrame(index=data.index, columns=columns)

    condition_labels = data.columns.levels[0]
    for cnd in condition_labels:
        # Each selector addresses one (condition, value) column; the wild
        # type terms are scalars broadcast against the variant columns.
        result_df.loc[:, idx[cnd, 'z']] = \
            np.absolute(wt.loc[WILD_TYPE_VARIANT, idx[cnd, 'score']] -
                        data.loc[:, idx[cnd, 'score']]) / \
            np.sqrt(wt.loc[WILD_TYPE_VARIANT, idx[cnd, 'SE']] ** 2 +
                    data.loc[:, idx[cnd, 'SE']] ** 2)
        result_df.loc[:, idx[cnd, 'pvalue_raw']] = \
            2 * stats.norm.sf(result_df.loc[:, idx[cnd, 'z']])

    self.store.put("/main/{}/scores_pvalues_wt".format(label), result_df,
                   format="table")
def calc_pvalues_pairwise(self, label):
    """
    Calculate pvalues for each variant in each pair of Conditions.

    Reads ``/main/<label>/scores`` from ``self.store`` and, for every
    unordered pair of first-level column labels ``(cnd1, cnd2)``, computes
    ``z = |score1 - score2| / sqrt(SE1**2 + SE2**2)`` and the two-sided
    normal p-value ``2 * sf(z)``. The result, with columns
    ``(condition1, condition2, value)``, is written to
    ``/main/<label>/scores_pvalues`` in table format.

    Does nothing when the destination already exists.
    """
    if self.check_store("/main/{}/scores_pvalues".format(label)):
        return

    data = self.store['/main/{}/scores'.format(label)]

    cnd1_index = list()
    cnd2_index = list()
    values_index = list()
    values_list = ["z", "pvalue_raw"]

    # Build one (cnd1, cnd2, value) column per value for each pair.
    condition_labels = data.columns.levels[0]
    for i, cnd1 in enumerate(condition_labels):
        for cnd2 in condition_labels[i + 1:]:
            cnd1_index.extend([cnd1] * len(values_list))
            cnd2_index.extend([cnd2] * len(values_list))
            values_index.extend(sorted(values_list))

    # ``zip`` is materialised so a concrete list of tuples is passed
    # regardless of whether ``zip`` returns a list or an iterator.
    columns = pd.MultiIndex.from_tuples(
        list(zip(cnd1_index, cnd2_index, values_index)),
        names=["condition1", "condition2", "value"])

    idx = pd.IndexSlice
    result_df = pd.DataFrame(np.nan, index=data.index, columns=columns)
    for i, cnd1 in enumerate(condition_labels):
        for cnd2 in condition_labels[i + 1:]:
            result_df.loc[:, idx[cnd1, cnd2, 'z']] = \
                np.absolute(data.loc[:, idx[cnd1, 'score']] -
                            data.loc[:, idx[cnd2, 'score']]) / \
                np.sqrt(data.loc[:, idx[cnd1, 'SE']] ** 2 +
                        data.loc[:, idx[cnd2, 'SE']] ** 2)
            result_df.loc[:, idx[cnd1, cnd2, 'pvalue_raw']] = \
                2 * stats.norm.sf(result_df.loc[:, idx[cnd1, cnd2, 'z']])

    self.store.put("/main/{}/scores_pvalues".format(label), result_df,
                   format="table")
def crack_egg(egg, subjects=None, lists=None):
    '''
    Takes an egg and returns a subset of the subjects or lists

    Parameters
    ----------
    egg : Egg data object
        Egg that you want to crack

    subjects : list
        List of subject idxs. Defaults to every value in the first index
        level of ``egg.pres``; any other iterable is converted to a list.

    lists : list
        List of lists idxs. Defaults to every value in the second index
        level of ``egg.pres``; any other iterable is converted to a list.

    Returns
    ----------
    new_egg : Egg data object
        A sliced egg, good on a salad
    '''
    from .egg import Egg

    all_have_features = egg.features is not None
    opts = {}

    if subjects is None:
        subjects = egg.pres.index.levels[0].values.tolist()
    elif not isinstance(subjects, list):
        subjects = list(subjects)

    if lists is None:
        lists = egg.pres.index.levels[1].values.tolist()
    elif not isinstance(lists, list):
        lists = list(lists)

    # Every table is restricted with the same (subjects, lists) row
    # selector and keeps all of its own columns.
    idx = pd.IndexSlice
    pres = egg.pres.loc[idx[subjects, lists], egg.pres.columns]
    rec = egg.rec.loc[idx[subjects, lists], egg.rec.columns]

    # Regroup into one nested list per subject, the form passed to ``Egg``.
    pres = [pres.loc[sub, :].values.tolist() for sub in subjects]
    rec = [rec.loc[sub, :].values.tolist() for sub in subjects]

    if all_have_features:
        features = egg.features.loc[idx[subjects, lists],
                                    egg.features.columns]
        opts['features'] = [features.loc[sub, :].values.tolist()
                            for sub in subjects]

    return Egg(pres=pres, rec=rec, **opts)
def calc_scores(self, label):
    """
    Combine the scores and standard errors within each condition.

    Reads ``/main/<label>/scores_shared`` from ``self.store`` and writes a
    table with ``(condition, value)`` columns, where value is one of
    ``SE``, ``epsilon`` and ``score``, to ``/main/<label>/scores``.

    * ``self.scoring_method == "simple"``: the score is the mean across the
      condition's ``score`` columns; ``SE`` and ``epsilon`` stay NaN.
    * otherwise: a condition with a single replicate copies its score and
      SE with ``epsilon`` 0; a condition with several replicates takes the
      three values returned by ``rml_estimator``.

    Does nothing when the destination already exists.
    """
    if self.check_store("/main/{}/scores".format(label)):
        return

    logging.info("Calculating per-condition scores ({})".format(label),
                 extra={'oname': self.name})

    # set up new data frame
    shared_index = self.store.select("/main/{}/scores_shared"
                                     "".format(label),
                                     "columns='index'").index
    columns = pd.MultiIndex.from_product([sorted(self.child_names()),
                                          sorted(["score", "SE",
                                                  "epsilon"])],
                                         names=["condition", "value"])
    data = pd.DataFrame(np.nan, index=shared_index, columns=columns)
    del shared_index
    del columns

    # set up local variables
    idx = pd.IndexSlice

    score_df = self.store.select("/main/{}/scores_shared".format(label))
    # ``score_df`` columns have three levels (condition, <middle>, value),
    # so reads use ``idx[cnd, :, value]``; ``data`` has two levels, so
    # writes use ``idx[cnd, value]``.
    if self.scoring_method == "simple":
        # special case for simple ratios that have no SE
        # calculates the average score
        for cnd in score_df.columns.levels[0]:
            data.loc[:, idx[cnd, 'score']] = \
                score_df.loc[:, idx[cnd, :, 'score']].mean(axis=1)
    else:
        for cnd in score_df.columns.levels[0]:
            # transposed so the first axis runs over the condition's
            # columns (replicates)
            y = np.array(score_df.loc[:, idx[cnd, :, 'score']].values).T
            sigma2i = \
                np.array(score_df.loc[:, idx[cnd, :, 'SE']].values ** 2).T

            # single replicate of the condition
            if y.shape[0] == 1:
                data.loc[:, idx[cnd, 'score']] = y.ravel()
                data.loc[:, idx[cnd, 'SE']] = np.sqrt(sigma2i).ravel()
                data.loc[:, idx[cnd, 'epsilon']] = 0.
            # multiple replicates
            else:
                betaML, var_betaML, eps = rml_estimator(y, sigma2i)
                data.loc[:, idx[cnd, 'score']] = betaML
                data.loc[:, idx[cnd, 'SE']] = np.sqrt(var_betaML)
                data.loc[:, idx[cnd, 'epsilon']] = eps

            # special case for normalized wild type variant
            # NOTE(review): nesting level assumed (inside the per-condition
            # loop of the non-simple branch) -- confirm against upstream.
            if self.logr_method == "wt" and WILD_TYPE_VARIANT in \
                    data.index:
                data.loc[WILD_TYPE_VARIANT, idx[:, 'SE']] = 0.
                data.loc[WILD_TYPE_VARIANT, idx[:, 'score']] = 0.
                data.loc[WILD_TYPE_VARIANT, idx[:, 'epsilon']] = 0.

    # store the data
    self.store.put("/main/{}/scores".format(label), data, format="table")
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。