Python pandas 模块,to_timedelta() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用pandas.to_timedelta()。
def _wrap_result(self, result, block=None, obj=None):
    """ wrap a single result """

    if obj is None:
        obj = self._selected_obj

    if isinstance(result, np.ndarray):

        # coerce if necessary
        if block is not None:
            if com.is_timedelta64_dtype(block.values.dtype):
                # Window/resample ops return plain float ndarrays; convert the
                # values back to timedelta64[ns] while preserving the shape.
                result = pd.to_timedelta(
                    result.ravel(), unit='ns').values.reshape(result.shape)

        if result.ndim == 1:
            from pandas import Series
            return Series(result, obj.index, name=obj.name)

        # NOTE(review): the 2-D path assumes `block` is not None (it reads
        # block.columns) -- confirm callers always pass a block here.
        return type(obj)(result, index=obj.index, columns=block.columns)
    return result
def test_to_timedelta_invalid(self):
    """Invalid inputs raise ValueError; errors='coerce' yields NaT instead."""
    # these will error
    self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
    self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))

    # time not supported ATM
    self.assertRaises(ValueError, lambda: to_timedelta(time(second=1)))
    self.assertTrue(to_timedelta(
        time(second=1), errors='coerce') is pd.NaT)

    self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar']))
    # Restored the proper class name (the scrape lower-cased TimedeltaIndex).
    tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]),
                          to_timedelta(['foo', 'bar'], errors='coerce'))

    tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']),
                          to_timedelta(['1 day', 'bar', '1 min'],
                                       errors='coerce'))
def test_to_timedelta_on_missing_values(self):
    """to_timedelta must map missing inputs (np.nan, pd.NaT) to NaT."""
    # GH5438
    timedelta_NaT = np.timedelta64('NaT')

    actual = pd.to_timedelta(Series(['00:00:01', np.nan]))
    expected = Series([np.timedelta64(1000000000, 'ns'),
                       timedelta_NaT], dtype='<m8[ns]')
    assert_series_equal(actual, expected)

    # pd.NaT in the input behaves the same as np.nan
    actual = pd.to_timedelta(Series(['00:00:01', pd.NaT]))
    assert_series_equal(actual, expected)

    # scalar missing values convert to NaT (internal iNaT integer value)
    actual = pd.to_timedelta(np.nan)
    self.assertEqual(actual.value, timedelta_NaT.astype('int64'))

    actual = pd.to_timedelta(pd.NaT)
    self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
def test_apply_to_timedelta(self):
    """Series.apply(pd.to_timedelta) should mirror pd.to_timedelta on values."""
    timedelta_NaT = pd.to_timedelta('NaT')

    list_of_valid_strings = ['00:00:01', '00:00:02']
    a = pd.to_timedelta(list_of_valid_strings)
    b = Series(list_of_valid_strings).apply(pd.to_timedelta)
    # Can't compare until apply on a Series gives the correct dtype
    # assert_series_equal(a,b)

    list_of_strings = ['00:00:01', np.nan, timedelta_NaT]

    # Todo: unused?
    a = pd.to_timedelta(list_of_strings)  # noqa
    b = Series(list_of_strings).apply(pd.to_timedelta)  # noqa
    # Can't compare until apply on a Series gives the correct dtype
    # assert_series_equal(a,b)
def test_get_indexer(self):
    """get_indexer with pad/backfill/nearest, including a timedelta tolerance."""
    idx = pd.date_range('2000-01-01', periods=3)
    tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

    target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours',
                                       '1 day 1 hour'])
    tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
    # Restored the expected index arrays the scrape truncated.
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'backfill'), [0, 1, 2])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest'), [0, 1, 1])
    # pd.Timedelta (class) was lower-cased to pd.timedelta by the scrape.
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest',
                        tolerance=pd.Timedelta('1 hour')),
        [0, -1, 1])
    with tm.assertRaises(ValueError):
        idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
def test_get_loc(self):
    """get_loc on a TimedeltaIndex for every fill method, with tolerances."""
    idx = pd.to_timedelta(['0 days', '1 days', '2 days'])

    for method in [None, 'pad', 'backfill', 'nearest']:
        self.assertEqual(idx.get_loc(idx[1], method), 1)
        # Restored the dropped `method` argument / closing parens.
        self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1)
        self.assertEqual(idx.get_loc(str(idx[1]), method), 1)

    # tolerance requires a method; 'pad' restored (scrape dropped middle args)
    self.assertEqual(
        idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1)
    self.assertEqual(
        idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1)
    self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1)

    with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
        idx.get_loc(idx[1], method='nearest', tolerance='foo')

    for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
        self.assertEqual(idx.get_loc('1 day 1 hour', method), loc)
def test_cummax_timedelta64(self):
    """cummax over timedelta64 with NaT: skipna skips, non-skipna carries."""
    # Restored the list entries the scrape dropped from all three series.
    s = pd.Series(pd.to_timedelta(['NaT',
                                   '2 min',
                                   'NaT',
                                   '1 min',
                                   'NaT',
                                   '3 min', ]))

    # skipna=True: NaT positions stay NaT; running max over the valid values
    expected = pd.Series(pd.to_timedelta(['NaT',
                                          '2 min',
                                          'NaT',
                                          '2 min',
                                          'NaT',
                                          '3 min', ]))
    result = s.cummax(skipna=True)
    self.assert_series_equal(expected, result)

    # skipna=False: NaT gaps are filled by carrying the running max forward
    expected = pd.Series(pd.to_timedelta(['NaT',
                                          '2 min',
                                          '2 min',
                                          '2 min',
                                          '2 min',
                                          '3 min', ]))
    result = s.cummax(skipna=False)
    self.assert_series_equal(expected, result)
def test_constructor_dict_timedelta_index(self):
    """Series from a timedelta-keyed dict aligned on a TimedeltaIndex."""
    # GH #12169 : Resample category data with timedelta index
    # construct Series from dict as data and TimedeltaIndex as index
    # will result NaN in result Series data
    expected = Series(
        data=['A', 'B', 'C'],
        index=pd.to_timedelta([0, 10, 20], unit='s')
    )

    # Restored: the scrape left a stray `unit='s')` where the `index=`
    # keyword of this constructor call used to be.
    result = Series(
        data={pd.to_timedelta(0, unit='s'): 'A',
              pd.to_timedelta(10, unit='s'): 'B',
              pd.to_timedelta(20, unit='s'): 'C'},
        index=pd.to_timedelta([0, 10, 20], unit='s')
    )
    # this should work
    assert_series_equal(result, expected)
def test_timedelta64_dtype_array_returned(self):
    """algos.unique on timedelta data keeps the m8[ns] dtype (GH 9431)."""
    # GH 9431
    expected = np.array([31200, 45678, 10000], dtype='m8[ns]')

    # Duplicates included so unique() actually has something to drop.
    td_index = pd.to_timedelta([31200, 31200, 10000, 45678, 10000])
    result = algos.unique(td_index)
    tm.assert_numpy_array_equal(result, expected)
    self.assertEqual(result.dtype, expected.dtype)

    s = pd.Series(td_index)
    result = algos.unique(s)
    # Restored: the scrape compared against `expected.dtype` here.
    tm.assert_numpy_array_equal(result, expected)
    self.assertEqual(result.dtype, expected.dtype)

    arr = s.values
    result = algos.unique(arr)
    tm.assert_numpy_array_equal(result, expected)
    self.assertEqual(result.dtype, expected.dtype)
def make_fea_set(sku_fea, user_fea, train_start_date, train_end_time, action_data):
    """Assemble the training feature set: sliding-window action features
    joined with per-user and per-sku features.

    @param sku_fea: per-sku feature DataFrame (joined on 'sku_id')
    @param user_fea: per-user feature DataFrame (joined on 'user_id')
    @param train_start_date: unused here; kept for interface compatibility
    @param train_end_time: end of the training window (Timestamp-like)
    @param action_data: raw action log, passed through to get_action_feat
    """
    start_days = "2016-02-01"  # NOTE(review): unused -- confirm before removing
    actions = None
    # Action features over windows of 1..30 days ending at train_end_time
    for i in (1, 2, 3, 5, 7, 15, 21, 30):
        start_time = train_end_time - pd.to_timedelta(str(i) + ' days')
        if actions is None:
            actions = get_action_feat(start_time, action_data)
        else:
            actions = pd.merge(actions, get_action_feat(start_time, action_data),
                               how='left', on=['user_id', 'sku_id'])
    # Restored: the user-feature frame was missing from this merge in the
    # scraped source (pd.merge(actions, on='user_id') is invalid).
    actions = pd.merge(actions, user_fea, on='user_id')
    actions = pd.merge(actions, sku_fea, on='sku_id')
    actions = actions.fillna(0)
    print('fea_weidu3', actions.shape)
    # actions.to_csv('test'+str(train_end_time).split(' ')[0]+'.csv')
    return actions
def output(self):
    '''
    Generate data wrapper for Mahali temperatures

    @return Mahali temperature data wrapper
    '''

    # Function to extract date from filename (only month/day/year,
    # no hours/minutes/seconds)
    def toDateTime(in_filename):
        return pd.to_datetime(pd.to_datetime(in_filename[7:25]).strftime('%Y-%m-%d'))

    # Read in file list:
    mahali_temperature_info = resource_filename('skdaccess', os.path.join('support', 'mahali_temperature_info.txt'))
    filenames = pd.read_csv(mahali_temperature_info, header=None,
                            names=('station', 'filename'),
                            skipinitialspace=True)

    # Create a column of dates
    filenames['date'] = filenames['filename'].apply(toDateTime)

    # Need to grab day before as data can spill over
    adjusted_start_date = self.start_date - pd.to_timedelta('1d')
    adjusted_end_date = self.end_date + pd.to_timedelta('1d')

    station_list = self.ap_paramList[0]()

    # Get data for each selected station one day before until the requested
    # end date. Restored: the scrape collapsed the three conditions of this
    # reduce into one broken line.
    index_to_retrieve = np.logical_and.reduce(
        [filenames.loc[:, 'station'].apply(lambda x: x in station_list),
         filenames.loc[:, 'date'] >= adjusted_start_date,
         filenames.loc[:, 'date'] <= self.end_date])

    all_temperature_data = self.retrieveOnlineData(filenames[index_to_retrieve])

    # Due to data spillover, cut each data frame in dictionary
    for station in all_temperature_data.keys():
        all_temperature_data[station] = all_temperature_data[station].loc[adjusted_start_date:adjusted_end_date]

    # Return table wrapper of data
    return TableWrapper(all_temperature_data, default_columns=['Temperature'])
def output(self):
    '''
    Generate data wrapper for USGS geomagnetic data

    @return geomagnetic data wrapper
    '''
    observatory_list = self.ap_paramList[0]()

    # USGS Edge server
    base_url = 'cwbpub.cr.usgs.gov'
    factory = EdgeFactory(host=base_url, port=2060)

    data_dict = OrderedDict()
    for observatory in observatory_list:
        # One timeseries request per observatory over the configured span
        ret_data = factory.get_timeseries(observatory=observatory,
                                          interval=self.interval,
                                          type=self.data_type,
                                          channels=self.channels,
                                          starttime=UTCDateTime(self.start_time),
                                          endtime=UTCDateTime(self.end_time))
        obs_data = OrderedDict()
        for label, trace in zip(self.channels, ret_data):
            # Absolute time axis: trace start plus per-sample second offsets
            time = pd.to_datetime(trace.stats['starttime'].datetime) + pd.to_timedelta(trace.times(), unit='s')
            obs_data[label] = pd.Series(trace.data, time)
        data_dict[observatory] = pd.DataFrame(obs_data)

    return TableWrapper(data_dict, default_columns=self.channels)
def dateMismatch(dates, days=10):
    '''
    Check if any pair of dates is further apart than a number of days

    @param dates: Iterable container of pandas timestamps
    @param days: Number of days
    @return True if some pair differs by more than the threshold, False otherwise
    '''
    threshold = pd.to_timedelta(days, 'D')
    return any(abs(first - second) > threshold
               for first, second in combinations(dates, 2))
def convert_gps_time(gpsweek, gpsweekseconds, format='unix'):
    """
    convert_gps_time :: (String -> String) -> Float

    Converts a GPS time format (weeks + seconds since 6 Jan 1980) to a UNIX
    timestamp (seconds since 1 Jan 1970) without correcting for UTC leap
    seconds.

    Static values gps_delta and gpsweek_cf are defined by the below functions
    (optimization):
    gps_delta is the time difference (in seconds) between UNIX time and GPS time.
    gps_delta = (dt.datetime(1980,1,6) - dt.datetime(1970,1,1)).total_seconds()
    gpsweek_cf is the coefficient to convert weeks to seconds
    gpsweek_cf = 7 * 24 * 60 * 60  # 604800

    :param gpsweek: Number of weeks since beginning of GPS time
        (1980-01-06 00:00:00)
    :param gpsweekseconds: Number of seconds since the GPS week parameter
    :return: (float) unix timestamp (number of seconds since
        1970-01-01 00:00:00), or datetimes if format == 'datetime'
    """
    # GPS time begins 1980 Jan 6 00:00, UNIX time begins 1970 Jan 1 00:00
    gps_delta = 315964800.0
    gpsweek_cf = 604800

    if isinstance(gpsweek, pd.Series) and isinstance(gpsweekseconds, pd.Series):
        gps_ticks = (gpsweek.astype('float64') * gpsweek_cf) + gpsweekseconds.astype('float64')
    else:
        gps_ticks = (float(gpsweek) * gpsweek_cf) + float(gpsweekseconds)

    timestamp = gps_delta + gps_ticks

    if format == 'unix':
        return timestamp
    elif format == 'datetime':
        # Fixed: datetime.datetime(1970, 1) lost its `day` argument in the
        # scrape and would raise TypeError.
        return datetime.datetime(1970, 1, 1) + pd.to_timedelta(timestamp, unit='s')
def datenum_to_datetime(timestamp):
    """Convert MATLAB datenum value(s) to Python datetimes.

    MATLAB counts days from 0000-01-01, which is 366 days before Python's
    ordinal day 1 (0001-01-01); the fractional part is the time of day.
    """
    if isinstance(timestamp, pd.Series):
        whole_days = timestamp.astype(int).map(datetime.datetime.fromordinal)
        day_fraction = pd.to_timedelta(timestamp % 1, unit='D')
        return whole_days + day_fraction - pd.to_timedelta('366 days')
    base = datetime.datetime.fromordinal(int(timestamp) - 366)
    return base + datetime.timedelta(days=timestamp % 1)
def _convert_ulysses_time(data):
"""Method to convert timestamps to datetimes"""
data.loc[data['year'] > 50, 'year'] += 1900
data.loc[data['year'] < 50, 'year'] += 2000
data['Time'] = pd.to_datetime(data['year'].astype(str) + ':' +
data['doy'].astype(str),
format='%Y:%j')
data['Time'] += (pd.to_timedelta(data['hour'], unit='h') +
pd.to_timedelta(data['minute'], unit='m') +
pd.to_timedelta(data['second'], unit='s'))
data = data.drop(['year', 'doy', 'hour', 'minute', 'second'],
axis=1)
return data
def test_resample_with_timedeltas(self):
    """Resampling a TimedeltaIndex by 30-minute bins should equal a manual
    groupby of the positional index in blocks of 30."""
    expected = DataFrame({'A': np.arange(1480)})
    expected = expected.groupby(expected.index // 30).sum()
    expected.index = pd.timedelta_range('0 days', freq='30T', periods=50)

    df = DataFrame({'A': np.arange(1480)}, index=pd.to_timedelta(
        np.arange(1480), unit='T'))
    result = df.resample('30T').sum()
    assert_frame_equal(result, expected)

    # Same resample applied to the Series
    s = df['A']
    result = s.resample('30T').sum()
    assert_series_equal(result, expected['A'])
def test_ops_ndarray(self):
    """Arithmetic between a Timedelta scalar and timedelta64/datetime64 arrays."""
    # Restored: the class name Timedelta was lower-cased by the scrape.
    td = Timedelta('1 day')

    # timedelta, timedelta
    other = pd.to_timedelta(['1 day']).values
    expected = pd.to_timedelta(['2 days']).values
    self.assert_numpy_array_equal(td + other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other + td, expected)
    self.assertRaises(TypeError, lambda: td + np.array([1]))
    self.assertRaises(TypeError, lambda: np.array([1]) + td)

    expected = pd.to_timedelta(['0 days']).values
    self.assert_numpy_array_equal(td - other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        # Restored: two statements had been fused into one broken line here.
        self.assert_numpy_array_equal(-other + td, expected)
    self.assertRaises(TypeError, lambda: td - np.array([1]))
    self.assertRaises(TypeError, lambda: np.array([1]) - td)

    expected = pd.to_timedelta(['2 days']).values
    self.assert_numpy_array_equal(td * np.array([2]), expected)
    self.assert_numpy_array_equal(np.array([2]) * td, expected)
    self.assertRaises(TypeError, lambda: td * other)
    self.assertRaises(TypeError, lambda: other * td)

    self.assert_numpy_array_equal(td / other, np.array([1]))
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other / td, np.array([1]))

    # timedelta, datetime
    other = pd.to_datetime(['2000-01-01']).values
    expected = pd.to_datetime(['2000-01-02']).values
    self.assert_numpy_array_equal(td + other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other + td, expected)

    expected = pd.to_datetime(['1999-12-31']).values
    self.assert_numpy_array_equal(-td + other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other - td, expected)
def test_timedelta_range(self):
    """timedelta_range compared against equivalent to_timedelta constructions."""
    expected = to_timedelta(np.arange(5), unit='D')
    result = timedelta_range('0 days', periods=5, freq='D')
    tm.assert_index_equal(result, expected)

    # Restored: the scrape fused the start/end form into one broken call.
    expected = to_timedelta(np.arange(11), unit='D')
    result = timedelta_range('0 days', '10 days', freq='D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
    result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02',
                             freq='D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
    result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta(np.arange(50), unit='T') * 30
    result = timedelta_range('0 days', freq='30T', periods=50)
    tm.assert_index_equal(result, expected)

    # GH 11776: 2-d input is rejected for every `errors` mode
    arr = np.arange(10).reshape(2, 5)
    df = pd.DataFrame(np.arange(10).reshape(2, 5))
    for arg in (arr, df):
        with tm.assertRaisesRegexp(TypeError, "1-d array"):
            to_timedelta(arg)
        for errors in ['ignore', 'raise', 'coerce']:
            with tm.assertRaisesRegexp(TypeError, "1-d array"):
                to_timedelta(arg, errors=errors)

    # issue10583: slicing with a string lower bound equals a Timedelta bound
    df = pd.DataFrame(np.random.normal(size=(10, 4)))
    df.index = pd.timedelta_range(start='0s', periods=10, freq='s')
    expected = df.loc[pd.Timedelta('0s'):, :]
    result = df.loc['0s':, :]
    assert_frame_equal(expected, result)
def test_timedelta_ops_scalar(self):
    """Adding/subtracting equivalent 10s and 1d10s offsets of every flavor."""
    # GH 6808
    base = pd.to_datetime('20130101 09:01:12.123456')
    expected_add = pd.to_datetime('20130101 09:01:22.123456')
    expected_sub = pd.to_datetime('20130101 09:01:02.123456')

    # Restored the argument lists the scrape truncated (np.timedelta64 units).
    for offset in [pd.to_timedelta(10, unit='s'),
                   timedelta(seconds=10),
                   np.timedelta64(10, 's'),
                   np.timedelta64(10000000000, 'ns'),
                   pd.offsets.Second(10)]:
        result = base + offset
        self.assertEqual(result, expected_add)

        result = base - offset
        self.assertEqual(result, expected_sub)

    base = pd.to_datetime('20130102 09:01:12.123456')
    expected_add = pd.to_datetime('20130103 09:01:22.123456')
    expected_sub = pd.to_datetime('20130101 09:01:02.123456')

    for offset in [pd.to_timedelta('1 day, 00:00:10'),
                   pd.to_timedelta('1 days, 00:00:10'),
                   timedelta(days=1, seconds=10),
                   np.timedelta64(1, 'D') + np.timedelta64(10, 's'),
                   pd.offsets.Day() + pd.offsets.Second(10)]:
        result = base + offset
        self.assertEqual(result, expected_add)

        result = base - offset
        self.assertEqual(result, expected_sub)
def test_constructor_coverage(self):
    """TimedeltaIndex constructor edge cases: bad args, generators, strings."""
    rng = timedelta_range('1 days', periods=10.5)
    exp = timedelta_range('1 days', periods=10)
    self.assertTrue(rng.equals(exp))

    # Restored: the constructor name (TimedeltaIndex) was dropped / lower-cased
    # by the scrape in the assertRaises calls below.
    self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
                      periods='foo', freq='D')

    self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
                      end='10 days')

    self.assertRaises(ValueError, TimedeltaIndex, '1 days')

    # generator expression
    gen = (timedelta(i) for i in range(10))
    result = TimedeltaIndex(gen)
    expected = TimedeltaIndex([timedelta(i) for i in range(10)])
    self.assertTrue(result.equals(expected))

    # NumPy string array
    strings = np.array(['1 days', '2 days', '3 days'])
    result = TimedeltaIndex(strings)
    expected = to_timedelta([1, 2, 3], unit='d')
    self.assertTrue(result.equals(expected))

    from_ints = TimedeltaIndex(expected.asi8)
    self.assertTrue(from_ints.equals(expected))

    # non-conforming freq
    self.assertRaises(ValueError, TimedeltaIndex,
                      ['1 days', '2 days', '4 days'], freq='D')
def test_conversion_preserves_name(self):
    """to_datetime/to_timedelta keep the source Index's name (GH 10875)."""
    named = pd.Index(['01:02:03', '01:02:04'], name='label')
    converted_dt = pd.to_datetime(named)
    converted_td = pd.to_timedelta(named)
    self.assertEqual(named.name, converted_dt.name)
    self.assertEqual(named.name, converted_td.name)
def create_index(self):
    """Build the fixture index: timedeltas of 0-4 days, each shifted by 1 hour."""
    day_deltas = pd.to_timedelta(range(5), unit='d')
    return day_deltas + pd.offsets.Hour(1)
def test_quantile(self):
    """Series.quantile across numeric, object, datetime64 and timedelta64."""
    from numpy import percentile

    q = self.ts.quantile(0.1)
    self.assertEqual(q, percentile(self.ts.valid(), 10))

    # Restored: the percentile(...) comparisons were garbled to `90))`.
    q = self.ts.quantile(0.9)
    self.assertEqual(q, percentile(self.ts.valid(), 90))

    # object dtype
    q = Series(self.ts, dtype=object).quantile(0.9)
    self.assertEqual(q, percentile(self.ts.valid(), 90))

    # datetime64[ns] dtype
    dts = self.ts.index.to_series()
    q = dts.quantile(.2)
    self.assertEqual(q, Timestamp('2000-01-10 19:12:00'))

    # timedelta64[ns] dtype
    tds = dts.diff()
    q = tds.quantile(.25)
    self.assertEqual(q, pd.to_timedelta('24:00:00'))

    # GH7661
    result = Series([np.timedelta64('NaT')]).sum()
    self.assertTrue(result is pd.NaT)

    msg = 'percentiles should all be in the interval \\[0, 1\\]'
    for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
        with tm.assertRaisesRegexp(ValueError, msg):
            self.ts.quantile(invalid)
def test_isin_with_i8(self):
    """isin on datetime64/timedelta64 Series with several box types (GH 5021)."""
    # GH 5021
    # Restored: both expectation vectors must have 5 entries to match the
    # 5-row series below; the scrape truncated them.
    expected = Series([True, True, False, False, False])
    expected2 = Series([False, True, False, False, False])

    # datetime64[ns]
    s = Series(date_range('jan-01-2013', 'jan-05-2013'))

    result = s.isin(s[0:2])
    assert_series_equal(result, expected)

    result = s.isin(s[0:2].values)
    assert_series_equal(result, expected)

    # fails on dtype conversion in the first place
    result = s.isin(s[0:2].values.astype('datetime64[D]'))
    assert_series_equal(result, expected)

    result = s.isin([s[1]])
    assert_series_equal(result, expected2)

    result = s.isin([np.datetime64(s[1])])
    assert_series_equal(result, expected2)

    # timedelta64[ns]
    s = Series(pd.to_timedelta(lrange(5), unit='d'))
    result = s.isin(s[0:2])
    assert_series_equal(result, expected)
def test_timedelta64_operations_with_timedeltas(self):
    """Series - timedelta arithmetic, with datetime.timedelta and pd scalars."""
    # td operate with td
    td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
    td2 = timedelta(minutes=5, seconds=4)
    result = td1 - td2
    expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta(
        seconds=1)] * 3)
    self.assertEqual(result.dtype, 'm8[ns]')
    assert_series_equal(result, expected)

    result2 = td2 - td1
    expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta(
        seconds=0)] * 3))
    assert_series_equal(result2, expected)

    # roundtrip
    assert_series_equal(result + td2, td1)

    # Now again, using pd.to_timedelta, which should build
    # a Series or a scalar, depending on input.
    td1 = Series(pd.to_timedelta(['00:05:03'] * 3))
    td2 = pd.to_timedelta('00:05:04')
    result = td1 - td2
    expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta(
        seconds=1)] * 3)
    # Restored: the scrape compared the dtype against `td1` and dropped the
    # remaining assertions of this test.
    self.assertEqual(result.dtype, 'm8[ns]')
    assert_series_equal(result, expected)

    result2 = td2 - td1
    expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta(
        seconds=0)] * 3))
    assert_series_equal(result2, expected)

    # roundtrip
    assert_series_equal(result + td2, td1)
def test_even_day(self):
    """Timedelta._repr_base(format='even_day') rendering for typical values."""
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1s = pd.to_timedelta(1, unit='s')
    delta_500ms = pd.to_timedelta(500, unit='ms')

    drepr = lambda x: x._repr_base(format='even_day')
    self.assertEqual(drepr(delta_1d), "1 days")
    self.assertEqual(drepr(-delta_1d), "-1 days")
    self.assertEqual(drepr(delta_0d), "0 days")
    self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
    self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
    self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
    self.assertEqual(
        drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
def test_sub_day(self):
    """Timedelta._repr_base(format='sub_day') rendering for typical values."""
    # Restored the fixture deltas the scrape dropped (delta_1d had been
    # redefined with unit='ms', and delta_0d/delta_1s/delta_500ms were gone).
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1s = pd.to_timedelta(1, unit='s')
    delta_500ms = pd.to_timedelta(500, unit='ms')

    drepr = lambda x: x._repr_base(format='sub_day')
    self.assertEqual(drepr(delta_1d), "1 days")
    self.assertEqual(drepr(-delta_1d), "-1 days")
    self.assertEqual(drepr(delta_0d), "00:00:00")
    self.assertEqual(drepr(delta_1s), "00:00:01")
    self.assertEqual(drepr(delta_500ms), "00:00:00.500000")
    self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
    self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
def test_long(self):
    """Timedelta._repr_base(format='long') rendering for typical values."""
    # Restored the fixture deltas the scrape dropped.
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1s = pd.to_timedelta(1, unit='s')
    delta_500ms = pd.to_timedelta(500, unit='ms')

    drepr = lambda x: x._repr_base(format='long')
    self.assertEqual(drepr(delta_1d), "1 days 00:00:00")
    self.assertEqual(drepr(-delta_1d), "-1 days +00:00:00")
    self.assertEqual(drepr(delta_0d), "0 days 00:00:00")
    self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
    self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
    self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
    self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
def test_all(self):
    """Timedelta._repr_base(format='all') shows full nanosecond precision."""
    delta_1d = pd.to_timedelta(1, unit='D')
    # Restored: delta_0d is used below but its definition was dropped.
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1ns = pd.to_timedelta(1, unit='ns')

    drepr = lambda x: x._repr_base(format='all')
    self.assertEqual(drepr(delta_1d), "1 days 00:00:00.000000000")
    self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000")
    self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001")
def test_days(self):
    """Timedelta64Formatter day rendering, boxed (quoted) and unboxed."""
    x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
    # Restored: the scrape mangled Timedelta64Formatter/box casing and
    # dropped the .get_result() calls with their assertions.
    result = fmt.Timedelta64Formatter(x, box=True).get_result()
    self.assertEqual(result[0].strip(), "'0 days'")
    self.assertEqual(result[1].strip(), "'1 days'")

    result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result()
    self.assertEqual(result[0].strip(), "'1 days'")

    result = fmt.Timedelta64Formatter(x, box=False).get_result()
    self.assertEqual(result[0].strip(), "0 days")
    self.assertEqual(result[1].strip(), "1 days")

    result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result()
    self.assertEqual(result[0].strip(), "1 days")
def _chunk_to_dataframe(self):
n = self._current_row_in_chunk_index
m = self._current_row_in_file_index
ix = range(m - n, m)
rslt = pd.DataFrame(index=ix)
js, jb = 0, 0
for j in range(self.column_count):
name = self.column_names[j]
if self.column_types[j] == b'd':
rslt[name] = self._byte_chunk[jb, :].view(
dtype=self.byte_order + 'd')
rslt[name] = np.asarray(rslt[name], dtype=np.float64)
if self.convert_dates and (self.column_formats[j] == "MMDDYY"):
epoch = pd.datetime(1960, 1)
rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d')
jb += 1
elif self.column_types[j] == b's':
rslt[name] = self._string_chunk[js, :]
rslt[name] = rslt[name].apply(lambda x: x.rstrip(b'\x00 '))
if self.encoding is not None:
rslt[name] = rslt[name].apply(
lambda x: x.decode(encoding=self.encoding))
if self.blank_missing:
ii = rslt[name].str.len() == 0
rslt.loc[ii, name] = np.nan
js += 1
else:
raise ValueError("unkNown column type %s" %
self.column_types[j])
return rslt
def test_timedelta(self):
    """to_sql warns for timedelta64 and stores the int64 nanosecond values."""
    # see #6921
    df = to_timedelta(
        Series(['00:00:01', '00:00:03'], name='foo')).to_frame()
    with tm.assert_produces_warning(UserWarning):
        df.to_sql('test_timedelta', self.conn)
    result = sql.read_sql_query('SELECT * FROM test_timedelta', self.conn)
    # Reading back yields the raw nanosecond integers, not timedeltas
    tm.assert_series_equal(result['foo'], df['foo'].astype('int64'))
def test_timedelta(self):
    """timedelta64 Series/DataFrames round-trip through JSON as int64 ns/ms."""
    converter = lambda x: pd.to_timedelta(x, unit='ms')

    s = Series([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(s.dtype, 'timedelta64[ns]')
    # index will be float dtype
    assert_series_equal(s, pd.read_json(s.to_json(), typ='series')
                        .apply(converter),
                        check_index_type=False)

    s = Series([timedelta(23), timedelta(seconds=5)],
               index=pd.Index([0, 1], dtype=float))
    self.assertEqual(s.dtype, 'timedelta64[ns]')
    assert_series_equal(s, pd.read_json(
        s.to_json(), typ='series').apply(converter))

    frame = DataFrame([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
    assert_frame_equal(frame, pd.read_json(frame.to_json())
                       .apply(converter),
                       check_index_type=False,
                       check_column_type=False)

    # Restored: the mixed-dtype dict literal and the final comparison's
    # `result` argument were garbled by the scrape.
    frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                       'b': [1, 2],
                       'c': pd.date_range(start='20130101', periods=2)})

    result = pd.read_json(frame.to_json(date_unit='ns'))
    result['a'] = pd.to_timedelta(result.a, unit='ns')
    result['c'] = pd.to_datetime(result.c)
    assert_frame_equal(frame, result, check_index_type=False)
def init():
    """Return top level command handler."""

    @click.command()
    @cli.handle_exceptions(restclient.CLI_REST_EXCEPTIONS)
    @click.option('--match', help='Server name pattern match')
    @click.option('--full', is_flag=True, default=False)
    @click.pass_context
    def apps(ctx, match, full):
        """View apps report."""
        report = fetch_report(ctx.obj.get('api'), 'apps', match)
        # Replace integer N/As (-1 markers) with empty strings
        for col in ['identity', 'expires', 'lease', 'data_retention']:
            report.loc[report[col] == -1, col] = ''
        # Convert to datetimes (epoch seconds)
        for col in ['expires']:
            report[col] = pd.to_datetime(report[col], unit='s')
        # Convert to timedeltas (durations in seconds)
        for col in ['lease', 'data_retention']:
            report[col] = pd.to_timedelta(report[col], unit='s')
        report = report.fillna('')
        if not full:
            # Trim to the summary columns unless --full was requested
            report = report[[
                'instance', 'allocation', 'partition', 'server',
                'mem', 'cpu', 'disk'
            ]]
        print_report(report)

    return apps
def format_date_to_datetime(self, df, t_date=None):
    """Combine a date string with the 'time' column of *df* into a single
    'datetime' column; rows come back sorted by that column ascending.

    @param df: DataFrame with a 'time' column parseable by pd.to_timedelta
    @param t_date: date string; defaults to the current day
    """
    if t_date is None:
        # NOTE(review): dataTime is a project helper; presumably returns a
        # 'YYYY-MM-DD 00:00:00' string for the current day -- confirm upstream.
        t_date = dataTime.datetimeRelative(delta=0)
    t_date = t_date.replace(' 00:00:00', '')

    combined = df.copy()
    combined.insert(0, 'datetime', t_date)
    combined['datetime'] = pd.to_datetime(combined['datetime'])
    combined['time'] = pd.to_timedelta(combined['time'])
    combined['datetime'] = combined['datetime'] + combined['time']
    combined = combined.sort_values(['datetime'], ascending=[True])
    del combined['time']
    return combined
# Historical quotes helper (tushare get_hist_data) -- parameters:
#   code: 6-digit stock code, or an index alias: sh = Shanghai Composite,
#         sz = Shenzhen Component, hs300 = CSI 300, sz50 = SSE 50,
#         zxb = SME board, cyb = ChiNext
#   start: start date, YYYY-MM-DD
#   end: end date, YYYY-MM-DD
#   ktype: bar type: D = daily, W = weekly, M = monthly,
#          5 / 15 / 30 / 60 = minute bars; default D
#   retry_count: number of retries after a network failure, default 3
#   pause: seconds to pause between retries, default 0
# Returned columns:
#   date
#   open: opening price
#   high: highest price
#   close: closing price
#   low: lowest price
#   volume: trading volume
#   price_change: price change
#   p_change: percent change
#   ma5: 5-day moving average of price
#   ma10: 10-day moving average of price
#   ma20: 20-day moving average of price
#   v_ma5: 5-day moving average of volume
#   v_ma10: 10-day moving average of volume
#   v_ma20: 20-day moving average of volume
#   turnover: turnover rate (not available for indices)
def process(self, obj_data):
    '''
    Apply the MIDAS estimator to generate veLocity estimates

    Adds the result to the data wrapper

    @param obj_data: Data wrapper
    '''
    if self.column_names == None:
        column_names = obj_data.getDefaultColumns()
    else:
        column_names = self.column_names

    # MIDAS pairs each sample with the sample exactly one year later
    time_diff = pd.to_timedelta('365d')

    results = dict()
    for label, data in obj_data.getIterator():
        start_date = data.index[0]
        end_date = data.index[-1]
        for column in column_names:
            # Align the series against itself shifted by one year
            start_data = data.loc[start_date:(end_date-time_diff), column]
            end_data = data.loc[start_date+time_diff:end_date, column]

            offsets = end_data.values - start_data.values
            offsets = offsets[~np.isnan(offsets)]

            # Robust trimming: keep offsets within 2 MAD of the median
            med_off = np.median(offsets)
            mad_off = mad(offsets)
            cut_offsets = offsets[np.logical_and(offsets < med_off + 2*mad_off,
                                                 offsets > med_off - 2*mad_off)]

            final_vel = np.median(cut_offsets)
            # Standard error of the median, with MAD as the spread estimate
            final_unc = np.sqrt(np.pi/2) * mad(cut_offsets) / np.sqrt(len(cut_offsets))

            # NOTE(review): results is keyed by label only, so only the last
            # column's frame survives per label -- confirm this is intended.
            results[label] = pd.DataFrame([final_vel, final_unc], ['veLocity', 'uncertainty'], [column])

    obj_data.addResult(self.str_description, pd.Panel.fromDict(results, orient='minor'))
def test_timedelta_to_human(self):
    """timedelta_to_human renders +/- deltas at the requested precision."""
    # Restored: the second argument of four assertEqual calls was garbled
    # down to just `precision=N))` by the scrape.
    for td in timedelta(days=1, seconds=3900), pd.to_timedelta('1d1h5m'):
        self.assertEqual('1.05 days', timedelta_to_human(td, precision=2))
        self.assertEqual('1.0 day', timedelta_to_human(td, precision=1))
    for td in timedelta(days=-1, seconds=-3900), pd.to_timedelta('-1d1h5m'):
        self.assertEqual('1.05 days ago', timedelta_to_human(td, precision=2))
        self.assertEqual('1.0 day ago', timedelta_to_human(td, precision=1))
def get_accumulate_action_feat(start_time, end_time, action_data):
    """Accumulate time-decayed action counts per (user_id, sku_id).

    Each action in [start_time, end_time] gets weight exp(-days before
    end_time), so recent actions count more. Returns one row per
    (user_id, sku_id) with weighted sums of action_1..action_6 plus two
    derived features.

    @param start_time: window start (inclusive)
    @param end_time: window end (inclusive)
    @param action_data: DataFrame with user_id, sku_id, time, type,
                        model_id, cate, brand columns
    """
    # Parse timestamps BEFORE filtering. The original converted afterwards,
    # and onto action_data rather than the filtered frame, with an invalid
    # '%s' strftime directive (seconds are '%S').
    action_data = action_data.copy()
    action_data['time'] = pd.to_datetime(action_data['time'],
                                         format='%Y-%m-%d %H:%M:%S')
    in_window = ((action_data['time'] >= start_time) &
                 (action_data['time'] <= end_time))
    actions = action_data[in_window].copy()

    # One indicator column per action type: action_1 .. action_6
    df = pd.get_dummies(actions['type'], prefix='action')
    actions = pd.concat([actions, df], axis=1)  # type: pd.DataFrame

    # Time-decay weight: exp(-whole days between the action and end_time)
    actions['weights'] = actions['time'].map(
        lambda x: pd.to_timedelta(end_time - x))
    actions['weights'] = actions['weights'].map(lambda x: math.exp(-x.days))
    print(actions.head(10))

    for k in range(1, 7):
        col = 'action_%d' % k
        actions[col] = actions[col] * actions['weights']

    del actions['model_id']
    del actions['time']
    del actions['weights']
    del actions['cate']
    del actions['brand']

    actions = actions.groupby(['user_id', 'sku_id'], as_index=False).sum()
    actions.fillna(0, inplace=True)
    actions['action_1256'] = (actions['action_1'] + actions['action_2'] +
                              actions['action_5'] + actions['action_6'])
    actions['action_1256_d_4'] = actions['action_4'] / actions['action_1256']
    del actions['type']
    return actions
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。