如何在 SQL 数据流上计算运行平均值(running average)
一个“组”由 id 相邻且 grp 值相同的元组构成。需要为每个元组计算其所在组内 measure 的运行平均值,并且查询必须以纯流水线(pipeline)方式执行,即只需要扫描表一次。
下面先给出一个计算运行总和(running sum)的示例脚本;期望的运行平均值结果见后面的“样本输出”。
-- TABLE Stream
--   id      : ordering key of the stream element (primary key, must be >= 0)
--   grp     : group label of the element (must be >= 0 when not null)
--   measure : the value that gets aggregated
\echo -- creating table "Stream"
drop table if exists Stream;
create table Stream (
    id      int constraint streamPK primary key
                constraint idNotNeg check (id >= 0),
    grp     int constraint grpnotNeg check (grp >= 0),
    measure int
);
-- ---------------------------------------------------------------------------
-- POPULATE: add some tuples to table Stream
-- Every tuple lists all three columns (id, grp, measure). Tuples with
-- adjacent ids and the same grp form one group; note that grp 0 occurs in two
-- separate runs (ids 0-1 and ids 5-8), which must be treated as two groups.
\echo -- populating "Stream"
insert into Stream (id,grp,measure)
values
    ( 0, 0,  2),
    ( 1, 0,  3),
    ( 2, 1,  5),
    ( 3, 1,  7),
    ( 4, 1, 11),
    ( 5, 0, 13),
    ( 6, 0, 17),
    ( 7, 0, 19),
    ( 8, 0, 23),
    ( 9, 2, 29),
    (10, 2, 31),
    (11, 5, 37),
    (12, 3, 41),
    (13, 3, 43);
-- Composite type "intRec": an integer paired with a restart flag.
--   number  : the integer payload
--   restart : when true, runningSum_state restarts its sum at this element
\echo -- creating composite type "intRec"
drop type if exists intRec cascade;
create type intRec as (
    number  integer,
    restart bool
);
-- ---------------------------------------------------------------------------
-- runningSum_state : accumulator (state transition) function
--   $1 : the sum accumulated so far (null before the first element)
--   $2 : the current element as an intRec (number, restart)
--   returns the new accumulated sum:
--     * the element's number when the element is flagged as a restart or
--       nothing has been accumulated yet,
--     * the unchanged sum when the element's number is null,
--     * otherwise the previous sum plus the element's number.
\echo -- creating function "runningSum_state"
drop function if exists runningSum_state(int,intRec) cascade;
create function runningSum_state(int,intRec)
returns int
language plpgsql
as $f$
declare
    acc  alias for $1;
    cell alias for $2;
begin
    -- start of a new group, or very first element: begin a fresh sum
    if cell.restart or acc is null then
        return cell.number;
    end if;
    -- a null number contributes nothing; carry the sum forward
    if cell.number is null then
        return acc;
    end if;
    return cell.number + acc;
end
$f$;
-- ---------------------------------------------------------------------------
-- runningSum_final : turns the aggregate's int state into its result value
--   $1 : the accumulated sum
--   returns an intRec holding that sum, with the restart flag set to false
\echo -- creating function "runningSum_final"
drop function if exists runningSum_final(int) cascade;
create function runningSum_final(int)
returns intRec
language sql
as $f$
    select row($1, false)::intRec;
$f$;
-- ---------------------------------------------------------------------------
-- runningSum : the aggregate function
--   input  : intRec (number, restart)
--   state  : int, advanced once per input by runningSum_state
--   result : intRec produced from the state by runningSum_final
\echo -- creating aggregate function "runningSum"
drop aggregate if exists runningSum(intRec) cascade;
create aggregate runningSum(intRec) (
    stype     = int,
    sfunc     = runningSum_state,
    finalfunc = runningSum_final
);
-- ---------------------------------------------------------------------------
-- pipeline sliding-window query that uses our aggregate function
-- Reports, for every tuple of Stream, the running sum of measure within its
-- group, where a group is a run of tuples with adjacent ids and equal grp.
-- Each CTE projects every column that a later step reads (grp and measure
-- have to travel along with id), and each column list matches its select list.
\echo -- querying "Stream" with running sum
with
-- look at the neighbour tuple to the left to fetch its grp value;
-- -1 stands for "no left neighbour (or its grp is null)" and can never equal
-- a stored grp because Stream only accepts grp >= 0
CellLeft (id,grp,measure,lft) as (
    select id,
           grp,
           measure,
           coalesce(lag(grp) over (order by id), -1)
    from Stream
),
-- determine whether current tuple is start of a group:
-- true unless grp equals the left neighbour's grp
CellStart (id,measure,start) as (
    select id,
           measure,
           grp is distinct from lft
    from CellLeft
),
-- bundle the measure and start-flag into an intRec
CellFlag (id,intRC) as (
    select id,
           cast((measure,start) as intRec)
    from CellStart
),
-- call our runningSum aggregator as a window aggregate in id order
CellRun (id,measure,runningRC) as (
    select id,
           (intRC).number,
           runningSum(intRC) over (order by id)
    from CellFlag
),
-- extract the running sum from the composite
CellAggr (id,running) as (
    select id,
           (runningRC).number
    from CellRun
)
-- report
select id,running
from CellAggr
order by id;
样本输出
id | grp | measure | average
----+-----+---------+------------------
0 | 0 | 2 | 2
1 | 0 | 3 | 2.5
2 | 1 | 5 | 5
3 | 1 | 7 | 6
4 | 1 | 11 | 7.66666666666667
5 | 0 | 13 | 13
6 | 0 | 17 | 15
7 | 0 | 19 | 16.3333333333333
8 | 0 | 23 | 18
9 | 2 | 29 | 29
10 | 2 | 31 | 30
11 | 5 | 37 | 37
12 | 3 | 41 | 41
13 | 3 | 43 | 42
(14 行)
解决方法
在 SQL 中,您可以将其表示为:
-- Running average of measure within each run of adjacent tuples (by id) that
-- share the same grp. Partitioning by grp alone would merge separate runs
-- with the same grp value (ids 0-1 and ids 5-8 are both grp 0 in the sample),
-- so each run first gets its own number:
--   1. flagged : is_start = 1 when grp differs from the previous tuple's grp
--                (or there is no previous tuple), else 0
--   2. numbered: run_no = running count of is_start, constant within a run
--   3. outer   : running average of measure inside each run_no
select id,
       grp,
       measure,
       avg(measure) over (
           partition by run_no
           order by id
           rows unbounded preceding
       ) as group_running_avg
from (
    select id,
           grp,
           measure,
           sum(is_start) over (
               order by id
               rows unbounded preceding
           ) as run_no
    from (
        select id,
               grp,
               measure,
               case
                   when grp = lag(grp) over (order by id) then 0
                   else 1
               end as is_start
        from t
    ) as flagged
) as numbered
order by id;
您必须信任执行计划,但您可以借助 (grp,id,measure)
上的索引来帮助它。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。