-- ################################################################################
-- Experimental Results for
-- "Oblivious Bounds on the Probability of Boolean Functions"
-- to appear in Transactions on Database Systems (TODS) 2013/2014
-- Wolfgang Gatterbauer, November 2013
-- ################################################################################
-- PREPARE TPC-H DATABASE
-- Assumes that there is already an imported version of TCP-H data set
-- 1st step: make all tuples in the database probabilistic with P = 0.5
ALTER TABLE part ADD P float not null DEFAULT 0.1;
ALTER TABLE supplier ADD P float not null DEFAULT 0.1;
ALTER TABLE partsupp ADD P float not null DEFAULT 0.1;
ALTER TABLE part DROP P;
ALTER TABLE supplier DROP P;
ALTER TABLE partsupp DROP P;
ALTER TABLE part ADD P float not null DEFAULT trunc(random() *50 +1)/100;
ALTER TABLE supplier ADD P float not null DEFAULT trunc(random() *50 +1)/100;
ALTER TABLE partsupp ADD P float not null DEFAULT trunc(random() *50 +1)/100;
-- 2nd step: Define iOR user-defined aggregate
create or replace function ior_sfunc (float, float) returns float as
'select $1 * (1.0 - $2)'
language SQL;
create or replace function ior_finalfunc (float) returns float as
'select 1.0 - $1'
language SQL;
drop aggregate if exists ior (float);
create aggregate ior (float) (
sfunc = ior_sfunc,
stype = float,
finalfunc = ior_finalfunc,
initcond = '1.0');
-- 3rd step: Create a table to store the detailed experimental results
drop table if exists Exp_TimingResults;
create table Exp_TimingResults(
CurrentTime timestamptz,
cond2 varchar,
cond1 int,
TimeDet int, -- time for deterministic query
TimeRed int, -- times for probabilistic query (red: semi-join reduction, us: upper diss on S, etc.)
TimeUs int,
TimeLs int,
TimeUp int,
TimeLp int,
LCardP int, -- cardinality of table P after semi-join reduction
LCardPS int,
LCardS int,
OCardP int, -- cardinality of table P after applying cond1
OCardS int, -- cardinality of table P after applying cond2
Lmax int, -- maximal lineage size
Lavg real); -- average linage size
-- ################################################################################
-- ACTUAL EXPERIMENTS
-- Create Intermediate Variables
DO $proc$
DECLARE
StartTime timestamptz;
EndTime timestamptz;
TimeDet int;
TimeRed int;
TimeUs int;
TimeLs int;
TimeUp int;
TimeLp int;
LCardP int;
LCardPS int;
LCardS int;
OCardP int;
OCardS int;
Lmax int;
Lavg real;
$2 varchar; -- cond2: could be varied for experiments as follows:
cond2_array varchar[] := array['%red%green%','%red%','%'];
$1 int; -- cond1: vary for experiments
i int; -- 3rd loop variable average over several experiments
BEGIN
FOREACH $2 IN array cond2_array LOOP -- change cond2 during experiments
FOR $1 IN 500..13000 BY 500 LOOP -- change cond1 during experiments
FOR i IN 1..5 LOOP -- 3rd loop to later average over several experiments
-- Timing Deterministic Query
StartTime := clock_timestamp();
perform
distinct s_nationkey
from Supplier, Partsupp, Part
where s_suppkey = ps_suppkey
and ps_partkey = p_partkey
and p_name like $2
and ps_suppkey <= $1;
EndTime := clock_timestamp();
TimeDet := (1000*(extract(epoch from EndTime)-extract(epoch from StartTime)))::int;
-- Timing Semi-join reduction on PS, then P
StartTime := clock_timestamp();
drop table if exists PS;
create table PS as
select ps_suppkey, ps_partkey, PS.P
from Supplier S, Partsupp PS, Part P
where s_suppkey = ps_suppkey
and p_partkey = ps_partkey
and p_name like $2
and s_suppkey <= $1
group by PS.ps_suppkey, PS.ps_partkey, PS.P;
drop table if exists P;
create table P as
select p_partkey, P.P
from PS, Part P
where p_partkey = ps_partkey
group by P.p_partkey, P.P;
EndTime := clock_timestamp();
TimeRed := (1000*(extract(epoch from EndTime)-extract(epoch from StartTime)))::int;
-- Timing Upper Dissociation on Supplier
StartTime := clock_timestamp();
drop table if exists rus;
create table rus as
select s_nationkey, iOR(Q3.P) as P
from
(select s_nationkey, P.P*Q2.P as P
from P,
(select Q1.s_nationkey, Q1.ps_partkey, iOR(Q1.P) as P
from
(select s_nationkey, s_suppkey, ps_partkey,S.P*PS.P as P
from Supplier S, PS
where s_suppkey = ps_suppkey) as Q1
group by Q1.s_nationkey, Q1.ps_partkey) as Q2
where p_partkey = Q2.ps_partkey) as Q3
group by Q3.s_nationkey;
EndTime := clock_timestamp();
TimeUs := (1000*(extract(epoch from EndTime)-extract(epoch from StartTime)))::int;
-- Timing Lower Dissociation on Supplier
StartTime := clock_timestamp();
drop table if exists VS;
create table VS as
select S.s_nationkey, S.s_suppkey, (1-POWER(1-S.P,1e0/count(*))) as P
from Supplier S, PS -- does not need to join with P anymore because of previous semi-join reduction
where s_suppkey = ps_suppkey
group by S.s_nationkey, S.s_suppkey, S.P;
drop table if exists rls;
create table rls as
select s_nationkey, iOR(Q3.P) as P
from
(select s_nationkey, P.P*Q2.P as P
from P,
(select Q1.s_nationkey, Q1.ps_partkey, iOR(Q1.P) as P
from
(select s_nationkey, s_suppkey, ps_partkey,VS.P*PS.P as P
from VS, PS
where s_suppkey = ps_suppkey) as Q1
group by Q1.s_nationkey, Q1.ps_partkey) as Q2
where p_partkey = Q2.ps_partkey) as Q3
group by Q3.s_nationkey;
EndTime := clock_timestamp();
TimeLs := (1000*(extract(epoch from EndTime)-extract(epoch from StartTime)))::int;
-- Timing Upper Dissociation on Part
StartTime := clock_timestamp();
drop table if exists rup;
create table rup as
select s_nationkey,iOR(Q3.P) as P
from
(select s_nationkey, S.P*Q2.P as P
from Supplier S,
(select Q1.ps_suppkey, iOR(Q1.P) as P
from
(select ps_suppkey, PS.P*P.P as P
from PS, P
where ps_partkey = p_partkey) as Q1
group by Q1.ps_suppkey) as Q2
where s_suppkey = Q2.ps_suppkey) as Q3
group by Q3.s_nationkey;
EndTime := clock_timestamp();
TimeUp := (1000*(extract(epoch from EndTime)-extract(epoch from StartTime)))::int;
-- Timing Lower Dissociation on Part
StartTime := clock_timestamp();
drop table if exists VP;
create table VP as
select p_partkey, s_nationkey, (1-POWER(1-P.P,1e0/count(*))) as P -- requires nationkey!
from P, PS, Supplier S
where p_partkey=ps_partkey
and ps_suppkey = s_suppkey
group by p_partkey, s_nationkey, P.P;
drop table if exists rlp;
create table rlp as
select s_nationkey, iOR(Q3.P) as P
from
(select S.s_nationkey, S.P*Q2.P as P
from Supplier S,
(select Q1.ps_suppkey, s_nationkey, iOR(Q1.P) as P -- requires nationkey!
from
(select ps_suppkey, s_nationkey, PS.P*VP.P as P
from PS, VP
where ps_partkey = p_partkey) as Q1
group by Q1.ps_suppkey, s_nationkey) as Q2
where S.s_suppkey = Q2.ps_suppkey
and S.s_nationkey = Q2.s_nationkey) as Q3
group by Q3.s_nationkey;
EndTime := clock_timestamp();
TimeLp := (1000*(extract(epoch from EndTime)-extract(epoch from StartTime)))::int;
-- Determine cardinalities of table tuples in lineage
select COUNT(*)
into LCardP
from P;
select COUNT(*)
into LCardPS
from PS;
select COUNT(distinct ps_suppkey)
into LCardS
from PS;
select COUNT(*)
into OCardP
from Part
where p_name like $2;
select COUNT(*)
into OCardS
from Supplier
where s_suppkey <= $1;
-- Determine Size of Lineage
drop table if exists Exp_Lineage;
create table Exp_Lineage as
select s_nationkey, count(*) as lin
from Supplier, Partsupp, Part
where s_suppkey = ps_suppkey
and ps_partkey = p_partkey
and p_name like $2
and ps_suppkey <= $1
group by s_nationkey
order by lin desc;
select max(lin) lin
into Lmax
from Exp_Lineage;
select avg(lin) lin
into Lavg
from Exp_Lineage;
-- Save results and output status of loop
insert into Exp_TimingResults values (clock_timestamp(),$2, $1,
TimeDet, TimeRed, TimeUs, TimeLs, TimeUp, TimeLp,
LCardP,LCardPS,LCardS, OCardP, OCardS,
Lmax, Lavg);
raise notice 'Cond2=%, Cond1=%, Loop=%', $2, $1, i;
END LOOP;
END LOOP;
END LOOP;
END;
$proc$;
-- ################################################################################
-- EXPERIMENTAL QUALITY RESULTS OF LAST RUN
select rup.s_nationkey,
least(rup.p, rus.p),
greatest(rlp.p, rls.p)
from rup, rlp, rus, rls
where rup.s_nationkey = rlp.s_nationkey
and rlp.s_nationkey = rus.s_nationkey
and rus.s_nationkey = rls.s_nationkey
order by
least(rup.p, rus.p) desc, greatest(rlp.p, rls.p) desc
-- ################################################################################
-- EXPERIMENTAL TIMING RESULTS
-- Show averaged timing results
select cond2, cond1,
avg(timedet) as timedet,
avg(timered+timeus+timeup) as timeupper,
avg(timered+timeus+timels+timeup+timelp) as timeprob
from Exp_TimingResults
group by cond2, cond1
order by cond2, cond1
-- ################################################################################
-- FURTHER ANALYSIS
-- Lineage of tuple 11 in figure (b): $1=10000 and $2='%red%'
drop table if exists Exp_TupleLineage;
create table Exp_TupleLineage as
select s_nationkey,
Part.Lineage, count(Part.P) as C
from Supplier, Partsupp, Part
where s_suppkey = ps_suppkey
and ps_partkey = p_partkey
and p_name like '%red%'
and ps_suppkey <= 10000
and s_nationkey = 11
group by s_nationkey, Part.Lineage
order by C desc;
select C, count(*)
from Exp_TupleLineage
group by C
order by C desc;
drop table if exists Exp_TupleLineage;
create table Exp_TupleLineage as
select s_nationkey,
Supplier.Lineage, count(Supplier.P) as C
from Supplier, Partsupp, Part
where s_suppkey = ps_suppkey
and ps_partkey = p_partkey
and p_name like '%red%'
and ps_suppkey <= 10000
and s_nationkey = 11
group by s_nationkey, Supplier.Lineage
order by C desc;
select C, count(*)
from Exp_TupleLineage
group by C
order by C desc;