

Second-level analysis of TB-scale tables in any dimension - sampling-based estimation for efficient TOP N and similar analytics

Tags

PostgreSQL, sampling, sample, TOP N, statistical analysis


Background

Estimation is a standard technique in statistics. When data volumes are huge, computing exact answers consumes enormous resources, while statistical analysis rarely requires perfectly exact figures, so estimation is a practical compromise that is widely used in analytical scenarios.

PostgreSQL is a powerful database and offers many methods for estimation-based statistics.

1. In PostgreSQL, estimated UV and incremental UV (i.e. count distinct) can be computed with the HLL extension.

《Greenplum 最佳實踐 - 估值插件hll的使用(以及hll分式聚合函數優化)》

《PostgreSQL hll (HyperLogLog) extension for "State of The Art Cardinality Estimation Algorithm" - 3》

《PostgreSQL hll (HyperLogLog) extension for "State of The Art Cardinality Estimation Algorithm" - 2》

《PostgreSQL hll (HyperLogLog) extension for "State of The Art Cardinality Estimation Algorithm" - 1》

2. The TOP values of any column (including the TOP elements of array columns), together with their counts, can be read from the statistics histograms.

《PostgreSQL pg_stats used to estimate top N freps values and explain rows》

3. The total row count of a table can be read from pg_class.reltuples.

4. The number of rows returned by an arbitrary SQL statement (e.g. for pagination), or an estimated COUNT(*) (just rewrite the SQL as select 1 from ...), can be read from the row estimate reported by explain; a minimal sketch follows the reference below.

《論count與offset使用不當的罪名 和 分頁的優化》
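The idea, in a minimal sketch (the table and filter here are hypothetical placeholders, not objects from this article):

explain select 1 from some_table where some_col = 1;  
-- the rows=... figure in the resulting plan is the planner's estimate of the  
-- result size, obtained without executing the query  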

5. The number of distinct values across multiple columns can be estimated very accurately by defining custom (extended) statistics.

《PostgreSQL 10 黑科技 - 自定義統計信息》

6. Estimates for filtered queries, such as the TOP N movie stars in a given province, can be obtained by sampling first and then computing over the sample.

《PostgreSQL 巧妙的數據采樣方法》

《PostgreSQL 數據采樣與脫敏》

This article focuses on the sampling-based estimation approach.

Scenario Design

I previously wrote about a scenario, pivot analysis for a general content (audio/video/text) site, where the data volume is large and a full computation has to scan a lot of data. Let's see whether sampling can meet that need.

《音視圖(泛內容)網站透視分析 DB設計 - 阿裏雲(RDS、HybridDB) for PostgreSQL最佳實踐》

1. Table structure

create table tbl (  
  id int8,        -- sequence  
  tag1 int[],     -- array of tags  
  c1 int,         -- 1-100  
  c2 int,         -- 1-10000  
  c3 timestamp    -- timestamp  
);  

2. Random value generation function

-- generate an array of $3 random values drawn from the range $1 to $2  
  
create or replace function gen_rand_ints(int, int, int) returns int[] as $$  
  select array(select (random()*($2-$1))::int+$1 from generate_series(1,$3));  
$$ language sql strict;  
  
postgres=# select gen_rand_ints(10,25,5);  
  gen_rand_ints     
------------------  
 {20,19,24,22,21}  
(1 row)  

3. Load test data

-- hot-spot arrays (elements in 1-1000), 50 million rows  
insert into tbl select id, gen_rand_ints(1,1000,10), random()*100, random()*10000, clock_timestamp() from generate_series(1,50000000) t(id);  
  
-- non-hot-spot arrays (elements in 1-1000000), 100 million rows  
insert into tbl select id, gen_rand_ints(1,1000000,10), random()*100, random()*10000, clock_timestamp() from generate_series(1,100000000) t(id);  

The data looks like this:

postgres=# select * from tbl limit 10;  
    id    |                   tag1                    | c1 |  c2  |             c3               
----------+-------------------------------------------+----+------+----------------------------  
 38931521 | {424,448,91,420,382,657,677,60,530,503}   | 59 | 6120 | 2017-09-11 14:32:06.610512  
 38931522 | {66,87,468,207,79,780,307,714,520,149}    | 44 | 7848 | 2017-09-11 14:32:06.610522  
 38931523 | {99,628,798,558,415,74,863,839,522,953}   | 26 | 9032 | 2017-09-11 14:32:06.610531  
 38931524 | {610,935,962,140,438,551,752,503,636,220} | 71 | 7136 | 2017-09-11 14:32:06.61054  
 38931525 | {998,16,428,518,164,868,303,263,496,102}  | 82 | 9102 | 2017-09-11 14:32:06.61055  
 38931526 | {175,683,749,696,637,8,599,247,942,561}   | 39 | 3796 | 2017-09-11 14:32:06.610559  
 38931527 | {112,138,882,747,356,591,461,355,605,888} | 87 | 7684 | 2017-09-11 14:32:06.610568  
 38931528 | {756,175,31,252,276,850,162,450,533,910}  | 15 | 1691 | 2017-09-11 14:32:06.610578  
 38931529 | {917,744,416,860,306,801,240,416,937,122} | 16 | 2927 | 2017-09-11 14:32:06.610587  
 38931530 | {712,623,647,317,511,519,86,267,693,116}  | 52 | 9676 | 2017-09-11 14:32:06.610596  
(10 rows)  

The goal is to compute the TOP N elements of tag1 under arbitrary filter conditions.

4. Analyze the table to build the statistics histograms.

postgres=# analyze tbl;  
ANALYZE  

Table size: 16 GB.

postgres=# \dt+ tbl  
                   List of relations  
 Schema | Name | Type  |  Owner   | Size  | Description   
--------+------+-------+----------+-------+-------------  
 public | tbl  | table | postgres | 16 GB |   
(1 row)  
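As a quick cross-check (not part of the original article), the estimated total row count kept in the planner statistics (background item 3) should be close to the 150 million rows just loaded:

select reltuples::bigint as estimated_rows from pg_class where relname = 'tbl';  -- planner's estimate, fresh right after ANALYZE  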

5. Compute the exact TOP N elements under a given condition. There are actually 1000 hot IDs, so the counts of the returned TOP 10 are all very close to each other; when estimation is used later, the estimated TOP 10 may not match this list as precisely, but every value returned will be among those 1000 IDs.

-- query time with 32 parallel workers  
  
postgres=# select unnest(tag1) tag1, count(*) from tbl where c1 between 1 and 10 group by 1 order by 2 desc limit 10;  
 tag1 | count   
------+-------  
  134 | 50935  
  768 | 50915  
  663 | 50876  
  567 | 50821  
  146 | 50821  
  332 | 50814  
  450 | 50807  
  884 | 50789  
   58 | 50781  
  605 | 50774  
(10 rows)  
  
Time: 23441.247 ms (00:23.441)  
  
-- query time without parallelism  
postgres=# select unnest(tag1) tag1, count(*) from tbl where c1 between 1 and 10 group by 1 order by 2 desc limit 10;  
 tag1 | count   
------+-------  
  134 | 50935  
  768 | 50915  
  663 | 50876  
  567 | 50821  
  146 | 50821  
  332 | 50814  
  450 | 50807  
  884 | 50789  
   58 | 50781  
  605 | 50774  
(10 rows)  
  
Time: 154935.686 ms (02:34.936)  
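For reference, a minimal sketch of the kind of session settings used to obtain a 32-worker parallel aggregate; the article does not show its configuration, so the exact values below are assumptions, not the author's settings:

set max_parallel_workers_per_gather = 32;     -- per-query cap on parallel workers  
set max_parallel_workers = 32;                -- cluster-wide cap (PostgreSQL 10+)  
alter table tbl set (parallel_workers = 32);  -- override the size-based worker estimate for this table  
set parallel_setup_cost = 0;                  -- encourage the planner to pick a parallel plan  
set parallel_tuple_cost = 0;  
set min_parallel_table_scan_size = 0;  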

6. Compute the sampled TOP N under the same condition

The sampling methods are described at the end of this article. The standard PostgreSQL distribution ships two built-in sampling methods (SYSTEM and BERNOULLI) plus two contrib sampling methods (tsm_system_rows and tsm_system_time), four in total, and further methods can be added via extensions.

Use block-level sampling (sampling does not currently run in parallel).

postgres=# select unnest(tag1) tag1, (count(*))*20      -- multiply by 100 / sampling percentage  
from   
(select * from tbl TABLESAMPLE system (5)) t     
where c1 between 1 and 10 group by 1 order by 2 desc limit 10;  
 tag1 | ?column?   
------+----------  
  724 |    53380  
  798 |    52680  
   24 |    52640  
  371 |    52480  
  569 |    52400  
  531 |    52280  
  979 |    52160  
  429 |    52140  
  980 |    52080  
  350 |    51920  
(10 rows)  
  
-- 5% sample, about 7 seconds.  
Time: 6887.745 ms (00:06.888)   
  
postgres=# select unnest(tag1) tag1, (count(*))*50    -- multiply by 100 / sampling percentage  
from   
(select * from tbl TABLESAMPLE system (2)) t     
where c1 between 1 and 10 group by 1 order by 2 desc limit 10;  
 tag1 | ?column?   
------+----------  
  324 |    55450  
  435 |    55150  
  720 |    55050  
  943 |    54950  
  475 |    54750  
  958 |    54600  
   13 |    54400  
  742 |    54300  
  739 |    54100  
  301 |    53950  
(10 rows)  
  
-- 2% sample, about 3 seconds.  
Time: 2720.140 ms (00:02.720)  
  
The larger the sample, the higher the accuracy.

The TOP N obtained by sampling here is very accurate, because the example uses 1000 random values that all occur with equal probability; if we returned the TOP 1000 instead, the membership would be exactly right.
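One practical detail: the scale-up multiplier (100 / sampling percentage) is easy to get out of sync with the TABLESAMPLE argument when the percentage changes. A minimal sketch that derives both from a single psql variable (the :pct variable is an illustration, not part of the original queries):

\set pct 5  
select unnest(tag1) as tag1, count(*) * (100.0 / :pct) as est_count  
from (select * from tbl tablesample system (:pct)) t  
where c1 between 1 and 10  
group by 1 order by 2 desc limit 10;  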

Large table example

Redesign the hot spots; 4 billion test rows are loaded in total:

Level-1 hot spot: value 1, 500 million rows

Level-2 hot spots: values 2-4, 500 million rows

Level-3 hot spots: values 5-10, 500 million rows

Level-4 hot spots: values 11-30, 500 million rows

Ordinary data: values 1-100000, 2 billion rows

1. Table structure

create table tbl1 (  
  id int8,      -- sequence  
  c1 int8,      -- target column  
  c2 int8,      -- 1-100  
  c3 int8,      -- 1-100000  
  c4 timestamp  -- timestamp  
);  

2. Load test data

nohup psql -c "insert into tbl1 select id, 1, random()*100, random()*100000, clock_timestamp() from generate_series(1,500000000) t(id);" >/dev/null 2>&1 &  
nohup psql -c "insert into tbl1 select id, random()*(4-2)+2, random()*100, random()*100000, clock_timestamp() from generate_series(1,500000000) t(id);" >/dev/null 2>&1 &  
nohup psql -c "insert into tbl1 select id, random()*(10-5)+5, random()*100, random()*100000, clock_timestamp() from generate_series(1,500000000) t(id);" >/dev/null 2>&1 &  
nohup psql -c "insert into tbl1 select id, random()*(30-11)+11, random()*100, random()*100000, clock_timestamp() from generate_series(1,500000000) t(id);" >/dev/null 2>&1 &  
nohup psql -c "insert into tbl1 select id, random()*100000, random()*100, random()*100000, clock_timestamp() from generate_series(1,2000000000) t(id);" >/dev/null 2>&1 &  

3. Analyze the table

postgres=# analyze tbl1;  
ANALYZE  
Time: 502.421 ms  

Table size: 254 GB.

postgres=# \dt+ tbl1  
                    List of relations  
 Schema | Name | Type  |  Owner   |  Size  | Description   
--------+------+-------+----------+--------+-------------  
 public | tbl1 | table | postgres | 254 GB |   
(1 row)  
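Incidentally, once ANALYZE has run, the unconditional TOP values of c1 are already visible in the planner statistics (background item 2). A minimal sketch using the standard pg_stats view, not part of the original test:

select most_common_vals, most_common_freqs  
from pg_stats  
where schemaname = 'public' and tablename = 'tbl1' and attname = 'c1';  -- estimated TOP values and their frequencies, no table scan  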

4. Exact TOP 30

-- query time with 32 parallel workers  
  
postgres=# select c1,count(*) from tbl1 where c2 between 1 and 10 group by 1 order by 2 desc limit 30;  
 c1 |  count     
----+----------  
  1 | 49991259  
  3 | 25006580  
  2 | 12502559  
  4 | 12498741  
  9 | 10004285  
  6 | 10002597  
  8 |  9999530  
  7 |  9999215  
  5 |  5003219  
 10 |  4998870  
 29 |  2636193  
 18 |  2635457  
 13 |  2635344  
 17 |  2634693  
 26 |  2633965  
 19 |  2633690  
 28 |  2633526  
 14 |  2633512  
 15 |  2633363  
 24 |  2633260  
 20 |  2633014  
 25 |  2632926  
 16 |  2632779  
 22 |  2632508  
 27 |  2632288  
 23 |  2632216  
 21 |  2631443  
 12 |  2631315  
 11 |  1318483  
 30 |  1318451  
(30 rows)  
  
Time: 20845.738 ms (00:20.846)  
  
-- query time without parallelism  
  
postgres=# select c1,count(*) from tbl1 where c2 between 1 and 10 group by 1 order by 2 desc limit 30;  
  
 c1 |  count     
----+----------  
  1 | 49991259  
  3 | 25006580  
  2 | 12502559  
  4 | 12498741  
  9 | 10004285  
  6 | 10002597  
  8 |  9999530  
  7 |  9999215  
  5 |  5003219  
 10 |  4998870  
 29 |  2636193  
 18 |  2635457  
 13 |  2635344  
 17 |  2634693  
 26 |  2633965  
 19 |  2633690  
 28 |  2633526  
 14 |  2633512  
 15 |  2633363  
 24 |  2633260  
 20 |  2633014  
 25 |  2632926  
 16 |  2632779  
 22 |  2632508  
 27 |  2632288  
 23 |  2632216  
 21 |  2631443  
 12 |  2631315  
 11 |  1318483  
 30 |  1318451  
(30 rows)  
  
Time: 471112.827 ms (07:51.113)  

5. Sampled TOP 30

select c1,(count(*))*20 from   -- multiply by 100 / sampling percentage  
(select * from tbl1 TABLESAMPLE system (5)) t     
where c2 between 1 and 10 group by 1 order by 2 desc limit 30;  
  
 c1 | ?column?   
----+----------  
  1 | 50068840  
  3 | 25108820  
  2 | 12558680  
  4 | 12513080  
  7 | 10009300  
  9 | 10006260  
  6 | 10005400  
  8 |  9987220  
  5 |  5008280  
 10 |  5007980  
 17 |  2652940  
 16 |  2648640  
 25 |  2646800  
 28 |  2646600  
 15 |  2642480  
 20 |  2642220  
 14 |  2641620  
 26 |  2640500  
 23 |  2639420  
 29 |  2637740  
 22 |  2637320  
 13 |  2636900  
 19 |  2636100  
 18 |  2635120  
 24 |  2634440  
 12 |  2631480  
 27 |  2629880  
 21 |  2624940  
 11 |  1330140  
 30 |  1316480  
(30 rows)  
  
Time: 31884.725 ms (00:31.885)  
  
-- 5% sample, about 32 seconds.  
  
select c1,(count(*))*50 from   -- multiply by 100 / sampling percentage  
(select * from tbl1 TABLESAMPLE system (2)) t     
where c2 between 1 and 10 group by 1 order by 2 desc limit 30;  
  
 c1 | ?column?   
----+----------  
  1 | 50173200  
  3 | 24993550  
  2 | 12487100  
  4 | 12474100  
  6 |  9998250  
  8 |  9980450  
  7 |  9973950  
  9 |  9960450  
 10 |  4999050  
  5 |  4995000  
 29 |  2642700  
 28 |  2640900  
 16 |  2640300  
 26 |  2630250  
 24 |  2627500  
 23 |  2623700  
 19 |  2622350  
 27 |  2622000  
 18 |  2621200  
 12 |  2619450  
 20 |  2616200  
 17 |  2616050  
 21 |  2615800  
 15 |  2613200  
 22 |  2612200  
 14 |  2607700  
 13 |  2605900  
 25 |  2604150  
 30 |  1312300  
 11 |  1311950  
(30 rows)  
  
Time: 12942.455 ms (00:12.942)  
  
-- 2% sample, about 13 seconds.  
  
postgres=# select c1,(count(*))*1000 from   -- multiply by 100 / sampling percentage  
(select * from tbl1 TABLESAMPLE system (0.1)) t     
where c2 between 1 and 10 group by 1 order by 2 desc limit 30;  
 c1 | ?column?   
----+----------  
  1 | 48077000  
  3 | 25061000  
  2 | 12762000  
  4 | 12262000  
  8 |  9851000  
  6 |  9789000  
  7 |  9718000  
  9 |  9654000  
  5 |  4971000  
 10 |  4885000  
 18 |  2731000  
 28 |  2727000  
 29 |  2710000  
 23 |  2697000  
 15 |  2687000  
 27 |  2681000  
 22 |  2672000  
 17 |  2672000  
 25 |  2670000  
 19 |  2637000  
 20 |  2632000  
 12 |  2628000  
 14 |  2628000  
 21 |  2622000  
 26 |  2618000  
 13 |  2601000  
 24 |  2522000  
 16 |  2513000  
 11 |  1406000  
 30 |  1301000  
(30 rows)  
  
Time: 863.604 ms  
  
-- 0.1% sample, about 0.86 seconds.  

OK. With a 0.1% sample (only about 254 MB of data scanned), the correct TOP 30 was computed in under 1 second, and the estimated counts are remarkably close to the exact ones.
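For comparison, the same estimate could be written with the row-level BERNOULLI method; a hedged sketch that was not timed here (per the documentation quoted below, BERNOULLI scans the whole table, so it is expected to be slower than SYSTEM at small percentages, while being less sensitive to clustering effects):

select c1, count(*) * 1000 as est_count   -- multiply by 100 / sampling percentage  
from (select * from tbl1 tablesample bernoulli (0.1)) t  
where c2 between 1 and 10  
group by 1 order by 2 desc limit 30;  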

If Greenplum supported this feature it would be fantastic: second-level pivoting and drill-down over a trillion rows, across arbitrary dimensions, would no longer be a dream.

Summary

1. Sampled vs. exact query time

1.1 TOP N over array elements

Query                        Table size   Rows          TOP N time  
Exact, 32 parallel workers   16 GB        150 million   23 s  
Exact, no parallelism        16 GB        150 million   155 s  
5% sample                    16 GB        150 million   7 s  
2% sample                    16 GB        150 million   3 s  

1.2 TOP N over a scalar column

Query                        Table size   Rows         TOP N time  
Exact, 32 parallel workers   254 GB       4 billion    21 s  
Exact, no parallelism        254 GB       4 billion    471 s  
5% sample                    254 GB       4 billion    32 s  
2% sample                    254 GB       4 billion    13 s  
0.1% sample                  254 GB       4 billion    0.86 s  

2. Sampling-based estimation reaches high accuracy while consuming very few resources. Parallel execution is also very fast, but it needs far more CPU and IO, so the achievable degree of parallelism drops sharply under load; unless you have plenty of spare capacity, estimation is recommended whenever an estimate is good enough.

3. Efficiency of the estimation:

Sampling-based estimation currently cannot use multiple cores, and here it processed roughly 254 MB per second. To keep the response under 1 second, set the sampling percentage to about 0.1% for a 254 GB table, or about 0.025% for a 1 TB table. With that, even TB-scale tables can be estimated across arbitrary dimensions within seconds.
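Following that rule of thumb, the sampling percentage for a given scan budget can be derived from the table's on-disk size; a minimal sketch (the 256 MB budget is an illustrative assumption):

select round(100.0 * 256 * 1024 * 1024 / pg_relation_size('tbl1'), 4) as sample_pct;  -- percentage of tbl1 that amounts to roughly 256 MB  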

4. Sampling methods

4.1 https://www.postgresql.org/docs/9.6/static/sql-select.html

TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ]  

A TABLESAMPLE clause after a table_name indicates that the specified sampling_method should be used to retrieve a subset of the rows in that table. This sampling precedes the application of any other filters such as WHERE clauses. The standard PostgreSQL distribution includes two sampling methods, BERNOULLI and SYSTEM, and other sampling methods can be installed in the database via extensions.

The BERNOULLI and SYSTEM sampling methods each accept a single argument which is the fraction of the table to sample, expressed as a percentage between 0 and 100. This argument can be any real-valued expression. (Other sampling methods might accept more or different arguments.) These two methods each return a randomly-chosen sample of the table that will contain approximately the specified percentage of the table's rows. The BERNOULLI method scans the whole table and selects or ignores individual rows independently with the specified probability. The SYSTEM method does block-level sampling with each block having the specified chance of being selected; all rows in each selected block are returned. The SYSTEM method is significantly faster than the BERNOULLI method when small sampling percentages are specified, but it may return a less-random sample of the table as a result of clustering effects.

The optional REPEATABLE clause specifies a seed number or expression to use for generating random numbers within the sampling method. The seed value can be any non-null floating-point value. Two queries that specify the same seed and argument values will select the same sample of the table, if the table has not been changed meanwhile. But different seed values will usually produce different samples. If REPEATABLE is not given then a new random sample is selected for each query, based upon a system-generated seed. Note that some add-on sampling methods do not accept REPEATABLE, and will always produce new samples on each use.
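A minimal sketch of REPEATABLE against the large test table from this article (the seed value 42 is arbitrary):

select count(*) from tbl1 tablesample system (1) repeatable (42);  -- same method, argument and seed => same sample, as long as the table is unchanged  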

4.2 https://www.postgresql.org/docs/9.6/static/tsm-system-rows.html

CREATE EXTENSION tsm_system_rows;  
  
SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);  
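With tsm_system_rows the sample is a fixed number of rows rather than a percentage, so the scale-up factor becomes (estimated total rows / sampled rows). A hedged sketch applying it to this article's tbl1, using pg_class.reltuples (background item 3) for the total:

select c1,  
       count(*) * (select reltuples from pg_class where relname = 'tbl1') / 1000000.0 as est_count  
from (select * from tbl1 tablesample system_rows (1000000)) t  
where c2 between 1 and 10  
group by 1 order by 2 desc limit 30;  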

4.3 https://www.postgresql.org/docs/9.6/static/tsm-system-time.html

CREATE EXTENSION tsm_system_time;  
  
SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);  
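The argument to SYSTEM_TIME is the maximum number of milliseconds to spend reading the table, so the sample size adapts to whatever can be read within that time budget, which is convenient when the goal is a fixed response time rather than a fixed sample fraction.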

5. PostgreSQL supports many other estimation methods; see the overview at the beginning of this article.
