閱讀751 返回首頁    go 阿里雲 go 技術社區[雲棲]


PostgreSQL 空間、多維 序列 生成方法

標籤

PostgreSQL , GIS , PostGIS , 序列 , 空間序列


背景

數據庫的一維序列是很好理解的東西,就是在一個維度上自增。

那麼二維、多維序列怎麼理解呢?顯然就是在多個維度上齊頭並進的自增咯。

二維序列

以二維序列為例,應該是這樣增長的:

0,0  
1,0  
0,1  
1,1  
2,1  
1,2  
2,2  
...  

那麼如何生成以上二維序列呢?實際上可以利用數據庫的多個一維序列來生成。

create sequence seq1;  
create sequence seq2;  
  
-- Advances seq1 and seq2 by one step each and returns three points:  
-- the new point (x, y), then its neighbours (x+1, y) and (x, y+1).  
create or replace function seq_2d() returns point[] as $$  
declare  
  -- Each sequence is advanced exactly once per call; the values are kept in  
  -- locals so the neighbours are derived from the same step.  
  x bigint := nextval('seq1');  
  y bigint := nextval('seq2');  
begin  
  return array[  
    point(x,     y),  
    point(x + 1, y),  
    point(x,     y + 1)  
  ];  
end;  
$$ language plpgsql strict;  

測試

test=# select seq_2d();  
          seq_2d             
---------------------------  
 {"(1,1)","(2,1)","(1,2)"}  
(1 row)  
  
test=# select seq_2d();  
          seq_2d             
---------------------------  
 {"(2,2)","(3,2)","(2,3)"}  
(1 row)  
  
test=# select seq_2d();  
          seq_2d             
---------------------------  
 {"(3,3)","(4,3)","(3,4)"}  
(1 row)  

三維序列

三維序列的生成方法類似:

0,0,0  
1,0,0  
0,1,0  
0,0,1  
1,1,0  
0,1,1  
1,0,1  
  
1,1,1  
2,1,1  
1,2,1  
1,1,2  
2,2,1  
1,2,2  
2,1,2  
  
2,2,2  
......  

實現如下:

-- seq1 and seq2 were already created by the 2D listing earlier in this file, so a  
-- plain CREATE would fail here. Recreate all three sequences so they start  
-- together: seq_3d() returns points around the diagonal only while the three  
-- sequences advance in lock-step.  
drop sequence if exists seq1, seq2, seq3;  
create sequence seq1;  
create sequence seq2;  
create sequence seq3;  
create extension if not exists cube;  
  
-- Advances seq1, seq2 and seq3 by one step each and returns seven cube points:  
-- the new point (x, y, z), the three points with one coordinate incremented,  
-- and the three points with two coordinates incremented.  
create or replace function seq_3d() returns cube[] as $$  
declare  
  -- Each sequence is advanced exactly once per call.  
  x bigint := nextval('seq1');  
  y bigint := nextval('seq2');  
  z bigint := nextval('seq3');  
begin  
  return array[  
    cube(array[x,     y,     z    ]),  
    cube(array[x + 1, y,     z    ]),  
    cube(array[x,     y + 1, z    ]),  
    cube(array[x,     y,     z + 1]),  
    cube(array[x + 1, y + 1, z    ]),  
    cube(array[x,     y + 1, z + 1]),  
    cube(array[x + 1, y,     z + 1])  
  ];  
end;  
$$ language plpgsql strict;  

例子

test=# select seq_3d();  
                                        seq_3d                                           
---------------------------------------------------------------------------------------  
 {"(1, 1, 1)","(2, 1, 1)","(1, 2, 1)","(1, 1, 2)","(2, 2, 1)","(1, 2, 2)","(2, 1, 2)"}  
(1 row)  
  
test=# select seq_3d();  
                                        seq_3d                                           
---------------------------------------------------------------------------------------  
 {"(2, 2, 2)","(3, 2, 2)","(2, 3, 2)","(2, 2, 3)","(3, 3, 2)","(2, 3, 3)","(3, 2, 3)"}  
(1 row)  
  

多維序列

以此類推,可以得到多維序列。

多維數據的空間存放和BRIN塊級索引

《PostgreSQL 黑科技 - 空間聚集存儲》

前面講到了空間聚集存儲,如果數據按空間順序存放,使用BRIN塊級索引,可以在任意維度上得到最好的查詢效率,真正做到一個塊級索引支持任意列的高效過濾。

例子

-- seq1..seq3 already exist from the earlier listings in this file, so a plain  
-- CREATE would fail here. Recreate them so all three start together at 1.  
drop sequence if exists seq1, seq2, seq3;  
create sequence seq1;  
create sequence seq2;  
create sequence seq3;  
  
-- NOTE: nextval() returns bigint while the columns are int, so inserts fail  
-- with "integer out of range" once a sequence passes 2147483647.  
create table tbl(c1 int, c2 int, c3 int);  
  
-- Advances seq1, seq2 and seq3 by one step each and inserts seven rows into tbl:  
-- the new point (v1, v2, v3), the three points with one coordinate incremented,  
-- and the three points with two coordinates incremented.  
create or replace function cluster_insert() returns void as $$  
declare  
  -- Each sequence is advanced exactly once per call.  
  v1 bigint := nextval('seq1');  
  v2 bigint := nextval('seq2');  
  v3 bigint := nextval('seq3');  
begin  
  -- One multi-row insert with an explicit column list; rows are listed in  
  -- the same order as the original seven single-row inserts.  
  insert into tbl (c1, c2, c3) values  
    (v1,     v2,     v3    ),  
    (v1 + 1, v2,     v3    ),  
    (v1,     v2 + 1, v3    ),  
    (v1,     v2,     v3 + 1),  
    (v1 + 1, v2 + 1, v3    ),  
    (v1,     v2 + 1, v3 + 1),  
    (v1 + 1, v2,     v3 + 1);  
end;  
$$ language plpgsql strict;  

壓測,寫入大量數據

vi test.sql  
-- One execution of this script calls cluster_insert() 100 times; count(*) folds the 100 result rows into one.  
select count(*) from (select cluster_insert() from generate_series(1,100)) t;  
  
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 32 -j 32 -T 1200  

檢查多維聚集性

test=# select * from tbl limit 10;  
   c1    |   c2    |   c3      
---------+---------+---------  
 1992652 | 1992653 | 1992652  
 1992573 | 1992574 | 1992578  
 1992574 | 1992574 | 1992578  
 1992573 | 1992575 | 1992578  
 1992573 | 1992574 | 1992579  
 1992574 | 1992575 | 1992578  
 1992573 | 1992575 | 1992579  
 1992574 | 1992574 | 1992579  
 1992658 | 1992658 | 1992658  
 1992659 | 1992658 | 1992658  
(10 rows)  

創建BRIN塊級索引。

-- A single BRIN index covering all three columns of tbl.  
create index idx  
  on tbl  
  using brin (c1, c2, c3);  
test=# \dt+ tbl  
                             List of relations
 Schema |        Name         | Type  |  Owner   |    Size    | Description 
--------+---------------------+-------+----------+------------+-------------
 public | tbl                 | table | postgres | 97 GB      | 
(1 row)  
  
test=# \di+ idx  
                        List of relations  
 Schema | Name | Type  |  Owner   | Table |  Size  | Description   
--------+------+-------+----------+-------+--------+-------------  
 public | idx  | index | postgres | tbl   | 456 kB |   
(1 row)  

看看456KB的索引,在97 GB的數據層面,查詢效率如何。

任意列、組合查詢過濾性。

-- Range filter on one column at a time (c1, then c2, then c3).  
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000;  
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c2 between 1 and 1000;  
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c3 between 1 and 1000;  
-- Range filters on two columns combined (c1 with c2, then c1 with c3).  
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000;  
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c3 between 100 and 2000;  
-- Range filters on all three columns combined.  
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000 and c3 between 1 and 2000;  
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000;
                                                       QUERY PLAN                                                       
------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on public.tbl  (cost=650.23..31623.80 rows=1 width=12) (actual time=27.302..50.284 rows=6997 loops=1)
   Output: c1, c2, c3
   Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000))
   Rows Removed by Index Recheck: 229803
   Heap Blocks: lossy=1280
   Buffers: shared hit=1942
   ->  Bitmap Index Scan on idx  (cost=0.00..650.23 rows=23810 width=0) (actual time=26.881..26.881 rows=12800 loops=1)
         Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000))
         Buffers: shared hit=662
 Planning time: 0.095 ms
 Execution time: 50.636 ms
(11 rows)

test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c2 between 1 and 1000;
                                                       QUERY PLAN                                                       
------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on public.tbl  (cost=650.23..31623.80 rows=1 width=12) (actual time=27.886..49.011 rows=6997 loops=1)
   Output: c1, c2, c3
   Recheck Cond: ((tbl.c2 >= 1) AND (tbl.c2 <= 1000))
   Rows Removed by Index Recheck: 229803
   Heap Blocks: lossy=1280
   Buffers: shared hit=1942
   ->  Bitmap Index Scan on idx  (cost=0.00..650.23 rows=23810 width=0) (actual time=27.512..27.512 rows=12800 loops=1)
         Index Cond: ((tbl.c2 >= 1) AND (tbl.c2 <= 1000))
         Buffers: shared hit=662
 Planning time: 0.040 ms
 Execution time: 49.348 ms
(11 rows)

test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c3 between 1 and 1000;
                                                       QUERY PLAN                                                       
------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on public.tbl  (cost=650.23..31623.80 rows=1 width=12) (actual time=25.238..46.292 rows=6997 loops=1)
   Output: c1, c2, c3
   Recheck Cond: ((tbl.c3 >= 1) AND (tbl.c3 <= 1000))
   Rows Removed by Index Recheck: 229803
   Heap Blocks: lossy=1280
   Buffers: shared hit=1942
   ->  Bitmap Index Scan on idx  (cost=0.00..650.23 rows=23810 width=0) (actual time=24.875..24.875 rows=12800 loops=1)
         Index Cond: ((tbl.c3 >= 1) AND (tbl.c3 <= 1000))
         Buffers: shared hit=662
 Planning time: 0.044 ms
 Execution time: 46.631 ms
(11 rows)

test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000;
                                                       QUERY PLAN                                                       
------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on public.tbl  (cost=650.23..31742.85 rows=1 width=12) (actual time=30.018..48.522 rows=6307 loops=1)
   Output: c1, c2, c3
   Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000))
   Rows Removed by Index Recheck: 230493
   Heap Blocks: lossy=1280
   Buffers: shared hit=1942
   ->  Bitmap Index Scan on idx  (cost=0.00..650.23 rows=23810 width=0) (actual time=27.273..27.273 rows=12800 loops=1)
         Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000))
         Buffers: shared hit=662
 Planning time: 0.049 ms
 Execution time: 48.829 ms
(11 rows)

test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c3 between 100 and 2000;
                                                       QUERY PLAN                                                       
------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on public.tbl  (cost=650.23..31742.85 rows=1 width=12) (actual time=27.565..46.347 rows=6307 loops=1)
   Output: c1, c2, c3
   Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c3 >= 100) AND (tbl.c3 <= 2000))
   Rows Removed by Index Recheck: 230493
   Heap Blocks: lossy=1280
   Buffers: shared hit=1942
   ->  Bitmap Index Scan on idx  (cost=0.00..650.23 rows=23810 width=0) (actual time=24.799..24.799 rows=12800 loops=1)
         Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c3 >= 100) AND (tbl.c3 <= 2000))
         Buffers: shared hit=662
 Planning time: 0.055 ms
 Execution time: 46.656 ms
(11 rows)

test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000 and c3 between 1 and 2000;
                                                                QUERY PLAN                                                                
------------------------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on public.tbl  (cost=650.23..31861.90 rows=1 width=12) (actual time=28.703..49.599 rows=6307 loops=1)
   Output: c1, c2, c3
   Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000) AND (tbl.c3 >= 1) AND (tbl.c3 <= 2000))
   Rows Removed by Index Recheck: 230493
   Heap Blocks: lossy=1280
   Buffers: shared hit=1942
   ->  Bitmap Index Scan on idx  (cost=0.00..650.23 rows=23810 width=0) (actual time=25.590..25.590 rows=12800 loops=1)
         Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000) AND (tbl.c3 >= 1) AND (tbl.c3 <= 2000))
         Buffers: shared hit=662
 Planning time: 0.114 ms
 Execution time: 49.919 ms
(11 rows)

小結

本文介紹了如何創建、生成多維序列。

本文驗證了數據如果按照多維序列聚集存放,可以達到塊級索引最強過濾性,任意字段都能實現高效率過濾。

如果數據的多列本身不存在相關性,可以參考這篇文檔,對數據進行空間重分布存儲,以得到最強過濾性。

《PostgreSQL 黑科技 - 空間聚集存儲》

最後更新:2017-10-28 23:03:45

  上一篇:go  PostgreSQL Oracle 兼容性 之 NUMTODSINTERVAL
  下一篇:go  PostgreSQL 事件觸發器應用 - DDL審計