PostgreSQL 空間、多維 序列 生成方法
標籤
PostgreSQL , GIS , PostGIS , 序列 , 空間序列
背景
數據庫的一維序列是很好理解的東西,就是在一個維度上自增。
那麼二維、多維序列怎麼理解呢?顯然就是在多個維度上齊頭並進的自增咯。
二維序列
以二維序列為例,應該是這樣增長的:
0,0
1,0
0,1
1,1
2,1
1,2
2,2
...
那麼如何生成以上二維序列呢?實際上可以利用數據庫的多個一維序列來生成。
create sequence seq1;
create sequence seq2;

-- seq_2d(): advance seq1 and seq2 by one step each and return three points
-- built from the freshly drawn values (x, y), in this order:
--   (x, y), (x + 1, y), (x, y + 1)
-- Only the first point consumes sequence values; the two neighbours are
-- derived arithmetically, so the next call starts from (x + 1, y + 1) when
-- nothing else advances the sequences in between.
-- STRICT is kept from the original definition; the function takes no
-- arguments, so it does not influence evaluation.
create or replace function seq_2d() returns point[] as $$
declare
  x bigint := nextval('seq1');  -- first coordinate for this call
  y bigint := nextval('seq2');  -- second coordinate for this call
begin
  return array[
    point(x,     y),
    point(x + 1, y),
    point(x,     y + 1)
  ];
end;
$$ language plpgsql strict;
測試
test=# select seq_2d();
seq_2d
---------------------------
{"(1,1)","(2,1)","(1,2)"}
(1 row)
test=# select seq_2d();
seq_2d
---------------------------
{"(2,2)","(3,2)","(2,3)"}
(1 row)
test=# select seq_2d();
seq_2d
---------------------------
{"(3,3)","(4,3)","(3,4)"}
(1 row)
三維序列
三維序列的生成方法類似:
0,0,0
1,0,0
0,1,0
0,0,1
1,1,0
0,1,1
1,0,1
1,1,1
2,1,1
1,2,1
1,1,2
2,2,1
1,2,2
2,1,2
2,2,2
......
-- Start from fresh counters. seq1/seq2 may already exist from the 2D example,
-- in which case a plain CREATE SEQUENCE would fail and the three counters
-- would no longer be aligned. Dropping first makes this section re-runnable
-- and guarantees the first call returns (1, 1, 1).
drop sequence if exists seq1, seq2, seq3;
create sequence seq1;
create sequence seq2;
create sequence seq3;
create extension if not exists cube;

-- seq_3d(): advance seq1, seq2 and seq3 by one step each and return seven
-- cube points built from the freshly drawn values (x, y, z), in this order:
--   (x, y, z),
--   (x+1, y, z), (x, y+1, z), (x, y, z+1),
--   (x+1, y+1, z), (x, y+1, z+1), (x+1, y, z+1)
-- The eighth corner (x+1, y+1, z+1) is the first point of the next call.
-- Each sequence is read once into a local so the neighbours are derived from
-- the same values without repeated currval() lookups.
-- STRICT is kept from the original definition; the function takes no
-- arguments, so it does not influence evaluation.
create or replace function seq_3d() returns cube[] as $$
declare
  x bigint := nextval('seq1');
  y bigint := nextval('seq2');
  z bigint := nextval('seq3');
begin
  return array[
    cube(array[x,     y,     z    ]),
    cube(array[x + 1, y,     z    ]),
    cube(array[x,     y + 1, z    ]),
    cube(array[x,     y,     z + 1]),
    cube(array[x + 1, y + 1, z    ]),
    cube(array[x,     y + 1, z + 1]),
    cube(array[x + 1, y,     z + 1])
  ];
end;
$$ language plpgsql strict;
例子
test=# select seq_3d();
seq_3d
---------------------------------------------------------------------------------------
{"(1, 1, 1)","(2, 1, 1)","(1, 2, 1)","(1, 1, 2)","(2, 2, 1)","(1, 2, 2)","(2, 1, 2)"}
(1 row)
test=# select seq_3d();
seq_3d
---------------------------------------------------------------------------------------
{"(2, 2, 2)","(3, 2, 2)","(2, 3, 2)","(2, 2, 3)","(3, 3, 2)","(2, 3, 3)","(3, 2, 3)"}
(1 row)
多維序列
以此類推,可以得到多維序列。
多維數據的空間存放和BRIN塊級索引
前面講到了空間聚集存儲,如果數據按空間順序存放,使用BRIN塊級索引,可以在任意維度上得到最好的查詢效率,真正做到一個塊級索引支持任意列的高效過濾。
例子
-- Start from fresh counters. seq1..seq3 may already exist from the earlier
-- examples, in which case a plain CREATE SEQUENCE would fail and the three
-- counters might no longer be aligned.
drop sequence if exists seq1, seq2, seq3;
create sequence seq1;
create sequence seq2;
create sequence seq3;

-- NOTE: nextval() yields bigint while the columns are int; a value outside
-- the int range would be rejected on insert.
create table if not exists tbl(c1 int, c2 int, c3 int);

-- cluster_insert(): advance seq1, seq2 and seq3 by one step each and insert
-- seven rows into tbl built from the freshly drawn values (x, y, z):
--   (x, y, z),
--   (x+1, y, z), (x, y+1, z), (x, y, z+1),
--   (x+1, y+1, z), (x, y+1, z+1), (x+1, y, z+1)
-- The rows go in as one multi-row INSERT with an explicit column list, so the
-- function does not depend on tbl's physical column order and issues one
-- statement instead of seven.
-- STRICT is kept from the original definition; the function takes no
-- arguments, so it does not influence evaluation.
create or replace function cluster_insert() returns void as $$
declare
  x bigint := nextval('seq1');
  y bigint := nextval('seq2');
  z bigint := nextval('seq3');
begin
  insert into tbl (c1, c2, c3) values
    (x,     y,     z    ),
    (x + 1, y,     z    ),
    (x,     y + 1, z    ),
    (x,     y,     z + 1),
    (x + 1, y + 1, z    ),
    (x,     y + 1, z + 1),
    (x + 1, y,     z + 1);
end;
$$ language plpgsql strict;
壓測,寫入大量數據
vi test.sql
select count(*) from (select cluster_insert() from generate_series(1,100)) t;
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 32 -j 32 -T 1200
檢查多維聚集性
test=# select * from tbl limit 10;
c1 | c2 | c3
---------+---------+---------
1992652 | 1992653 | 1992652
1992573 | 1992574 | 1992578
1992574 | 1992574 | 1992578
1992573 | 1992575 | 1992578
1992573 | 1992574 | 1992579
1992574 | 1992575 | 1992578
1992573 | 1992575 | 1992579
1992574 | 1992574 | 1992579
1992658 | 1992658 | 1992658
1992659 | 1992658 | 1992658
(10 rows)
創建BRIN塊級索引。
create index idx on tbl using brin (c1,c2,c3);
test=# \dt+ tbl
List of relations
Schema | Name | Type | Owner | Size | Description
--------+---------------------+-------+----------+------------+-------------
public | tbl | table | postgres | 97 GB |
(1 row)
test=# \di+ idx
List of relations
Schema | Name | Type | Owner | Table | Size | Description
--------+------+-------+----------+-------+--------+-------------
public | idx | index | postgres | tbl | 456 kB |
(1 row)
看看456KB的索引,在97 GB的數據層面,查詢效率如何。
任意列、組合查詢過濾性。
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000;
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c2 between 1 and 1000;
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c3 between 1 and 1000;
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000;
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c3 between 100 and 2000;
explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000 and c3 between 1 and 2000;
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=650.23..31623.80 rows=1 width=12) (actual time=27.302..50.284 rows=6997 loops=1)
Output: c1, c2, c3
Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000))
Rows Removed by Index Recheck: 229803
Heap Blocks: lossy=1280
Buffers: shared hit=1942
-> Bitmap Index Scan on idx (cost=0.00..650.23 rows=23810 width=0) (actual time=26.881..26.881 rows=12800 loops=1)
Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000))
Buffers: shared hit=662
Planning time: 0.095 ms
Execution time: 50.636 ms
(11 rows)
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c2 between 1 and 1000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=650.23..31623.80 rows=1 width=12) (actual time=27.886..49.011 rows=6997 loops=1)
Output: c1, c2, c3
Recheck Cond: ((tbl.c2 >= 1) AND (tbl.c2 <= 1000))
Rows Removed by Index Recheck: 229803
Heap Blocks: lossy=1280
Buffers: shared hit=1942
-> Bitmap Index Scan on idx (cost=0.00..650.23 rows=23810 width=0) (actual time=27.512..27.512 rows=12800 loops=1)
Index Cond: ((tbl.c2 >= 1) AND (tbl.c2 <= 1000))
Buffers: shared hit=662
Planning time: 0.040 ms
Execution time: 49.348 ms
(11 rows)
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c3 between 1 and 1000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=650.23..31623.80 rows=1 width=12) (actual time=25.238..46.292 rows=6997 loops=1)
Output: c1, c2, c3
Recheck Cond: ((tbl.c3 >= 1) AND (tbl.c3 <= 1000))
Rows Removed by Index Recheck: 229803
Heap Blocks: lossy=1280
Buffers: shared hit=1942
-> Bitmap Index Scan on idx (cost=0.00..650.23 rows=23810 width=0) (actual time=24.875..24.875 rows=12800 loops=1)
Index Cond: ((tbl.c3 >= 1) AND (tbl.c3 <= 1000))
Buffers: shared hit=662
Planning time: 0.044 ms
Execution time: 46.631 ms
(11 rows)
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=650.23..31742.85 rows=1 width=12) (actual time=30.018..48.522 rows=6307 loops=1)
Output: c1, c2, c3
Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000))
Rows Removed by Index Recheck: 230493
Heap Blocks: lossy=1280
Buffers: shared hit=1942
-> Bitmap Index Scan on idx (cost=0.00..650.23 rows=23810 width=0) (actual time=27.273..27.273 rows=12800 loops=1)
Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000))
Buffers: shared hit=662
Planning time: 0.049 ms
Execution time: 48.829 ms
(11 rows)
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c3 between 100 and 2000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=650.23..31742.85 rows=1 width=12) (actual time=27.565..46.347 rows=6307 loops=1)
Output: c1, c2, c3
Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c3 >= 100) AND (tbl.c3 <= 2000))
Rows Removed by Index Recheck: 230493
Heap Blocks: lossy=1280
Buffers: shared hit=1942
-> Bitmap Index Scan on idx (cost=0.00..650.23 rows=23810 width=0) (actual time=24.799..24.799 rows=12800 loops=1)
Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c3 >= 100) AND (tbl.c3 <= 2000))
Buffers: shared hit=662
Planning time: 0.055 ms
Execution time: 46.656 ms
(11 rows)
test=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1 between 1 and 1000 and c2 between 100 and 2000 and c3 between 1 and 2000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=650.23..31861.90 rows=1 width=12) (actual time=28.703..49.599 rows=6307 loops=1)
Output: c1, c2, c3
Recheck Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000) AND (tbl.c3 >= 1) AND (tbl.c3 <= 2000))
Rows Removed by Index Recheck: 230493
Heap Blocks: lossy=1280
Buffers: shared hit=1942
-> Bitmap Index Scan on idx (cost=0.00..650.23 rows=23810 width=0) (actual time=25.590..25.590 rows=12800 loops=1)
Index Cond: ((tbl.c1 >= 1) AND (tbl.c1 <= 1000) AND (tbl.c2 >= 100) AND (tbl.c2 <= 2000) AND (tbl.c3 >= 1) AND (tbl.c3 <= 2000))
Buffers: shared hit=662
Planning time: 0.114 ms
Execution time: 49.919 ms
(11 rows)
小結
本文介紹了如何創建、生成多維序列。
本文驗證了數據如果按照多維序列聚集存放,可以達到塊級索引最強過濾性,任意字段都能實現高效率過濾。
如果數據的多列本身不存在相關性,可以參考這篇文檔,對數據進行空間重分布存儲。得到最強過濾性。
最後更新:2017-10-28 23:03:45
上一篇:
PostgreSQL Oracle 兼容性 之 NUMTODSINTERVAL
下一篇:
PostgreSQL 事件觸發器應用 - DDL審計
《Spark 官方文檔》Spark獨立模式
每年14PB數據存儲需求,海量交通安全數據如何安放?
linux係統centOS6.5使用goaccess工具分析nginx網站日誌
C++對象模型(一):The Semantics of Constructors The Default Constructor (默認構造函數什麼時候會被創建出來)
熱愛自己的產品
【從AlphaGo的恐怖進化談起】附機器學習入門教程
iphone6鎖死怎麼解鎖 iphone6屏幕鎖定怎麼破解開機密碼
全球化背景下的物聯網產業發展
關於FileUpload文件上傳的文件大小限製問題
CentOS6.2編譯gcc失敗,kernel-headers錯誤