閱讀714 返回首頁    go 魔獸


PostgreSQL 線性回歸 - 股價預測 2

以貴州茅台的曆史數據為例, 校驗預測誤差如下 : 
未來1天的最準
未來第二天的預測開始誤差開始放大, 但是放大還不明顯..
未來第三天,第四天的就有比較大的誤差率.

e928c6303e468e9d0d3acac1a0c16f186b6c267a

將數據導入源表
--  插入順序從舊到新排列. 不要搞反了, 例如

06/02/2010 13.49 13.49 12.52 13.03 38670320 1571709568.000
06/03/2010 13.09 13.26 12.69 12.75 27873689 1135419264.000
06/04/2010 12.63 12.99 12.56 12.77 19305447 775373248.000
06/07/2010 12.52 13.13 12.43 13.03 24762597 997748928.000
06/08/2010 12.99 13.02 12.56 12.82 18987054 762023168.000
06/09/2010 12.87 13.90 12.87 13.36 38510441 1623107328.000
06/10/2010 13.37 13.51 13.26 13.39 19669987 823111744.000
06/11/2010 13.46 13.58 13.27 13.37 18622806 783614336.000
06/17/2010 13.48 13.99 13.29 13.31 25604558 1095663744.000
06/18/2010 13.13 13.23 12.37 12.57 24897719 996842496.000
....


create table orig (id int, x numeric);
數據轉換為

1, 13.49
2, 13.09
,......


create table tmp (
id int, 
x numeric, -- 自變量
y numeric  -- 因變量
);

自變量和因變量的選擇很有講究.
例如你可以選擇昨天的收盤價和今天的開盤價作為自變量和因變量.
當然也可以做多元的分析, 例如昨天的收盤價, 交易量作為自變量, 今天的開盤價作為因變量.
等等......

--  昨天預測今天的樣本數據

truncate tmp;
insert into tmp
select id,
lag(x,1) over(order by id),
x from orig;


生成最近2天預測未來1天的樣本數據

create table tmp1 (like tmp);
insert into tmp1
select id,
x+
lag(x,1) over(order by id),
lead(x,1) over(order by id)
from orig;


生成最近3天預測未來2天的樣本數據

create table tmp2 (like tmp);
insert into tmp2
select id,
x+
lag(x,1) over(order by id)+
lag(x,2) over(order by id),
lead(x,1) over(order by id)+
lead(x,2) over(order by id)
from orig;


生成最近4天預測未來3天的樣本數據

create table tmp3 (like tmp);
insert into tmp3
select id,
x+
lag(x,1) over(order by id)+
lag(x,2) over(order by id)+
lag(x,3) over(order by id),
lead(x,1) over(order by id)+
lead(x,2) over(order by id)+
lead(x,3) over(order by id)
from orig;


生成最近5天預測未來4天的樣本數據

create table tmp4 (like tmp);
insert into tmp4
select id,
x+
lag(x,1) over(order by id)+
lag(x,2) over(order by id)+
lag(x,3) over(order by id)+
lag(x,4) over(order by id),
lead(x,1) over(order by id)+
lead(x,2) over(order by id)+
lead(x,3) over(order by id)+
lead(x,4) over(order by id)
from orig;


生成預測數據的函數如下

CREATE OR REPLACE FUNCTION public.check_predict(
IN v_tbl name, -- 樣本表名
IN OUT ov integer, -- 校驗哪條記錄, 倒數第?個值的預測值, 不停迭代, 最後計算所有的實際值和預測值的corr, 選擇最佳相關?
OUT v_id int, -- 真實值唯一標識
OUT r_chkv numeric, -- 真實值, 用於校驗
OUT p_yv numeric, -- 預測值,因變量
OUT r_xv numeric, -- 自變量,用於預測因變量
OUT dev numeric, -- 誤差
OUT v_slope numeric, -- 斜率
OUT v_inter numeric, -- 截距
OUT v_r2 numeric, -- 相關性
OUT sampcnt int -- 獲得最大相關度的樣本數
)
RETURNS record
LANGUAGE plpgsql
AS $function$
declare
r2_1 numeric := 0; -- 相關性
r2_2 numeric := 0; -- 最大相關性
inter_1 numeric; -- 截距
slope_1 numeric; -- 斜率
inter_2 numeric; -- 最大相關性截距
slope_2 numeric; -- 最大相關性斜率
v_lmt int := 90; -- 使用的最大樣本集, 影響預測準確度
v_min int := 5; -- 使用的最小樣本數, 影響預測準確度
begin
-- 自變量 tbl.x
-- 因變量 tbl.y

-- 篩選最大相關度的樣本數, 並記錄下儲斜率, 截距.
for i in 0..v_lmt
loop
execute $_$with t1 as
(
select row_number() over(order by id desc) as rn,*
from
(select id,x,y from $_$||v_tbl||$_$ where x+y is not null order by id desc offset $1 limit $2) t
)
select regr_intercept(t1.y,t1.x), regr_slope(t1.y,t1.x), regr_r2(t1.y,t1.x)
from t1
where t1.rn<=$3 $_$
into inter_1,slope_1,r2_1
using ov, v_lmt+v_min, i+v_min;

if r2_1>r2_2 then
inter_2 := inter_1;
slope_2 := slope_1;
r2_2 := r2_1;
sampcnt := i+v_min;
end if;
end loop;

-- 下一個自變量ID, 用於預測因變量
execute $_$select id+1 from $_$||v_tbl||$_$ where x+y is not null order by id desc offset $1 limit 1$_$
into v_id using ov;

-- 預測值,自變量,真實值, 如果真實值為空, 說明該條記錄沒有未來的真實記錄, 就是要預測的將來值.
execute $_$select round($_$||slope_2||'*x+'||inter_2||$_$,4), x, y from $_$||v_tbl||$_$ where id=$1 $_$
into p_yv,r_xv,r_chkv
using v_id;

dev := abs(1-round(p_yv/r_chkv,4));
v_slope := round(slope_2,5);
v_inter := round(inter_2,5);
v_r2 := round(r2_2,5);

return;
end;
$function$;


生成最近100天的曆史產生的預測數據和校驗數據.

create table p1 as select (check_predict('tmp1',i)).* from generate_series(0,100) t(i);
create table p2 as select (check_predict('tmp2',i)).* from generate_series(0,100) t(i);
create table p3 as select (check_predict('tmp3',i)).* from generate_series(0,100) t(i);
create table p4 as select (check_predict('tmp4',i)).* from generate_series(0,100) t(i);



預測未來4天的數據的方法.

未來第1 p1
未來第2 p2-p1
未來第3 p3-p2
未來第4 p4-p3


驗證方法 : 

select
v_id,
r_chkv,
p_yv,
abs(1-round(p_yv/r_chkv,4)) as mis1,
r_chkv2,
p_yv2,
abs(1-round(p_yv2/r_chkv2,4)) as mis2,
r_chkv3,
p_yv3,
abs(1-round(p_yv3/r_chkv3,4)) as mis3,
r_chkv4,
p_yv4,
abs(1-round(p_yv4/r_chkv4,4)) as mis4
from
(
select
p1.v_id,
p1.r_chkv,
p1.p_yv,
lag(p1.r_chkv,1) over(order by p1.v_id desc) as r_chkv2,
p2.p_yv-p1.p_yv as p_yv2,
lag(p1.r_chkv,2) over(order by p1.v_id desc) as r_chkv3,
p3.p_yv-p2.p_yv as p_yv3,
lag(p1.r_chkv,3) over(order by p1.v_id desc) as r_chkv4,
p4.p_yv-p3.p_yv as p_yv4
from
p1,p2,p3,p4
where p1.v_id=p2.v_id and p1.v_id=p3.v_id and p1.v_id=p4.v_id
) t;

 
驗證數據 : 

v_id | r_chkv | p_yv | miss1 | r_chkv2 | p_yv2 | mis2 | r_chkv3 | p_yv3 | miss3 | r_chkv4 | p_yv4 | miss4
------+--------+----------+--------+---------+----------+--------+---------+----------+--------+---------+----------+--------
1050 | 157.81 | 159.5745 | 0.0112 | | 160.7402 | | | 162.2997 | | | 151.4013 |
1049 | 159.02 | 160.5464 | 0.0096 | 157.81 | 162.1956 | 0.0278 | | 162.3978 | | | 148.0711 |
1048 | 159.87 | 161.9598 | 0.0131 | 159.02 | 162.4292 | 0.0214 | 157.81 | 162.0693 | 0.0270 | | 147.9578 |
1047 | 161.00 | 162.6313 | 0.0101 | 159.87 | 162.1514 | 0.0143 | 159.02 | 161.7551 | 0.0172 | 157.81 | 158.5270 | 0.0045
1046 | 162.74 | 162.0216 | 0.0044 | 161.00 | 161.6165 | 0.0038 | 159.87 | 158.5411 | 0.0083 | 159.02 | 158.4651 | 0.0035
1045 | 162.33 | 161.3257 | 0.0062 | 162.74 | 149.7262 | 0.0800 | 161.00 | 167.0954 | 0.0379 | 159.87 | 161.2083 | 0.0084
1044 | 161.50 | 158.8824 | 0.0162 | 162.33 | 157.8832 | 0.0274 | 162.74 | 160.6849 | 0.0126 | 161.00 | 163.8815 | 0.0179
1043 | 160.92 | 156.8056 | 0.0256 | 161.50 | 159.7366 | 0.0109 | 162.33 | 159.6163 | 0.0167 | 162.74 | 159.0992 | 0.0224
1042 | 156.49 | 158.8599 | 0.0151 | 160.92 | 159.4193 | 0.0093 | 161.50 | 158.9038 | 0.0161 | 162.33 | 159.4251 | 0.0179
1041 | 156.70 | 160.1439 | 0.0220 | 156.49 | 159.0677 | 0.0165 | 160.92 | 159.2684 | 0.0103 | 161.50 | 160.9401 | 0.0035
1040 | 160.71 | 158.8216 | 0.0118 | 156.70 | 158.9957 | 0.0147 | 156.49 | 160.6000 | 0.0263 | 160.92 | 161.9692 | 0.0065
1039 | 159.25 | 158.4342 | 0.0051 | 160.71 | 160.1400 | 0.0035 | 156.70 | 161.7433 | 0.0322 | 156.49 | 161.8723 | 0.0344
1038 | 158.02 | 159.6522 | 0.0103 | 159.25 | 161.4267 | 0.0137 | 160.71 | 149.8541 | 0.0675 | 156.70 | 175.7699 | 0.1217
1037 | 158.47 | 161.3605 | 0.0182 | 158.02 | 161.7616 | 0.0237 | 159.25 | 163.7032 | 0.0280 | 160.71 | 164.0469 | 0.0208
1036 | 160.50 | 161.8688 | 0.0085 | 158.47 | 163.5362 | 0.0320 | 158.02 | 163.9749 | 0.0377 | 159.25 | 167.3141 | 0.0506
1035 | 161.90 | 162.8418 | 0.0058 | 160.50 | 163.8163 | 0.0207 | 158.47 | 167.1359 | 0.0547 | 158.02 | 153.6915 | 0.0274
1034 | 161.49 | 163.9804 | 0.0154 | 161.90 | 166.8222 | 0.0304 | 160.50 | 166.4108 | 0.0368 | 158.47 | 151.0944 | 0.0465
1033 | 163.87 | 165.8788 | 0.0123 | 161.49 | 166.3372 | 0.0300 | 161.90 | 165.9846 | 0.0252 | 160.50 | 164.6662 | 0.0260
1032 | 163.70 | 166.9794 | 0.0200 | 163.87 | 166.0894 | 0.0135 | 161.49 | 164.6576 | 0.0196 | 161.90 | 161.7623 | 0.0009
1031 | 167.65 | 165.4608 | 0.0131 | 163.70 | 164.2976 | 0.0037 | 163.87 | 161.5290 | 0.0143 | 161.49 | 160.9499 | 0.0033
1030 | 165.70 | 163.8841 | 0.0110 | 167.65 | 161.2073 | 0.0384 | 163.70 | 160.5254 | 0.0194 | 163.87 | 158.8926 | 0.0304
1029 | 164.70 | 160.9278 | 0.0229 | 165.70 | 159.7672 | 0.0358 | 167.65 | 158.2872 | 0.0558 | 163.70 | 158.4474 | 0.0321
1028 | 162.58 | 158.7257 | 0.0237 | 164.70 | 157.5536 | 0.0434 | 165.70 | 157.6546 | 0.0486 | 167.65 | 173.5841 | 0.0354
1027 | 158.81 | 157.3449 | 0.0092 | 162.58 | 157.0147 | 0.0342 | 164.70 | 158.5247 | 0.0375 | 165.70 | 171.3281 | 0.0340
1026 | 158.20 | 156.2507 | 0.0123 | 158.81 | 157.7433 | 0.0067 | 162.58 | 158.9512 | 0.0223 | 164.70 | 160.1243 | 0.0278
1025 | 156.00 | 157.0983 | 0.0070 | 158.20 | 158.4975 | 0.0019 | 158.81 | 159.6796 | 0.0055 | 162.58 | 145.3973 | 0.1057
1024 | 156.00 | 158.3550 | 0.0151 | 156.00 | 159.4457 | 0.0221 | 158.20 | 160.4796 | 0.0144 | 158.81 | 148.8340 | 0.0628
1023 | 157.72 | 159.1382 | 0.0090 | 156.00 | 160.2608 | 0.0273 | 156.00 | 162.2360 | 0.0400 | 158.20 | 146.8649 | 0.0717
1022 | 158.50 | 159.9296 | 0.0090 | 157.72 | 161.8630 | 0.0263 | 156.00 | 160.3253 | 0.0277 | 156.00 | 161.2206 | 0.0335
1021 | 159.27 | 161.3250 | 0.0129 | 158.50 | 160.2559 | 0.0111 | 157.72 | 161.1143 | 0.0215 | 156.00 | 163.1739 | 0.0460
1020 | 160.00 | 160.7364 | 0.0046 | 159.27 | 160.7603 | 0.0094 | 158.50 | 162.8499 | 0.0274 | 157.72 | 163.6353 | 0.0375
1019 | 162.00 | 159.5627 | 0.0150 | 160.00 | 161.9158 | 0.0120 | 159.27 | 163.1346 | 0.0243 | 158.50 | 163.7176 | 0.0329
1018 | 158.74 | 161.2400 | 0.0157 | 162.00 | 162.5665 | 0.0035 | 160.00 | 163.4097 | 0.0213 | 159.27 | 164.0869 | 0.0302
1017 | 159.75 | 162.5560 | 0.0176 | 158.74 | 163.0135 | 0.0269 | 162.00 | 163.6795 | 0.0104 | 160.00 | 164.2167 | 0.0264
1016 | 162.00 | 162.5926 | 0.0037 | 159.75 | 163.1884 | 0.0215 | 158.74 | 163.7584 | 0.0316 | 162.00 | 164.8763 | 0.0178
1015 | 162.20 | 162.5296 | 0.0020 | 162.00 | 163.0662 | 0.0066 | 159.75 | 164.4497 | 0.0294 | 158.74 | 164.8095 | 0.0382
1014 | 162.02 | 162.4474 | 0.0026 | 162.20 | 163.5357 | 0.0082 | 162.00 | 164.2277 | 0.0138 | 159.75 | 166.0446 | 0.0394
1013 | 162.04 | 162.6981 | 0.0041 | 162.02 | 163.3388 | 0.0081 | 162.20 | 162.3852 | 0.0011 | 162.00 | 170.6363 | 0.0533
1012 | 161.81 | 162.8286 | 0.0063 | 162.04 | 164.2343 | 0.0135 | 162.02 | 160.1472 | 0.0116 | 162.20 | 174.4763 | 0.0757
1011 | 162.48 | 163.1765 | 0.0043 | 161.81 | 165.7026 | 0.0241 | 162.04 | 157.7701 | 0.0264 | 162.02 | 176.5230 | 0.0895
1010 | 161.96 | 164.7836 | 0.0174 | 162.48 | 165.9001 | 0.0210 | 161.81 | 166.6854 | 0.0301 | 162.04 | 165.4803 | 0.0212
1009 | 163.10 |最後更新:2017-04-01 13:38:49

  上一篇:go PostgreSQL · 特性介紹 · 全文搜索介紹
  下一篇:go Greenplum 擴展 segment個數