2017-11-01 16 views
0

特定の日について、一連のリターン(Ret)同士のピアソン相関テーブルを作成しようとしています。比較したいアイテムの数は日によって異なり、50から200の間で変動します。(質問タイトル:SQL Serverで不特定多数のアイテム間のピアソン相関テーブルを作成する方法)

分析対象の特定の日について(ここでは9月1日に、過去3日間の相関を見たいとします)、項目ごとのすべてのリターンを格納した次のようなテーブル RetTable が1つあります。以降の計算はこのテーブルから行います:

Id RetDate  Ret 
1 2017-08-31 -0.062720 
1 2017-08-30 0.032650 
1 2017-08-29 -0.086360 
2 2017-08-31 -0.033100 
2 2017-08-30 0.032900 
2 2017-08-29 -0.032400 
3 2017-08-31 -0.017900 
3 2017-08-30 -0.018300 
3 2017-08-29 -0.015200 

各項目の平均値を格納した別のテーブル(MeanTable)を次のように作成しました。次の段階では、各リターンからその項目の平均リターンを減算し、テーブル DiffRetAvgTable を得ます:

Id MeanRet 
1 -0.038810 
2 -0.010866 
3 -0.017133 

ID RetDate DiffRetAvg 
1 2017-08-31 -0.023910 
1 2017-08-30 0.071460 
1 2017-08-29 -0.047550 
2 2017-08-31 -0.022234 
2 2017-08-30 0.043766 
2 2017-08-29 -0.021534 
3 2017-08-31 -0.000767 
3 2017-08-30 -0.001167 
3 2017-08-29 0.001933 

次のステップでは、各項目のDiffRetAvgを、他の項目の対応する(同じ日付の)DiffRetAvgと掛け合わせる必要があります。ここで行き詰まっています。アイテムが2、3個しかなければ単純な算術で済みますが、日によってアイテムがいくつあるのか事前には分かりません。

ピボットの相関表は次のようになります。

ID 1 2 3 
1 1 ? ? 
2 ? 1 ? 
3 ? ? 1 

(各項目の自分自身との相関は常に1になります)

これまでに書いたコードは次のとおりです:

-- Pairwise Pearson correlation between items, built up in five steps using
-- table variables. The final SELECT returns one row per (ID1, ID2) pair.
-- NOTE: Exp2 and SumPairMulti are NUMERIC(10,6), so each sum of products is
-- converted to 6 decimal places when it is inserted into those columns.
DECLARE @MeanTable       AS TABLE (Id INT, AvgRet NUMERIC(10,6));
DECLARE @SquareTable     AS TABLE (Id INT, SqRoot NUMERIC(10,6));  -- declared but not referenced in this script
DECLARE @RetTable        AS TABLE (Id INT, RetDate DATE, Ret NUMERIC(10,6));
DECLARE @DiffRetAvgTable AS TABLE (Id INT, RetDate DATE, DiffRetAVG NUMERIC(10,6));
DECLARE @PairMultiTable  AS TABLE (ID1 INT, ID2 INT, Exp2 NUMERIC(10,6));
DECLARE @PairMultiTable2 AS TABLE (ID1 INT, ID2 INT, SumPairMulti NUMERIC(10,6));

-- Sample data: three items, three dates each.
INSERT INTO @RetTable (Id, RetDate, Ret)
VALUES
    (1, '2017-08-31', -0.06272),
    (1, '2017-08-30',  0.03265),
    (1, '2017-08-29', -0.08636),
    (2, '2017-08-31', -0.0331),
    (2, '2017-08-30',  0.0329),
    (2, '2017-08-29', -0.0324),
    (3, '2017-08-31', -0.0179),
    (3, '2017-08-30', -0.0183),
    (3, '2017-08-29', -0.0152);

-- Step 1: average return per item.
INSERT INTO @MeanTable (Id, AvgRet)
SELECT
    Id,
    AVG(Ret)
FROM @RetTable
GROUP BY Id;

-- Step 2: deviation of every return from its item's average (DiffRetAvg).
INSERT INTO @DiffRetAvgTable (Id, RetDate, DiffRetAVG)
SELECT
    r.Id,
    r.RetDate,
    r.Ret - m.AvgRet
FROM @RetTable AS r
INNER JOIN @MeanTable AS m
    ON r.Id = m.Id;

-- Step 3: for every ordered pair of items, sum the products of their
-- same-date deviations (SumPairMulti).
WITH pair_products AS (
    SELECT
        lhs.Id                          AS ID1,
        rhs.Id                          AS ID2,
        lhs.DiffRetAVG * rhs.DiffRetAVG AS Multi
    FROM @DiffRetAvgTable AS lhs
    INNER JOIN @DiffRetAvgTable AS rhs
        ON lhs.RetDate = rhs.RetDate
)
INSERT INTO @PairMultiTable2 (ID1, ID2, SumPairMulti)
SELECT
    ID1,
    ID2,
    SUM(Multi)
FROM pair_products
GROUP BY ID1, ID2;

-- Step 4: the same sum restricted to an item paired with itself, i.e. the
-- sum of squared deviations per item (Exp2).
WITH self_products AS (
    SELECT
        lhs.Id                          AS ID1,
        rhs.Id                          AS ID2,
        lhs.DiffRetAVG * rhs.DiffRetAVG AS Multi
    FROM @DiffRetAvgTable AS lhs
    INNER JOIN @DiffRetAvgTable AS rhs
        ON  lhs.RetDate = rhs.RetDate
        AND lhs.Id      = rhs.Id
)
INSERT INTO @PairMultiTable (ID1, ID2, Exp2)
SELECT
    ID1,
    ID2,
    SUM(Multi)
FROM self_products
GROUP BY ID1, ID2;

-- Step 5: correlation = SumPairMulti / sqrt(ID1_Exp2 * ID2_Exp2).
SELECT
    first_item.ID1,
    second_item.ID2,
    pair.SumPairMulti / SQRT(first_item.Exp2 * second_item.Exp2) AS Correlation
FROM @PairMultiTable2 AS pair
INNER JOIN @PairMultiTable AS first_item
    ON first_item.ID1 = pair.ID1
INNER JOIN @PairMultiTable AS second_item
    ON second_item.ID2 = pair.ID2;
です

これを実行すると次のテーブルが得られます:

ID1 ID2 Correlation 
1 1 1 
2 1 0.980387747360583 
3 1 -0.71935145479392 
1 2 0.980387747360583 
2 2 1 
3 2 -0.578754772354275 
1 3 -0.71935145479392 
2 3 -0.578754772354275 
3 3 1

最終的には、次のような出力にしたいです:

ID 1  2  3 
1 1 0.98 -0.72 
2 0.98 1 -0.58 
3 -0.72 -0.58 1 
+0

「次のステップは…になると思います」というのは、次のステップが何なのか分からないという意味ですか? それとも、そのステップの実行方法が分からないという意味ですか? –

+0

あなたが望むものが「PIVOT」であれば、[**ここ**](https://technet.microsoft.com/es-es/library/ms177410(v=sql.105).aspx)をチェックしてください –

+0

あなたのピボット行はアイテムIDで、列は日ですか? –

答えて

0
-- Pearson correlation matrix for an arbitrary number of items (SQL Server).
-- Steps 1-5 compute one correlation per (ID1, ID2) pair; step 6 pivots the
-- pairs into a matrix with one column per item using dynamic SQL.
--
-- Intermediate values are FLOAT: sums of products of small deviations are
-- far below 1e-6, so NUMERIC(10,6) columns would round them and visibly
-- distort the resulting correlations.
DECLARE @RetTable        AS TABLE (Id INT, RetDate DATE, Ret NUMERIC(10,6));
DECLARE @MeanTable       AS TABLE (Id INT, AvgRet FLOAT);
DECLARE @DiffRetAvgTable AS TABLE (Id INT, RetDate DATE, DiffRetAVG FLOAT);
DECLARE @PairMultiTable  AS TABLE (ID1 INT, ID2 INT, Exp2 FLOAT);
DECLARE @PairMultiTable2 AS TABLE (ID1 INT, ID2 INT, SumPairMulti FLOAT);

-- The correlation pairs live in a temp table rather than a table variable:
-- a table variable is not visible inside the dynamic SQL executed in step 6,
-- whereas a #temp table created in this session is.
IF OBJECT_ID('tempdb..#CorrTable') IS NOT NULL
    DROP TABLE #CorrTable;
CREATE TABLE #CorrTable (ID1 INT, ID2 INT, Correlation NUMERIC(10,6));

INSERT INTO @RetTable (Id, RetDate, Ret)
VALUES
    (1, '2017-08-31', -0.06272),
    (1, '2017-08-30',  0.03265),
    (1, '2017-08-29', -0.08636),
    (2, '2017-08-31', -0.0331),
    (2, '2017-08-30',  0.0329),
    (2, '2017-08-29', -0.0324),
    (3, '2017-08-31', -0.0179),
    (3, '2017-08-30', -0.0183),
    (3, '2017-08-29', -0.0152);

--Step 1 : Get the average return of each item
-- (cast to FLOAT first so AVG does not cut the mean to 6 decimals)
INSERT INTO @MeanTable (Id, AvgRet)
SELECT
    Id,
    AVG(CAST(Ret AS FLOAT)) AS AvgRet
FROM @RetTable
GROUP BY Id;

--Step 2 : Subtract the item average from every return and call it DiffRetAvg
INSERT INTO @DiffRetAvgTable (Id, RetDate, DiffRetAVG)
SELECT
    r.Id,
    r.RetDate,
    r.Ret - m.AvgRet AS DiffRetAvg
FROM @RetTable AS r
INNER JOIN @MeanTable AS m
    ON r.Id = m.Id;

--Step 3 : For every ordered pair of items, sum the products of their
--         same-date DiffRetAvg values and call it SumPairMulti
INSERT INTO @PairMultiTable2 (ID1, ID2, SumPairMulti)
SELECT
    a.Id AS ID1,
    b.Id AS ID2,
    SUM(a.DiffRetAVG * b.DiffRetAVG) AS SumPairMulti
FROM @DiffRetAvgTable AS a
INNER JOIN @DiffRetAvgTable AS b
    ON a.RetDate = b.RetDate
GROUP BY a.Id, b.Id;

--Step 4 : Sum of squared DiffRetAvg per item (the pair of an item with itself)
INSERT INTO @PairMultiTable (ID1, ID2, Exp2)
SELECT
    ID1,
    ID2,
    SumPairMulti AS Exp2
FROM @PairMultiTable2
WHERE ID1 = ID2;

--Step 5 : Correlation of each pair : SumPairMulti / sqrt(ID1_Exp2 * ID2_Exp2)
-- NULLIF avoids a divide-by-zero error when an item has zero variance
-- (constant returns); such pairs get a NULL correlation instead.
INSERT INTO #CorrTable (ID1, ID2, Correlation)
SELECT
    c.ID1,
    c.ID2,
    d.SumPairMulti / NULLIF(c.SQRTProd, 0) AS Correlation
FROM (
    SELECT
        a.ID1,
        b.ID2,
        SQRT(a.Exp2 * b.Exp2) AS SQRTProd
    FROM @PairMultiTable AS a
    CROSS JOIN @PairMultiTable AS b
) AS c
INNER JOIN @PairMultiTable2 AS d
    ON  d.ID1 = c.ID1
    AND d.ID2 = c.ID2;

--Step 6 : Create the pivot table, using the technique described in
--         "SQL Server 2005 Pivot on Unknown Number of Columns"
DECLARE @cols  AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- Build the column list "[1],[2],[3],..." from the item ids present in the
-- data, in numeric order. QUOTENAME keeps each id a safely quoted identifier.
SELECT @cols = STUFF((
        SELECT ',' + QUOTENAME(ids.ID2)
        FROM (SELECT DISTINCT ID2 FROM #CorrTable) AS ids
        ORDER BY ids.ID2
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

-- With no input rows there are no columns to pivot on, so skip the query.
IF @cols IS NOT NULL
BEGIN
    SET @query = N'SELECT ID1, ' + @cols + N'
    FROM
    (
        SELECT ID1, ID2, Correlation
        FROM #CorrTable
    ) AS x
    PIVOT
    (
        MIN(Correlation)
        FOR ID2 IN (' + @cols + N')
    ) AS p
    ORDER BY ID1;';

    EXECUTE (@query);
END;
関連する問題