自己結合(セルフジョイン)の代わりに HAVING 句を使用します。結合が不要で、テーブルスキャンも1回で済むため、はるかに効率的です。また、条件が複数ある場合でも、結合を追加するのではなく、HAVING 句に式を追加するだけで済みます。
2番目の例:
-- Return each ListingID that owns at least one row for every required
-- (ExtrafieldId, Value) pair, using one GROUP BY instead of a self-join.
SELECT
    ListingID
FROM [YourTable]
GROUP BY
    ListingID
HAVING
    -- MAX(...) = 1 is true exactly when at least one row in the group meets the condition.
    MAX(CASE WHEN ExtrafieldId = 1 AND Value = 1 THEN 1 ELSE 0 END) = 1
    AND MAX(CASE WHEN ExtrafieldId = 2 AND Value = 7 THEN 1 ELSE 0 END) = 1
補遺
上記の主張は単純に誤りでした。HAVING を使う方法のほうが少し読みやすいとは思いますが、以下の自己結合を使う方法のほうがはるかに効率的です。
-- Self-join formulation: one alias of Listing per required (ExtraFieldID, Value)
-- pair. A ListingID is returned only when every alias finds a matching row.
-- The per-alias filters sit in the ON clauses; for inner joins this is
-- equivalent to listing them in WHERE.
SELECT
    field1.ListingID
FROM Listing AS field1
INNER JOIN Listing AS field2
    ON  field2.ListingID = field1.ListingID
    AND field2.ExtraFieldID = 2
    AND field2.Value = 7
INNER JOIN Listing AS field3
    ON  field3.ListingID = field1.ListingID
    AND field3.ExtraFieldID = 3
    AND field3.Value = ''
INNER JOIN Listing AS field4
    ON  field4.ListingID = field1.ListingID
    AND field4.ExtraFieldID = 4
    AND field4.Value = 1999
WHERE field1.ExtraFieldID = 1
  AND field1.Value = 1
これを証明するために、次のコードを実行してテストしました:
DECLARE @Iterations INT, @Listings INT
/*******************************************************************************************************
SET THE PARAMETERS FOR THE TEST HERE, @Listings IS THE NUMBER OF ListingIDs TO INSERT INTO THE SAMPLE
TABLE. EACH LISTING GETS 4 RECORDS SO 10,000 LISTINGS WILL GENERATE A SAMPLE OF 40,000 RECORDS ETC.
@Iterations IS THE NUMBER OF SELECTS TO PERFORM TO TEST THE PERFORMANCE OF EACH METHOD.
*******************************************************************************************************/
SET @Iterations = 500
SET @Listings = 1000000
/*******************************************************************************************************/

-- Recreate the working tables from scratch on every run.
-- OBJECT_ID('tempdb..#name') resolves the temp table belonging to the current
-- session only. A LIKE '#name%' search over tempdb metadata also matches temp
-- tables owned by other sessions, which would make the DROP below fail.
IF OBJECT_ID('tempdb..#Listing') IS NOT NULL
BEGIN
    DROP TABLE #Listing
END

-- Sample data: one row per (ListingID, ExtraFieldID) pair.
CREATE TABLE #Listing (ListingID INT NOT NULL, ExtraFieldID TINYINT NOT NULL, Value VARCHAR(4), PRIMARY KEY (ListingID, ExtraFieldID))

IF OBJECT_ID('tempdb..#Results') IS NOT NULL
BEGIN
    DROP TABLE #Results
END

-- One row per benchmark iteration, holding the elapsed time of each method.
CREATE TABLE #Results (GroupBy INT, SelfJoin INT)
-- Suppress the "rows affected" message that every INSERT in the loops below would otherwise emit.
SET NOCOUNT ON

-- @GroupByStart / @SelfJoinStart mark the start of each timed query.
-- DATETIME2 + SYSDATETIME() are used because DATETIME / GETDATE() is only accurate
-- to about 3 ms, which is too coarse for the DATEDIFF(MICROSECOND, ...) readings below.
-- @t only receives the COUNT(*) of each timed query so that no result set is returned.
DECLARE @i INT, @GroupByStart DATETIME2(7), @SelfJoinStart DATETIME2(7), @t INT

-- Start after the highest existing ListingID, or at 0 when #Listing is empty.
SET @i = ISNULL((SELECT MAX(ListingID) + 1 FROM #Listing), 0)

-- FILL LISTING TABLE WITH RANDOM VALUES (FOUR ExtraFieldID ROWS PER ListingID)
WHILE @i < @Listings
BEGIN
    -- ExtraFieldID 1: random whole number 0..4
    INSERT INTO #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 1, ROUND(RAND() * 4, 0))
    -- ExtraFieldID 2: random whole number 0..20
    INSERT INTO #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 2, ROUND(RAND() * 20, 0))
    -- ExtraFieldID 3: empty string about half of the time, otherwise a random number
    INSERT INTO #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 3, CASE WHEN ROUND(RAND(), 0) = 0 THEN '' ELSE CONVERT(VARCHAR(4), ROUND(RAND(), 3) * 1000) END)
    -- ExtraFieldID 4: a year within roughly the last 100 years
    INSERT INTO #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 4, DATEPART(YEAR, DATEADD(YEAR, (RAND()-1) * 100, GETDATE())))

    SET @i = @i + 1
END

-- Parenthesised WITH (...) replaces the deprecated "WITH FILLFACTOR = n" form.
CREATE NONCLUSTERED INDEX #IX_Listing_Value ON #Listing (Value) WITH (FILLFACTOR = 100)

SET @i = 0

-- PERFORM BOTH METHODS X NUMBER OF TIMES TO GET AN AVERAGE EXECUTION TIME
-- NOTE(review): Value is VARCHAR(4) but is compared with integer literals (1, 7, 1999),
-- which forces an implicit conversion of the column in both methods. The literals are
-- left untouched so the benchmark keeps measuring the queries exactly as written;
-- confirm whether string literals were intended.
WHILE @i < @Iterations
BEGIN
    -- Method 1: single grouped pass with conditional counts in HAVING.
    SET @GroupByStart = SYSDATETIME()

    SELECT @t = COUNT(*)
    FROM (  SELECT ListingID
            FROM #Listing
            GROUP BY ListingID
            HAVING COUNT(CASE WHEN ExtraFieldID = 1 AND Value = 1 THEN 1 END) > 0
               AND COUNT(CASE WHEN ExtraFieldID = 2 AND Value = 7 THEN 1 END) > 0
               AND COUNT(CASE WHEN ExtraFieldID = 3 AND Value = '' THEN 1 END) > 0
               AND COUNT(CASE WHEN ExtraFieldID = 4 AND Value = 1999 THEN 1 END) > 0
         ) D

    -- Method 2: one self-join per required (ExtraFieldID, Value) pair.
    SET @SelfJoinStart = SYSDATETIME()

    SELECT @t = COUNT(*)
    FROM (  SELECT t1.ListingID
            FROM #Listing AS t1
            INNER JOIN #Listing AS t2
                ON t2.ListingID = t1.ListingID
            INNER JOIN #Listing AS t3
                ON t3.ListingID = t1.ListingID
            INNER JOIN #Listing AS t4
                ON t4.ListingID = t1.ListingID
            WHERE (t1.ExtraFieldID = 1 AND t1.Value = 1)
              AND (t2.ExtraFieldID = 2 AND t2.Value = 7)
              AND (t3.ExtraFieldID = 3 AND t3.Value = '')
              AND (t4.ExtraFieldID = 4 AND t4.Value = 1999)
         ) D

    -- Record both elapsed times in microseconds; the self-join interval ends "now".
    INSERT INTO #Results (GroupBy, SelfJoin)
    SELECT DATEDIFF(MICROSECOND, @GroupByStart, @SelfJoinStart),
           DATEDIFF(MICROSECOND, @SelfJoinStart, SYSDATETIME())

    SET @i = @i + 1
END
-- #OverallResults is created only when missing and is never dropped by this script,
-- so it accumulates one summary row per run for the final per-sample-size report.
-- OBJECT_ID('tempdb..#name') checks the current session's temp table only; a
-- LIKE '#OverallResults%' search over tempdb metadata could match another session's
-- table, skip the CREATE and make the INSERT below fail.
IF OBJECT_ID('tempdb..#OverallResults') IS NULL
BEGIN
    CREATE TABLE #OverallResults (GroupBy INT NOT NULL, SelfJoin INT NOT NULL, Iterations INT NOT NULL, Listings INT NOT NULL)
END

-- Store this run's averages (microseconds).
-- Averaging over BIGINT prevents the internal INT sum from overflowing on long runs;
-- the average itself always fits back into INT.
-- HAVING COUNT(*) > 0 skips the insert when no iterations were recorded, which would
-- otherwise try to store NULL averages in NOT NULL columns.
INSERT INTO #OverallResults (GroupBy, SelfJoin, Iterations, Listings)
SELECT  CAST(AVG(CAST(GroupBy AS BIGINT)) AS INT),
        CAST(AVG(CAST(SelfJoin AS BIGINT)) AS INT),
        COUNT(*),
        @Listings
FROM    #Results
HAVING  COUNT(*) > 0

-- Summary of the current run.
-- DECIMAL(18, n) replaces DECIMAL(5, 4) / DECIMAL(4, 2), which overflowed for a
-- difference of 10 seconds or more and for a result of exactly 100 percent.
-- NULLIF(..., 0) yields NULL instead of a divide-by-zero error when the average
-- Group By time is 0.
SELECT  r.AvgGroupBy [Group By],
        r.AvgSelfJoin [Self Join],
        r.Iterations [Iterations],
        CONVERT(DECIMAL(18, 4), (r.AvgGroupBy - r.AvgSelfJoin)/1000000.0) [Difference (Seconds)],
        CONVERT(DECIMAL(18, 2), 100 * (1 - (1.0 * r.AvgSelfJoin/NULLIF(r.AvgGroupBy, 0)))) [Percent Faster]
FROM    (   SELECT  CAST(AVG(CAST(GroupBy AS BIGINT)) AS INT) [AvgGroupBy],
                    CAST(AVG(CAST(SelfJoin AS BIGINT)) AS INT) [AvgSelfJoin],
                    COUNT(*) [Iterations]
            FROM    #Results
        ) r

-- Names match the CREATE TABLE casing so the script also runs under a case-sensitive collation.
DROP TABLE #Listing
DROP TABLE #Results

-- Report across all runs in this session: iteration-weighted average per sample size.
SELECT  Records,
        Iterations,
        GroupBy [Group By],
        SelfJoin [Self Join],
        CONVERT(DECIMAL(18, 4), (GroupBy - SelfJoin)/1000000.0) [Difference (Seconds)],
        CONVERT(DECIMAL(18, 2), 100 * (1 - (1.0 * SelfJoin/NULLIF(GroupBy, 0)))) [Percent Faster]
FROM    (   SELECT  Listings * 4 [Records],
                    SUM(Iterations) [Iterations],
                    -- BIGINT products avoid INT overflow in (microseconds * iterations).
                    CAST(SUM(CAST(GroupBy AS BIGINT) * Iterations)/SUM(Iterations) AS INT) [GroupBy],
                    CAST(SUM(CAST(SelfJoin AS BIGINT) * Iterations)/SUM(Iterations) AS INT) [SelfJoin]
            FROM    #OverallResults
            GROUP BY Listings
        ) a
このスクリプトは、変数の値を変えて何度でも実行できます。私はリスティング数を100、1000、10000、100000、1000000としてそれぞれ500回ずつSELECT文を実行し、平均実行時間を求めました。その結果、自己結合のほうが平均で約60%速く、ある時点では95%速いこともありました。パフォーマンスの面では、明らかに自己結合の方法が勝者です。
こんにちは、ありがとうございます。3つまたは4つのテーブルを結合する場合の同じクエリを教えていただけますか。私は初心者で、3番目のテーブルを結合するクエリが書けませんでした。このように結合するテーブルは、最大で4つか5つあります。 –
+1 私の回答へのコメントにあるとおり、これは私が投稿したものよりも効率的な解決策です。 – GarethD