PostgreSQL 9.6 のパフォーマンスについて助けが必要です。テーマは、インデックスを使用した GROUP BY と INNER JOIN を含む PostgreSQL クエリの最適化です。
-- Parent table of the simplified example; invoice_item.invoice_id refers to
-- its id column.
CREATE TABLE invoice (
    id         bigserial PRIMARY KEY,  -- auto-generated 64-bit key
    some_field varchar(200)            -- nullable; GROUP BY key of the slow query in this post
);
-- Child table of the simplified example; every row may point at one invoice
-- through invoice_id.
CREATE TABLE invoice_item (
    id             serial PRIMARY KEY,
    invoice_id     bigint,                  -- nullable FK to invoice.id; this script creates no index on it
    article_number varchar(50),             -- GROUP BY key of the fast query in this post
    quantity       numeric(19,2) NOT NULL,  -- value summed by both example queries
    CONSTRAINT invoice_item_fk
        FOREIGN KEY (invoice_id)
        REFERENCES invoice (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
);
-- Single-column indexes on the two columns that the example queries in this
-- post use as GROUP BY keys.
CREATE INDEX invoice_some_field_idx
    ON invoice (some_field);
CREATE INDEX invoice_item_article_number_idx
    ON invoice_item (article_number);
上記は、私が使っているテーブルを非常に単純化した例です。invoice テーブルには約50万行、invoice_item テーブルには約150万行のデータがあります。
article_number にインデックスがある状態で、次のクエリを実行します。
-- Total quantity per article number, counting only items whose invoice_id
-- matches an existing invoice (inner join).
SELECT
    ii.article_number,
    SUM(ii.quantity)
FROM invoice AS i
INNER JOIN invoice_item AS ii
    ON ii.invoice_id = i.id
GROUP BY ii.article_number;
このクエリは非常に高速で、実行時間は約7秒から約55ミリ秒に短縮されます。
ところが、親テーブルの列に対して GROUP BY を使用すると問題が発生します。
-- Total item quantity per invoice.some_field value; the grouping key lives on
-- the parent table rather than on the table being summed.
SELECT
    i.some_field,
    SUM(ii.quantity)
FROM invoice AS i
INNER JOIN invoice_item AS ii
    ON ii.invoice_id = i.id
GROUP BY i.some_field;
some_fieldにインデックスがあるかどうかにかかわらず、このクエリは同じ時間(約5秒)かかります。
何かとても明白なことを見落としているような気がします。
--- EDIT ----
私はクエリプランについてはまったくの初心者です。そして当然ながら、上記のテーブルでさらにテストを行ったところ、実際のコードとは大きく異なる結果になりました。
以下に、実際のテーブル定義と、EXPLAIN 付きのクエリ1を示します。
-- Parent table from the real schema; receipt_item2.receipt_id refers to its
-- id column.
CREATE TABLE receipt2 (
    id serial NOT NULL,  -- serial yields an integer key
    version bigint NOT NULL,
    store_number integer NOT NULL,  -- filtered on (= 1) by query 2 in this post
    address1 varchar(200),
    date_created timestamp without time zone NOT NULL,
    round_off numeric(19,2) NOT NULL,
    date_created_by_cash_register timestamp without time zone NOT NULL,  -- date-range filter column of both queries
    address2 varchar(200),
    receipt_number integer NOT NULL,
    application_version varchar(50),
    control_box_serial_number_original varchar(200),
    last_updated timestamp without time zone NOT NULL,
    cash_register_user_id uuid NOT NULL,  -- GROUP BY key of query 2
    control_code_copy varchar(200),
    cash_register_number integer NOT NULL,
    control_code_original varchar(200),
    zip_code varchar(50),
    receipt_footer varchar(20000),
    phone_number varchar(50),
    control_box_serial_number_copy varchar(200),
    corporate_identity varchar(50) NOT NULL,
    city varchar(200),
    money_back numeric(19,2) NOT NULL,
    number_of_copies_printed integer NOT NULL,
    cash_register_user_username varchar(50) NOT NULL,
    company_name varchar(200) NOT NULL,
    email varchar(200),
    website varchar(200),
    CONSTRAINT receipt2_pkey
        PRIMARY KEY (id),
    -- A receipt number may occur only once per corporate identity, store and
    -- cash register.
    CONSTRAINT uk9f6f61365739562846c491f21efb
        UNIQUE (corporate_identity, store_number, cash_register_number, receipt_number)
)
WITH (
    OIDS = FALSE
);
-- Secondary indexes on receipt2. No access method is named, so each one is a
-- b-tree (PostgreSQL's default index method).
CREATE INDEX receipt2_cash_register_user_id_idx
    ON receipt2 (cash_register_user_id);

CREATE INDEX receipt2_date_created_by_cash_register_idx
    ON receipt2 (date_created_by_cash_register);

-- NOTE(review): store_number is also the leading column of
-- receipt2corpstoreidx below, so these two indexes overlap.
CREATE INDEX receipt2_store_number_idx
    ON receipt2 (store_number);

CREATE INDEX receipt2corpidx
    ON receipt2 (corporate_identity COLLATE pg_catalog."default");

CREATE INDEX receipt2corpstoreidx
    ON receipt2 (store_number, corporate_identity COLLATE pg_catalog."default");
-- Child table from the real schema; every row belongs to one receipt2 row
-- through receipt_id.
CREATE TABLE receipt_item2 (
    id serial NOT NULL,
    version bigint NOT NULL,
    cost_excluding_vat numeric(19,2) NOT NULL,
    account_number integer,
    receipt_item_type varchar(255) NOT NULL,  -- both queries filter on = 'ARTICLE'
    article_group_id uuid,
    supplier_number integer,
    purchase_price_excluding_vat numeric(19,2) NOT NULL,
    -- NOTE(review): declared bigint while receipt2.id is serial (integer);
    -- this script also shows no index on this join column.
    receipt_id bigint NOT NULL,
    text varchar(20000),
    promotion_id uuid,
    price_including_vat numeric(19,2) NOT NULL,
    discount_type varchar(255) NOT NULL,
    profit_excluding_vat numeric(19,2) NOT NULL,
    price_excluding_vat numeric(19,2) NOT NULL,
    discount_amount_including_vat numeric(19,2) NOT NULL,
    article_type varchar(255),
    article_number varchar(50),  -- GROUP BY key of query 1
    cost_including_vat numeric(19,2) NOT NULL,
    purchase_cost_excluding_vat numeric(19,2) NOT NULL,
    hidden boolean NOT NULL,
    row_index integer NOT NULL,
    quantity numeric(19,2) NOT NULL,
    discount numeric(19,2) NOT NULL,
    discount_amount_excluding_vat numeric(19,2) NOT NULL,
    description varchar(200),
    vat numeric(19,2) NOT NULL,
    CONSTRAINT receipt_item2_pkey
        PRIMARY KEY (id),
    CONSTRAINT fksohgmt8ntavcgj10ha2duc8lb
        FOREIGN KEY (receipt_id)
        REFERENCES receipt2 (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
)
WITH (
    OIDS = FALSE
);
-- B-tree index (the default method) on the GROUP BY column of query 1; the
-- plan shown for query 1 scans receipt_item2 through this index.
CREATE INDEX receipt_item2_article_number_idx
    ON receipt_item2 (article_number COLLATE pg_catalog."default");
クエリ1(EXPLAIN 付き)です。これは非常に高速で、約55ミリ秒です。
-- Query 1: per-article totals over receipt items of type 'ARTICLE' whose
-- receipt falls inside the 2017 date filter, limited to 100 groups.
-- NOTE(review): the upper bound '2017-12-31' is compared as midnight (see the
-- plan's filter), so receipts later on 2017-12-31 are excluded; confirm this
-- is intended.
-- NOTE(review): LIMIT without ORDER BY returns an arbitrary set of 100 groups.
SELECT
    receipt_item.article_number,
    SUM(receipt_item.quantity)                    AS "quantity",
    SUM(receipt_item.cost_excluding_vat)          AS "costExcludingVat",
    SUM(receipt_item.cost_including_vat)          AS "costIncludingVat",
    SUM(receipt_item.purchase_cost_excluding_vat) AS "purchaseCostExcludingVat",
    SUM(receipt_item.profit_excluding_vat)        AS "profitExcludingVat"
FROM receipt2 AS receipt
INNER JOIN receipt_item2 AS receipt_item
    ON receipt_item.receipt_id = receipt.id
WHERE receipt.date_created_by_cash_register >= '2017-01-01'
  AND receipt.date_created_by_cash_register <= '2017-12-31'
  AND receipt_item.receipt_item_type = 'ARTICLE'
GROUP BY receipt_item.article_number
LIMIT 100;
"Limit (cost=0.85..4821.60 rows=100 width=167)"
" -> GroupAggregate (cost=0.85..948001.24 rows=19665 width=167)"
" Group Key: receipt_item.article_number"
" -> Nested Loop (cost=0.85..925058.77 rows=1500000 width=35)"
" -> Index Scan using receipt_item2_article_number_idx on receipt_item2 receipt_item (cost=0.43..196242.77 rows=1500000 width=43)"
" Filter: ((receipt_item_type)::text = 'ARTICLE'::text)"
" -> Index Scan using receipt2_pkey on receipt2 receipt (cost=0.42..0.48 rows=1 width=4)"
" Index Cond: (id = receipt_item.receipt_id)"
" Filter: ((date_created_by_cash_register >= '2017-01-01 00:00:00'::timestamp without time zone) AND (date_created_by_cash_register <= '2017-12-31 00:00:00'::timestamp without time zone))"
クエリ2(EXPLAIN 付き)です。このクエリは、cash_register_user_id にインデックスがあるかどうかに関係なく、2.3秒かかります。
-- Query 2: the same totals as query 1, but grouped by the cash register user
-- on the parent table and restricted to store_number = 1.
-- NOTE(review): the upper bound '2017-12-31' is compared as midnight (see the
-- plan's filter), so receipts later on 2017-12-31 are excluded; confirm this
-- is intended.
-- NOTE(review): LIMIT without ORDER BY returns an arbitrary set of groups.
SELECT
    receipt.cash_register_user_id                 AS "userId",
    SUM(receipt_item.quantity)                    AS "quantity",
    SUM(receipt_item.cost_excluding_vat)          AS "costExcludingVat",
    SUM(receipt_item.cost_including_vat)          AS "costIncludingVat",
    SUM(receipt_item.purchase_cost_excluding_vat) AS "purchaseCostExcludingVat",
    SUM(receipt_item.profit_excluding_vat)        AS "profitExcludingVat"
FROM receipt2 AS receipt
INNER JOIN receipt_item2 AS receipt_item
    ON receipt_item.receipt_id = receipt.id
WHERE receipt.date_created_by_cash_register >= '2017-01-01'
  AND receipt.date_created_by_cash_register <= '2017-12-31'
  AND receipt_item.receipt_item_type = 'ARTICLE'
  AND receipt.store_number = 1
GROUP BY receipt.cash_register_user_id
LIMIT 100;
"Limit (cost=154761.00..154761.45 rows=20 width=176)"
" -> HashAggregate (cost=154761.00..154761.45 rows=20 width=176)"
" Group Key: receipt.cash_register_user_id"
" -> Hash Join (cost=28135.00..132261.00 rows=1500000 width=44)"
" Hash Cond: (receipt_item.receipt_id = receipt.id)"
" -> Seq Scan on receipt_item2 receipt_item (cost=0.00..57133.00 rows=1500000 width=36)"
" Filter: ((receipt_item_type)::text = 'ARTICLE'::text)"
" -> Hash (cost=18955.00..18955.00 rows=500000 width=20)"
" -> Seq Scan on receipt2 receipt (cost=0.00..18955.00 rows=500000 width=20)"
" Filter: ((date_created_by_cash_register >= '2017-01-01 00:00:00'::timestamp without time zone) AND (date_created_by_cash_register <= '2017-12-31 00:00:00'::timestamp without time zone) AND (store_number = 1))"
この質問の本題からは少し外れますが、次の課題は結果の並べ替えです。理想は、quantity や cost などの集計値で並べ替えられるようにすることです。
まず当然やるべきことは、何が起きているのかを確認するために、両方のクエリの `EXPLAIN (ANALYZE, BUFFERS)` の出力を見ることです。 –
それから、'article_number' には distinct な値がいくつありますか? – wildplasser
@wildplasser、テストでは20,000種類の値を使用しましたが、本番環境では最大100,000種類になります。 –