英文:
How to speed up a SQL query which is using DISTINCT
问题
根据我阅读的内容,似乎最佳解决方案是创建一个索引,但我不确定应该为哪些列创建索引。这是我第一次使用SQL索引。
如果我从这个查询中删除DISTINCT调用,我在1秒内就能获得超过1000个结果。然而,加上DISTINCT调用后,查询会在10秒内返回结果(显然是没有重复项的)。
如果有人有其他的解决方案,我会倾听。
这是查询(第二个SELECT是调用DISTINCT函数的地方):
SELECT
Sku,
Name,
ccp.Polygon,
MarketAvailability,
Coverage,
Range
FROM
(SELECT DISTINCT
dbo.CatalogEntry.CatalogEntryId as Id,
dbo.CatalogEntry.Code as Sku,
CoverageNode.Name as Coverage,
RangeNode.Name as [Range],
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'ItemChartName'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS [Name],
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'MarketAvailabilityDetailsCollection'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS MarketAvailability
FROM
dbo.CatalogEntry
INNER JOIN
dbo.NodeEntryRelation ON dbo.CatalogEntry.CatalogEntryId = dbo.NodeEntryRelation.CatalogEntryId
INNER JOIN
dbo.CatalogNode AS CoverageNode ON dbo.NodeEntryRelation.CatalogNodeId = CoverageNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS RangeNode ON CoverageNode.ParentNodeId = RangeNode.CatalogNodeId
INNER JOIN
dbo.CatalogContentProperty ON dbo.CatalogEntry.CatalogEntryId = dbo.CatalogContentProperty.ObjectId
INNER JOIN
dbo.CatalogNode AS ModelNode ON RangeNode.ParentNodeId = ModelNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS BrandNode ON ModelNode.ParentNodeId = BrandNode.CatalogNodeId
WHERE
(dbo.CatalogEntry.ClassTypeId = N'Variation') AND
(dbo.CatalogContentProperty.MetaFieldName = N'ItemIsChart') AND
RangeNode.Name != 'C-MAP' AND
(BrandNode.Name = '' OR '' = '' OR '' IS NULL) AND
(ModelNode.Name = '' OR '' = '' OR '' IS NULL) AND
(CoverageNode.Name = '' OR '' = '' OR '' IS NULL) AND
(RangeNode.Name = '' OR '' = '' OR '' IS NULL)
) AS CmapResults
INNER JOIN
(SELECT
GEOMETRY::STGeomFromText(CatalogContentProperty.LongString,4326) AS PolygonGeometry,
CatalogContentProperty.LongString AS Polygon,
CatalogContentProperty.ObjectId
FROM
CatalogContentProperty
WHERE
MetaFieldName = 'ItemChartCoordinates' AND
(CatalogContentProperty.LongString IS NOT NULL)) ccp ON ccp.ObjectId = CmapResults.Id
WHERE
((GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344) <= 100 OR 'POINT(50.9835929 -1.4205852)' IS NULL)
AND MarketAvailability IS NOT NULL
ORDER BY
GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STArea() DESC;
我正在使用SQL Server Management Studio 2012。目标是让带有DISTINCT调用的查询在与不带DISTINCT调用的查询相同的时间内返回结果。
英文:
From what I have read about this it seems like the best solution is to create an INDEX but I am not sure which columns I should be creating indexes for. This is my first time working with SQL indexes.
If I remove the DISTINCT call from this query I get over 1000 results in just over a second. However with the DISTINCT call it returns the results in 10 seconds (obviously without the duplicates).
If anyone has any alternative solutions I am all ears.
This is the query (the second SELECT
is where the DISTINCT
function is called):
SELECT
Sku,
Name,
ccp.Polygon,
MarketAvailability,
Coverage,
Range
FROM
(SELECT DISTINCT
dbo.CatalogEntry.CatalogEntryId as Id,
dbo.CatalogEntry.Code as Sku,
CoverageNode.Name as Coverage,
RangeNode.Name as [Range],
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'ItemChartName'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS [Name],
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'MarketAvailabilityDetailsCollection'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS MarketAvailability
FROM
dbo.CatalogEntry
INNER JOIN
dbo.NodeEntryRelation ON dbo.CatalogEntry.CatalogEntryId = dbo.NodeEntryRelation.CatalogEntryId
INNER JOIN
dbo.CatalogNode AS CoverageNode ON dbo.NodeEntryRelation.CatalogNodeId = CoverageNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS RangeNode ON CoverageNode.ParentNodeId = RangeNode.CatalogNodeId
INNER JOIN
dbo.CatalogContentProperty ON dbo.CatalogEntry.CatalogEntryId = dbo.CatalogContentProperty.ObjectId
INNER JOIN
dbo.CatalogNode AS ModelNode ON RangeNode.ParentNodeId = ModelNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS BrandNode ON ModelNode.ParentNodeId = BrandNode.CatalogNodeId
WHERE
(dbo.CatalogEntry.ClassTypeId = N'Variation') AND
(dbo.CatalogContentProperty.MetaFieldName = N'ItemIsChart') AND
RangeNode.Name != 'C-MAP' AND
(BrandNode.Name = '' OR '' = '' OR '' IS NULL) AND
(ModelNode.Name = '' OR '' = '' OR '' IS NULL) AND
(CoverageNode.Name = '' OR '' = '' OR '' IS NULL) AND
(RangeNode.Name = '' OR '' = '' OR '' IS NULL)
) AS CmapResults
INNER JOIN
(SELECT
GEOMETRY::STGeomFromText(CatalogContentProperty.LongString,4326) AS PolygonGeometry,
CatalogContentProperty.LongString AS Polygon,
CatalogContentProperty.ObjectId
FROM
CatalogContentProperty
WHERE
MetaFieldName = 'ItemChartCoordinates' AND
(CatalogContentProperty.LongString IS NOT NULL)) ccp ON ccp.ObjectId = CmapResults.Id
WHERE
((GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344) <= 100 OR 'POINT(50.9835929 -1.4205852)' IS NULL)
AND MarketAvailability IS NOT NULL
ORDER BY
GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STArea() DESC;
I am using SQL Server Management Studio 2012. The aim is to get the query with the DISTINCT call to return in the same amount of time as the query would without the DISTINCT call.
答案1
得分: 0
您的查询看起来有点复杂。这个查询会产生相同的输出吗?
SELECT DISTINCT ce.Code as Sku
, max(case when ccp.MetaFieldName = 'ItemChartName' then ccp.LongString end) as 'Name'
, max(case when ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' then ccp.LongString end) as 'MarketAvailability'
, max(case when ccp.MetaFieldName = 'ItemChartCoordinates' then ccp.LongString end) as 'Polygon'
, CoverageNode.Name as Coverage
, RangeNode.Name as 'Range'
FROM dbo.CatalogEntry ce
INNER JOIN dbo.NodeEntryRelation ner ON ce.CatalogEntryId = ner.CatalogEntryId
INNER JOIN dbo.CatalogNode CoverageNode ON dbo.NodeEntryRelation.CatalogNodeId = CoverageNode.CatalogNodeId
INNER JOIN dbo.CatalogNode RangeNode ON CoverageNode.ParentNodeId = RangeNode.CatalogNodeId
INNER JOIN dbo.CatalogContentProperty ccp ON ce.CatalogEntryId = ccp.ObjectId
INNER JOIN dbo.CatalogNode ModelNode ON RangeNode.ParentNodeId = ModelNode.CatalogNodeId
INNER JOIN dbo.CatalogNode BrandNode ON ModelNode.ParentNodeId = BrandNode.CatalogNodeId
WHERE ce.ClassTypeId = N'Variation'
and ccp.MetaFieldName = N'ItemIsChart'
and RangeNode.Name != 'C-MAP'
--and (BrandNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
--and (ModelNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
--and (CoverageNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
--and (RangeNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
and (GEOGRAPHY::STGeomFromText(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STUnion(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STStartPoint()).STAsText(), 4326).STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344) <= 100 -- OR 'POINT(50.9835929 -1.4205852)' IS NULL -- redundant
)
and max(case when ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' then ccp.LongString end)
ORDER BY GEOGRAPHY::STGeomFromText(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STUnion(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STStartPoint()).STAsText(), 4326).STArea() DESC;
英文:
Your query looks a bit complicated. Will this one produce the same output?
SELECT DISTINCT ce.Code as Sku
, max(case when ccp.MetaFieldName = 'ItemChartName' then ccp.LongString end) as 'Name'
, max(case when ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' then ccp.LongString end) as 'MarketAvailability'
, max(case when ccp.MetaFieldName = 'ItemChartCoordinates' then ccp.LongString end) as 'Polygon'
, CoverageNode.Name as Coverage
, RangeNode.Name as 'Range'
FROM dbo.CatalogEntry ce
INNER JOIN dbo.NodeEntryRelation ner ON ce.CatalogEntryId = ner.CatalogEntryId
INNER JOIN dbo.CatalogNode CoverageNode ON dbo.NodeEntryRelation.CatalogNodeId = CoverageNode.CatalogNodeId
INNER JOIN dbo.CatalogNode RangeNode ON CoverageNode.ParentNodeId = RangeNode.CatalogNodeId
INNER JOIN dbo.CatalogContentProperty ccp ON ce.CatalogEntryId = ccp.ObjectId
INNER JOIN dbo.CatalogNode ModelNode ON RangeNode.ParentNodeId = ModelNode.CatalogNodeId
INNER JOIN dbo.CatalogNode BrandNode ON ModelNode.ParentNodeId = BrandNode.CatalogNodeId
WHERE ce.ClassTypeId = N'Variation'
and ccp.MetaFieldName = N'ItemIsChart'
and RangeNode.Name != 'C-MAP'
--and (BrandNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
--and (ModelNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
--and (CoverageNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
--and (RangeNode.Name = '' OR '' = '' OR '' IS NULL) -- always true
and (GEOGRAPHY::STGeomFromText(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STUnion(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STStartPoint()).STAsText(), 4326).STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344) <= 100 -- OR 'POINT(50.9835929 -1.4205852)' IS NULL -- redundant
)
and max(case when ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' then ccp.LongString end)
ORDER BY GEOGRAPHY::STGeomFromText(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STUnion(GEOMETRY::STGeomFromText(ccp.LongString,4326).MakeValid().STStartPoint()).STAsText(), 4326).STArea() DESC;
答案2
得分: 0
答案是我的情况是去掉 DISTINCT,并使用 LINQ 进行重复项移除!加载时间从大约 10-11 秒缩短到大约 4-5 秒。
英文:
So the answer is my case was to remove the DISTINCT and to do the duplicate removing using LINQ! Load times went from 10-11ish seconds to 4-5ish seconds
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论