RDS SQL Server - 專題分享 - 巧用執行計劃緩存之數據類型隱式轉換
摘要
SQL Server數據庫基表數據類型隱式轉換,會導致Index Scan或者Clustered Index Scan的問題,這篇文章分享如何巧用執行計劃緩存來發現數據類型隱式轉換的查詢語句,從而可以有針對性的優化查詢,解決高CPU使用率的問題。
問題引入
數據類型轉化是導致SQL Server高CPU使用率的又一大殺手,詳情參見之前的雲棲社區文章:RDS SQL Server - 最佳實踐 - 高CPU使用率係列之數據類型轉換。SQL Server對基表數據類型轉換會導致Index Scan或者Clustered Index Scan,進而導致IO使用率的大幅上升,最終導致CPU的使用率大幅升高。這篇文章是從執行計劃緩存緩存的角度來找出導致數據類型轉換的查詢語句,進而做有針對性的查詢語句優化,來破解高CPU使用率的問題。
測試環境
為了更好的展示從執行計劃緩存緩存中找出導致數據類型轉化的查詢語句,我們先建立測試環境。
-- Create testing database
IF DB_ID('TestDb') IS NULL
CREATE DATABASE TestDb;
GO
USE TestDb
GO
-- create demo table SalesOrder
IF OBJECT_ID('dbo.SalesOrder', 'U') IS NOT NULL
BEGIN
TRUNCATE TABLE dbo.SalesOrder
DROP TABLE dbo.SalesOrder
END
GO
CREATE TABLE dbo.SalesOrder
(
RowID INT IDENTITY(1,1) NOT NULL
, OrderID UNIQUEIDENTIFIER NOT NULL
, ItemID INT NOT NULL
, UserID INT NOT NULL
, OrderQty INT NOT NULL
, Price DECIMAL(8,2) NOT NULL
, OrderDate DATETIME NOT NULL
CONSTRAINT DF_OrderDate DEFAULT(GETDATE())
, LastUpdateTime DATETIME NULL
, OrderComment NVARCHAR(100) NULL
, CONSTRAINT PK_SalesOrder PRIMARY KEY(
OrderID
)
);
-- data init for 2 M records.
;WITH a
AS (
SELECT *
FROM (VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) AS a(a)
), RoundData
AS(
SELECT TOP(2000000)
OrderID = NEWID()
,ItemIDRound = abs(checksum(newid()))
,Price = a.a * b.a * 10
,OrderQty = a.a + b.a + c.a + d.a + e.a + f.a + g.a + h.a
FROM a, a AS b, a AS c, a AS d, a AS e, a AS f, a AS g, a AS h
), DATA
AS(
SELECT
OrderID
,ItemID = cast(ROUND((1300 * (ItemIDRound*1./cast(replace(ItemIDRound, ItemIDRound, '1' + replicate('0', len(ItemIDRound))) as bigint)) + 101), 0) as int)
,UserID = cast(ROUND((500 * (ItemIDRound*1./cast(replace(ItemIDRound, ItemIDRound, '1' + replicate('0', len(ItemIDRound))) as bigint)) + 10000), 0) as int)
,OrderQty
,Price = cast(Price AS DECIMAL(8,2))
,OrderDate = dateadd(day, -cast(ROUND((50 * (ItemIDRound*1./cast(replace(ItemIDRound, ItemIDRound, '1' + replicate('0', len(ItemIDRound))) as bigint)) + 1), 0) as int) ,GETDATE())
FROM RoundData
)
INSERT INTO dbo.SalesOrder(OrderID, ItemID, UserID, OrderQty, Price, OrderDate, LastUpdateTime, OrderComment)
SELECT
OrderID
, ItemID
, UserID
, OrderQty
, Price
, OrderDate
, LastUpdateTime = OrderDate
, OrderComment = N'User ' + CAST(UserID AS NVARCHAR(8)) + N' purchased item ' + CAST(ItemID AS NVARCHAR(8))
FROM DATA;
GO
--===============query
USE [TestDb]
GO
ALTER TABLE dbo.SalesOrder
ALTER COLUMN UserID CHAR(8) NULL
GO
EXEC sys.sp_help 'dbo.SalesOrder'
USE [TestDb]
GO
SELECT
ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10057
SELECT TOP 100
ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10058
AND OrderDate >= DATEADD(MONTH, -1, GETDATE())
AND OrderDate <= GETDATE();
USE [TestDb]
GO
SELECT
ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10059
SELECT TOP 100
ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10061
AND OrderDate >= DATEADD(MONTH, -1, GETDATE())
AND OrderDate <= GETDATE();
查看SalesOrder表結構,我們很清楚的看到UserID數據類型是CHAR(8),而查詢WHERE語句中的WHERE UserID = XXXX中,等號右邊的數據類型為INT,這會導致SQL Server將數據類型優先級低的數據類型CHAR轉化為INT,SQL Server需要將這個表中的200萬條記錄的UserID從CHAR(8)轉化為INT。所以,必須進行Scan操作,從而導致高CPU使用率。
解決方法
從測試環境部分,我們發現的確導致了數據類型的隱式轉換。以下短短100行代碼,可以從執行計劃緩存中找出導致數據類型隱式轉化的查詢語句和執行計劃的詳細信息。
USE testdb
GO
DECLARE
@db_name SYSNAME
;
SET
@db_name = QUOTENAME(DB_NAME())
;
;WITH XMLNAMESPACES (DEFAULT 'https://schemas.microsoft.com/sqlserver/2004/07/showplan')
, planCache
AS(
SELECT
stmt.value('(@StatementText)[1]', 'varchar(max)') AS stmt,
n.t.value('(ScalarOperator/Identifier/ColumnReference/@Database)[1]', 'sysname') AS DatabaseName,
REPLACE(REPLACE(n.t.value('(ScalarOperator/Identifier/ColumnReference/@Schema)[1]', 'sysname'), '[', ''), ']', '') AS SchemaName,
REPLACE(REPLACE(n.t.value('(ScalarOperator/Identifier/ColumnReference/@Table)[1]', 'sysname'), '[', ''), ']', '') AS ObjectName,
REPLACE(REPLACE(n.t.value('(ScalarOperator/Identifier/ColumnReference/@Column)[1]', 'sysname'), '[', ''), ']', '') AS ColumnName,
n.t.value('(@DataType)[1]', 'sysname') AS ConvertTo,
n.t.value('(@Length)[1]', 'int') AS ConvertToLength,
PhysicalOperator.value('(.//Predicate/ScalarOperator/@ScalarString)[1]','nvarchar(max)') AS ScalarString,
query_plan,
cp.plan_handle
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_query_plan(plan_handle) AS qp
CROSS APPLY query_plan.nodes('/ShowPlanXML/BatchSequence/Batch/Statements/StmtSimple') AS batch(stmt)
CROSS APPLY stmt.nodes('//ScalarOperator/Compare/ScalarOperator/Convert[@Implicit="1"]') AS n(t)
CROSS APPLY batch.stmt.nodes('.//RelOp[@PhysicalOp=''Index Scan'' or @PhysicalOp=''Clustered Index Scan'']') as RelOp(PhysicalOperator)
WHERE n.t.exist('ScalarOperator/Identifier/ColumnReference[@Database=sql:variable("@db_name")][@Schema != ''[sys]'']') = 1
)
SELECT
pc.stmt,
pc.DatabaseName,
pc.SchemaName,
pc.ObjectName,
pc.ColumnName,
ps.UsedPageCount,
ix.name AS IndexName,
CAST(ps.UsedPageCount/ 128. AS decimal(12,2)) AS UsedSizeMB,
ps.TotalRowCount,
qs.execution_count * UsedPageCount AS MostLogicalRead,
cols.DATA_TYPE AS ConvertFrom,
cols.CHARACTER_MAXIMUM_LENGTH AS ConvertFromLength,
pc.ConvertTo,
pc.ConvertToLength,
pc.ScalarString,
pc.query_plan,
qs.creation_time
,qs.last_execution_time
,qs.execution_count
,qs.total_worker_time
,qs.last_worker_time
,qs.min_worker_time
,qs.max_worker_time
,qs.total_physical_reads
,qs.last_physical_reads
,qs.min_physical_reads
,qs.max_physical_reads
,qs.total_logical_writes
,qs.last_logical_writes
,qs.min_logical_writes
,qs.max_logical_writes
,qs.total_logical_reads
,qs.last_logical_reads
,qs.min_logical_reads
,qs.max_logical_reads
,qs.total_clr_time
,qs.last_clr_time
,qs.min_clr_time
,qs.max_clr_time
,qs.total_elapsed_time
,qs.last_elapsed_time
,qs.min_elapsed_time
,qs.max_elapsed_time
,qs.total_rows
,qs.last_rows
,qs.min_rows
,qs.max_rows
FROM planCache AS pc
INNER JOIN INFORMATION_SCHEMA.COLUMNS as cols WITH(NOLOCK)
ON pc.SchemaName = cols.TABLE_SCHEMA
AND pc.ObjectName = cols.TABLE_NAME
AND pc.ColumnName = cols.COLUMN_NAME
INNER JOIN sys.tables as tb WITH(NOLOCK)
ON tb.schema_id = schema_id(pc.SchemaName)
AND tb.name = pc.ObjectName
INNER JOIN sys.indexes as ix WITH(NOLOCK)
ON tb.object_id = ix.object_id
LEFT JOIN (
SELECT
object_id,
index_id,
sum(used_page_count) AS UsedPageCount,
sum(row_count) AS TotalRowCount
FROM sys.dm_db_partition_stats as dps WITH(NOLOCK)
GROUP BY object_id,Index_id
) as ps
ON ix.object_id = ps.object_id and ix.index_id = ps.index_id
left join sys.dm_exec_query_stats qs on pc.plan_handle= qs.plan_handle
--DBCC freeproccache
總結
這篇文章分享了如何從執行計劃緩存中找到導致SQL Server數據類型隱式轉化的查詢語句,為我們針對特定查詢語句的優化提供了基礎,最終破解高CPU使用率的問題。
最後更新:2017-06-20 10:01:56