Connaître les caractéristiques des statistiques de colonne d’une table

Voici un lot de requêtes qui retourne quelques caractéristiques intéressantes des statistiques d’une table, notamment :

– la quantité de données, en MB, qui a été échantillonnée
– la date de dernier échantillonnage
– le nom de la statistique, ainsi que les colonnes sur lesquelles la statistique échantillonne
– le nombre de lignes échantillonnées lors du dernier échantillonnage
– la densité (inverse de la sélectivité)
– le nombre de modifications dans les colonnes de la statistique depuis le dernier échantillonnage
– le SRT (Statistic Recomputation Threshold : seuil de ré-échantillonnage de la statistique)
C’est le nombre de modifications que les colonnes de la statistique doivent subir avant d’être ré-échantillonnées.
– quelques comparaisons par rapport au nombre réel de lignes de la table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
-------------------------------
-- Nicolas SOUQUET - 12-05-2011
-------------------------------
SET NOCOUNT ON
GO
------------------------------------------------------------------------------------------------------------------------------
DECLARE  @schema_name sysname = 'dbo'
  , @table_or_view_name sysname = 'maTable'
  , @statistic_name sysname = NULL -- NULL lit toutes les statistiques de la table
  , @separator varchar(2) = ', '
------------------------------------------------------------------------------------------------------------------------------
IF NOT EXISTS
(
  SELECT    *
  FROM    sys.schemas AS S
  INNER JOIN  sys.objects AS O ON S.schema_id = O.schema_id
  WHERE    S.name = @schema_name
  AND    O.name = @table_or_view_name
  AND    O.type IN ('U', 'V') -- table or view
)
BEGIN
  RAISERROR('The %s.%s table or indexed view does not exist !', 16, 1, @schema_name, @table_or_view_name)
  RETURN
END
 
IF @statistic_name IS NOT NULL
AND NOT EXISTS
(
  SELECT    *
  FROM    sys.schemas AS S
  INNER JOIN  sys.objects AS O ON S.schema_id = O.schema_id
  INNER JOIN  sys.indexes AS I ON O.object_id = I.object_id
  WHERE    S.name = @schema_name
  AND    O.name = @table_or_view_name
  AND    I.name = @statistic_name
)
BEGIN
  RAISERROR('The %s statistics on the %s.%s table or indexed view does not exist !', 16, 1, @schema_name, @table_or_view_name, @statistic_name)
  RETURN
END
-----------------------------------------
-- Collecting column and index statistics
-----------------------------------------
DECLARE  @stat_header TABLE
(
  stat_name sysname
  , updated varchar(32)
  , row_count bigint
  , rows_sampled bigint
  , steps int
  , density float
  , avg_key_length float
  , string_index char(3)
  , filter_expression varchar(1024)  
  , unfiltered_rows bigint
)
 
DECLARE @stat_name sysname = ' '
  , @object_id int
  , @sql nvarchar(256) = 'DBCC SHOW_STATISTICS(@TABLE@, @STATS@) WITH NO_INFOMSGS, STAT_HEADER'
  , @exec_string nvarchar(512)
 
WHILE DATALENGTH(@stat_name) > 0
BEGIN
  SET @stat_name = NULL
 
  SELECT    TOP(1) @stat_name = S.name
  FROM    sys.stats AS S
  INNER JOIN  sys.objects AS O ON S.object_id = O.object_id
  LEFT JOIN  @stat_header AS SH ON S.name = SH.stat_name
  WHERE    O.name = @table_or_view_name
  AND    SH.stat_name IS NULL
  AND    (@statistic_name IS NULL OR S.name = @statistic_name)
 
  SELECT @exec_string = REPLACE(REPLACE(@sql, '@STATS@', @stat_name), '@TABLE@', @table_or_view_name)
 
  INSERT  @stat_header
  EXEC  (@exec_string)
END
--------------------------------------------------------------------------------------------------------
;WITH
  CTE AS
  (
    SELECT    T.stat_name
        , T.row_count
        , T.rows_sampled
        , SI.rowmodctr
        , T.steps
        , CAST(T.density AS decimal(5,3)) AS density
        , CAST((CAST(T.rows_sampled AS decimal(38, 10)) / T.row_count) * 100 AS decimal(38, 2)) AS stat_sample_rate
        , CAST((CAST(SI.rowmodctr AS decimal(38, 10)) / T.row_count) * 100 AS decimal(38, 2)) AS change_rate
        , CAST(updated AS datetime) AS last_update
        , S.auto_created
        , REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(S.filter_definition, '[', ''), ']', ''), ')', ''), '(', ''), '=', ' = ') AS filter_definition
    FROM    @stat_header AS T
    INNER JOIN  sys.stats AS S
          ON S.object_id = OBJECT_ID(@table_or_view_name)
          AND S.name = T.stat_name
    INNER JOIN  sys.sysindexes AS SI
          ON SI.id = S.object_id
          AND SI.indid = S.stats_id
  )
  , CTE_FINAL AS
  (
    SELECT    PS.in_row_used_page_count / 128 AS data_MB
        , C.stat_name
        , C.auto_created
        , C.filter_definition
        , C.density
        , C.row_count AS stat_row_count
        , PS.row_count AS real_row_count
        , C.rows_sampled
        , C.rowmodctr
        , C.steps
        , CASE
          WHEN PS.row_count < 6 THEN 6
          WHEN PS.row_count BETWEEN 6 AND 500 THEN 500
          ELSE CAST(500 + (0.2 * PS.row_count) AS int) + 1
        END AS stat_recompute_threshold
        , C.stat_sample_rate
        , CAST(CAST(C.rows_sampled AS decimal(38, 10)) / (CASE PS.row_count WHEN 0 THEN 1 ELSE PS.row_count END) * 100 AS decimal(38, 2)) AS real_sample_rate
        , C.change_rate AS stat_change_rate
        , CAST((CAST(C.rowmodctr AS decimal(38, 10)) / (CASE PS.row_count WHEN 0 THEN 1 ELSE PS.row_count END)) * 100 AS decimal(38, 2)) AS real_change_rate
        , 100 - (CAST(C.row_count AS decimal(38, 10)) / (CASE PS.row_count WHEN 0 THEN 1 ELSE PS.row_count END)) * 100 AS row_count_gap
        , C.last_update
    FROM    CTE AS C
    CROSS JOIN  sys.dm_db_partition_stats AS PS
    WHERE    (@table_or_view_name IS NULL OR PS.object_id = OBJECT_ID(@table_or_view_name))
    AND    PS.index_id BETWEEN 0 AND 1
  )
SELECT    @table_or_view_name AS table_or_view_name
    , CF.data_MB
    , CF.last_update
    , CF.density
    , CF.stat_name
    , LEFT(SC.stat_column_list, LEN(SC.stat_column_list) - 1) AS stat_column_list
    , CF.filter_definition
    , CF.stat_row_count
    , CF.real_row_count  
    , CF.rows_sampled
    , CF.steps
    , CF.rowmodctr
    , CF.stat_recompute_threshold AS SRT
    , CF.stat_sample_rate
    , CF.real_sample_rate
    , CF.stat_change_rate
    , CF.real_change_rate
    , CF.row_count_gap
FROM    CTE_FINAL AS CF
CROSS APPLY  (
      SELECT    C.name + ','
      FROM    sys.stats AS S
      INNER JOIN  sys.stats_columns AS SC
            ON S.object_id = SC.object_id
            AND S.stats_id = SC.stats_id
      INNER JOIN  sys.columns AS C
            ON C.object_id = SC.object_id
            AND C.column_id = SC.column_id
      WHERE    CF.stat_name = S.name
      ORDER BY  SC.stats_column_id
      FOR XML PATH ('')
    ) AS SC (stat_column_list)
ORDER BY  auto_created, stat_name

On peut ensuite voir plus en détails ce qui a été échantillonné, à l’aide de l’instruction DBCC SHOW_STATISTICS (maTable, maStatistique).

Cela aide souvent à comprendre pourquoi l’optimiseur a choisi un index plutôt qu’un autre ;)

Bonne optimisation !

ElSüket ;)

Laisser un commentaire