Connaître le gain de compression d’une table avec SQL Server

Vous avez identifié une table candidate à la compression ? mais vous voulez savoir quelle sera la meilleure méthode de compression ROW ou PAGE. SQL Server met à disposition une procédure stockée sp_estimate_data_compression_savings. Cependant  l’exécution de cette dernière permet seulement de savoir le taux de compression pour l’une ou pour l’autre méthode pour une seule partition d’une table à la fois. Le script suivant permet de connaître pour une table donnée quelle est la meilleure méthode de compression à utiliser pour l’ensemble des partitions d’une table sachant qu’une table non partitionnée possède une seule partition.

Petite précision : il se peut que le résultat du script affiche un gain négatif. Ceci est parfaitement normal car la compression peut être contre bénéfique dans certains cas. En effet le nouveau format de page utilisé dans ce cas requiert des octets de gestion supplémentaire. Si le gain de compression est nulle alors on perd tout l’intérêt de la compression et on se retrouve avec un nombre de pages supérieure qu’à l’état initial.

DECLARE @object_id INT;
SET @object_id = OBJECT_ID(‘monschema.maTable’);

DECLARE @i INT = 1;
DECLARE @max INT;

IF EXISTS (SELECT * FROM tempdb.sys.objects WHERE [object_id] = OBJECT_ID(‘tempdb..#_estimate_data_compression_savings’))
DROP TABLE  #_estimate_data_compression_savings;

– Work table for compression
CREATE TABLE #_estimate_data_compression_savings
(
id INT IDENTITY(1,1),
[object_name] SYSNAME,
[schema_name] SYSNAME,
index_id INT,
partition_number INT,
partition_compression VARCHAR(50) NULL,
partition_value VARCHAR(50) NULL,
[size_with_current_compression_setting(KB)] INT,
[size_with_requested_compression_setting(KB)] INT,
[sample_size_with_current_compression_setting(KB)] INT,
[sample_size_with_requested_compression_setting(KB)] INT,
[data_compression] VARCHAR(10) NULL
);

– Table for the concerned object
DECLARE @table_compression TABLE
(
id INT IDENTITY(1,1),
[schema_name] SYSNAME,
[object_name] SYSNAME,
index_id INT,
partition_number INT,
partition_compression VARCHAR(50),
partition_value SQL_VARIANT
);

INSERT @table_compression ([schema_name], [object_name], index_id, partition_number, partition_compression, partition_value)
SELECT
s.name,
o.name,
p.index_id,
p.partition_number,
p.data_compression_desc,
pv.value
FROM sys.partitions AS p
INNER JOIN sys.objects AS o
ON p.[object_id] = o.[object_id]
INNER JOIN sys.schemas AS s
ON s.[schema_id] = o.[schema_id]
LEFT JOIN sys.partition_range_values AS pv
ON pv.boundary_id = p.partition_number
WHERE o.[object_id] = @object_id
AND index_id < 2; — Table heap or with clustered index
SELECT @max = MAX(id)
FROM @table_compression;

DECLARE @schema_name SYSNAME;
DECLARE @object_name SYSNAME;
DECLARE @index_id INT;
DECLARE @partition_number INT;
DECLARE @partition_compression VARCHAR(50)
DECLARE @partition_value SQL_VARIANT;

WHILE @i <= @max
BEGIN

SELECT
  @schema_name = [schema_name],
  @object_name = [object_name],
  @index_id = index_id,
  @partition_number = partition_number,
  @partition_compression = partition_compression,
  @partition_value = partition_value
FROM @table_compression
WHERE id = @i;

INSERT #_estimate_data_compression_savings ([object_name], [schema_name], index_id, partition_number, [size_with_current_compression_setting(KB)],
                                             [size_with_requested_compression_setting(KB)], [sample_size_with_current_compression_setting(KB)],
                                             [sample_size_with_requested_compression_setting(KB)])
EXEC sp_estimate_data_compression_savings @schema_name, @object_name, @index_id, @partition_number, ‘PAGE';
UPDATE #_estimate_data_compression_savings
SET [data_compression] = ‘PAGE’,
  partition_value = CAST(@partition_value AS VARCHAR(50)),
  partition_compression = @partition_compression
WHERE id = SCOPE_IDENTITY()
INSERT #_estimate_data_compression_savings ([object_name], [schema_name], index_id, partition_number, [size_with_current_compression_setting(KB)],
                                             [size_with_requested_compression_setting(KB)], [sample_size_with_current_compression_setting(KB)],
                                             [sample_size_with_requested_compression_setting(KB)])
EXEC sp_estimate_data_compression_savings @schema_name, @object_name, @index_id, @partition_number, ‘ROW';
UPDATE #_estimate_data_compression_savings
SET [data_compression] = ‘ROW’,
  partition_value = CAST(@partition_value AS VARCHAR(50)),
  partition_compression = @partition_compression
WHERE id = SCOPE_IDENTITY()
SET @i += @i;
END

– Show result
;WITH estimate_data_compression_with_page
AS
(
SELECT
  [schema_name],
  [object_name],
  index_id,
  partition_number,
  partition_value,
  partition_compression,
  [data_compression],
  CASE [size_with_current_compression_setting(KB)]
   WHEN 0 THEN 0
   ELSE (1 – CAST([size_with_requested_compression_setting(KB)] * 1.
            / [size_with_current_compression_setting(KB)] AS DECIMAL(5,2))) * 100
  END AS ratio_size_compressed,
  [size_with_current_compression_setting(KB)],
  [size_with_requested_compression_setting(KB)],
  CASE [sample_size_with_current_compression_setting(KB)]
   WHEN 0 THEN 0
   ELSE (1 – CAST([sample_size_with_requested_compression_setting(KB)] * 1.
            / [sample_size_with_current_compression_setting(KB)] AS DECIMAL(5,2))) * 100
  END AS ratio_sample_compressed,
  [sample_size_with_current_compression_setting(KB)],
  [sample_size_with_requested_compression_setting(KB)]
FROM #_estimate_data_compression_savings
WHERE [data_compression] = ‘PAGE’
),
estimate_data_compression_with_row
AS
(
SELECT
  [schema_name],
  [object_name],
  index_id,
  partition_number,
  [data_compression],
  CASE [size_with_current_compression_setting(KB)]
   WHEN 0 THEN 0
   ELSE (1 – CAST([size_with_requested_compression_setting(KB)] * 1.
            / [size_with_current_compression_setting(KB)] AS DECIMAL(5,2))) * 100
  END AS ratio_size_compressed,
  [size_with_current_compression_setting(KB)],
  [size_with_requested_compression_setting(KB)],
  CASE [sample_size_with_current_compression_setting(KB)]
   WHEN 0 THEN 0
   ELSE (1 – CAST([sample_size_with_requested_compression_setting(KB)] * 1.
            / [sample_size_with_current_compression_setting(KB)] AS DECIMAL(5,2))) * 100
  END AS ratio_sample_compressed,
  [sample_size_with_current_compression_setting(KB)],
  [sample_size_with_requested_compression_setting(KB)]
FROM #_estimate_data_compression_savings
WHERE [data_compression] = ‘ROW’
)
SELECT
P.[schema_name],
P.[object_name],
P.index_id,
P.partition_number,
P.partition_compression,
P.partition_value,
CASE WHEN P.ratio_size_compressed < R.ratio_size_compressed THEN ‘ROW : ‘ + CAST(R.ratio_size_compressed AS VARCHAR(8)) + ‘ %’
      WHEN P.ratio_size_compressed > R.ratio_size_compressed THEN ‘PAGE : ‘ + CAST(P.ratio_size_compressed AS VARCHAR(8)) + ‘ %’
      ELSE ‘ROW OR PAGE : ‘ + CAST(P.ratio_size_compressed AS VARCHAR(8)) + ‘ %’
END AS preferred_method_compression,
CASE WHEN P.ratio_size_compressed < R.ratio_size_compressed THEN R.[size_with_current_compression_setting(KB)]
      WHEN P.ratio_size_compressed > R.ratio_size_compressed THEN P.[size_with_current_compression_setting(KB)]
      ELSE P.[size_with_current_compression_setting(KB)]
END AS [size_with_current_compression_setting(KB)],
CASE WHEN P.ratio_size_compressed < R.ratio_size_compressed THEN R.[size_with_requested_compression_setting(KB)]
      WHEN P.ratio_size_compressed > R.ratio_size_compressed THEN P.[size_with_requested_compression_setting(KB)]
      ELSE P.[size_with_requested_compression_setting(KB)]
END AS [size_with_current_compression_setting(KB)],
CASE WHEN P.ratio_size_compressed < R.ratio_size_compressed THEN R.[sample_size_with_current_compression_setting(KB)]
      WHEN P.ratio_size_compressed > R.ratio_size_compressed THEN P.[sample_size_with_current_compression_setting(KB)]
      ELSE P.[sample_size_with_current_compression_setting(KB)]
END AS [sample_size_with_requested_compression_setting(KB)],
CASE WHEN P.ratio_size_compressed  < R.ratio_size_compressed THEN R.[sample_size_with_requested_compression_setting(KB)]
      WHEN P.ratio_size_compressed > R.ratio_size_compressed THEN P.[sample_size_with_requested_compression_setting(KB)]
      ELSE P.[sample_size_with_requested_compression_setting(KB)]
END AS [sample_size_with_requested_compression_setting(KB)]
FROM estimate_data_compression_with_page AS P
INNER JOIN estimate_data_compression_with_row AS R
ON P.[schema_name] = R.[schema_name]
  AND P.[object_name] = R.[object_name]
   AND P.index_id = R.index_id
    AND P.partition_number = R.partition_number;

Bonne compression !!

David BARBARIN (Mikedavem)
MVP SQL Server

Une réflexion au sujet de « Connaître le gain de compression d’une table avec SQL Server »

  1. Merci pour cette procédure, qui m’a permis de découvrir (après re-lecture de la documentation :)) que les valeurs LOB ne peuvent pas être compressées.
    En revanche sur une table très volumineuse avec des types natifs et non-LOB, j’ai pu voir que la compression à la PAGE valait mieux qu’à la ligne.

    @++ ;)

Laisser un commentaire