…brew your own Business Intelligence

Sequence vs Identity – Performance Comparison

Recently, as part of an effort to support a few database design choices, I spent a bit of time researching the performance characteristics of the new sequence objects available with SQL Server 2012. The main objective was to gain an understanding of how they stacked up performance-wise with the IDENTITY property…which is the predominant design pattern for surrogate key generation prior to this release.

For some background on why surrogate keys, and key-management in general, is so important in database design, check out this post by Thomas Kejser (blog | twitter) which covers the a few of the common key-related challenges faced when trying to combine data from multiple systems.

Unfortunately, the results from google were rather unsatisfying so I decided to mock up my own tests and see for myself.

With that said, the goal here is to compare the performance impact of generating values from an IDENTITY property and a Sequence object for batch-load scenarios typical of a data warehouse environment. And because sequence objects have a configurable CACHE property which allow for a range of values to be held in memory to avoid some of the I/O overhead of plucking the next value from disk each time, I’m going to have a few test cases each with a different value (0, 50, 500, and 5000) for the CACHE property.

Before we can actually test anything, we have to set up our environment.

Here’s the script (based off of this code from Mr. Itzik Ben-Gan, so you know its legit) for creating a numbers table with 50,000,000 records…and yes, that’s a little excessive, but I’m using it for some columnstore tests that I’ll be blogging about in the coming weeks.

USE tempdb;
GO
-- Create and Populate Auxillary.Numbers
BEGIN
IF OBJECT_ID('Auxillary.Numbers','U') IS NOT NULL
DROP TABLE Auxillary.Numbers;
CREATE TABLE [Auxillary].[Numbers] (
[N] [bigint] NOT NULL,
CONSTRAINT [PK_dbo_Numbers] PRIMARY KEY CLUSTERED ([N])
)
;

DECLARE @NumBatches INT = 500
,@BatchSize INT = 100000
;

DECLARE @CurBatchNum INT = 1;

WHILE @CurBatchNum <= @NumBatches
BEGIN
BEGIN TRANSACTION;

WITH
E1(N) AS (
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1
)
,E2(N) AS (SELECT 1 FROM E1 a, E1 b)
,E3(N) AS (SELECT 1 FROM E2 a, E2 b)
,E4(N) AS (SELECT 1 FROM E3 a, E3 b)
,Tally(N) AS (
SELECT ROW_NUMBER() OVER(ORDER BY a.N) AS N
FROM E3 a, E3 b
)
INSERT INTO Auxillary.Numbers (N)
SELECT N
FROM Tally
WHERE N BETWEEN
(@BatchSize * (@CurBatchNum-1)) + 1 AND
(@BatchSize * (@CurBatchNum-1)) + @BatchSize
;

COMMIT TRANSACTION;
--CHECKPOINT;
-- Increment @CurBatchNum
SET @CurBatchNum += 1;

END;
END;

Next we have to create the tables and sequence objects used by the test…

-- Create Objects
BEGIN
IF OBJECT_ID('dbo.t1_identity','U') IS NOT NULL
DROP TABLE dbo.t1_identity;
CREATE TABLE dbo.t1_identity (
id INT IDENTITY(1,1) NOT NULL
,c1 INT NOT NULL
,CONSTRAINT PK_t1_identity PRIMARY KEY CLUSTERED (id)
);

IF OBJECT_ID('dbo.t1_seq_cache_none','U') IS NOT NULL
DROP TABLE dbo.t1_seq_cache_none;
CREATE TABLE dbo.t1_seq_cache_none (
id INT NOT NULL
,c1 INT NOT NULL
,CONSTRAINT PK_t1_seq_cache_none PRIMARY KEY CLUSTERED (id)
);
IF OBJECT_ID('dbo.Seq1_cache_none','SO') IS NOT NULL
DROP SEQUENCE dbo.Seq1_cache_none;
CREATE SEQUENCE dbo.Seq1_cache_none
AS INT
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
NO CYCLE
NO CACHE
;

IF OBJECT_ID('dbo.t1_seq_cache_50','U') IS NOT NULL
DROP TABLE dbo.t1_seq_cache_50;
CREATE TABLE dbo.t1_seq_cache_50 (
id INT NOT NULL
,c1 INT NOT NULL
,CONSTRAINT PK_t1_seq_cache_50 PRIMARY KEY CLUSTERED (id)
);
IF OBJECT_ID('dbo.Seq1_cache_50','SO') IS NOT NULL
DROP SEQUENCE dbo.Seq1_cache_50;
CREATE SEQUENCE dbo.Seq1_cache_50
AS INT
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
NO CYCLE
CACHE 50
;

IF OBJECT_ID('dbo.t1_seq_cache_500','U') IS NOT NULL
DROP TABLE dbo.t1_seq_cache_500;
CREATE TABLE dbo.t1_seq_cache_500 (
id INT NOT NULL
,c1 INT NOT NULL
,CONSTRAINT PK_t1_seq_cache_500 PRIMARY KEY CLUSTERED (id)
);
IF OBJECT_ID('dbo.Seq1_cache_500','SO') IS NOT NULL
DROP SEQUENCE dbo.Seq1_cache_500;
CREATE SEQUENCE dbo.Seq1_cache_500
AS INT
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
NO CYCLE
CACHE 500
;
IF OBJECT_ID('dbo.t1_seq_cache_5000','U') IS NOT NULL
DROP TABLE dbo.t1_seq_cache_5000;
CREATE TABLE dbo.t1_seq_cache_5000 (
id INT NOT NULL
,c1 INT NOT NULL
,CONSTRAINT PK_t1_seq_cache_5000 PRIMARY KEY CLUSTERED (id)
);
IF OBJECT_ID('dbo.Seq1_cache_5000','SO') IS NOT NULL
DROP SEQUENCE dbo.Seq1_cache_5000;
CREATE SEQUENCE dbo.Seq1_cache_5000
AS INT
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
NO CYCLE
CACHE 5000
;
END;

And finally the script to run through our tests.  The @records parameter is what I’m using to vary the batch size: 10k, 100k, 1mm.  Also, as an extra precaution, I’m dropping clean buffers (DBCC DROPCLEANBUFFERS) and emptying the plan cache (DBCC FREEPROCCACHE) to keep things as fair as possible.

DECLARE @records INT = 10000,
@sTime DATETIME,
@eTime DATETIME,
@DurIdentityMS INT,
@DurSeqNoCacheMS INT,
@DurSeqCache50MS INT,
@DurSeqCache500MS INT,
@DurSeqCache5000MS INT
;

-- test identity
BEGIN
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS
--SET STATISTICS XML ON
--SET STATISTICS TIME, IO ON
SET @sTime = GETDATE();
INSERT INTO dbo.t1_identity (c1)
SELECT 1
FROM Auxillary.Numbers
WHERE N <= @records
;
SET @eTime = GETDATE();
SET @DurIdentityMS = DATEDIFF(MS, @sTime, @eTime);
--SET STATISTICS TIME, IO OFF
--SET STATISTICS XML OFF
END
;

-- test sequence (no cache)
BEGIN
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS
--SET STATISTICS XML ON
--SET STATISTICS TIME, IO ON
SET @sTime = GETDATE();
INSERT INTO dbo.t1_seq_cache_none (
id
,c1
)
SELECT id = (NEXT VALUE FOR dbo.Seq1_cache_none)
,c1 = 1
FROM Auxillary.Numbers
WHERE N <= @records
;
SET @eTime = GETDATE();
SET @DurSeqNoCacheMS = DATEDIFF(MS, @sTime, @eTime);
--SET STATISTICS TIME, IO OFF
--SET STATISTICS XML OFF
END
;

-- test sequence (cache 50)
BEGIN
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS
--SET STATISTICS XML ON
--SET STATISTICS TIME, IO ON
SET @sTime = GETDATE();
INSERT INTO dbo.t1_seq_cache_50 (
id
,c1
)
SELECT id = (NEXT VALUE FOR dbo.Seq1_cache_50)
,c1 = 1
FROM Auxillary.Numbers
WHERE N <= @records
;
SET @eTime = GETDATE();
SET @DurSeqCache50MS = DATEDIFF(MS, @sTime, @eTime);
--SET STATISTICS TIME, IO OFF
--SET STATISTICS XML OFF
END
;

-- test sequence (cache 500)
BEGIN
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS
--SET STATISTICS XML ON
--SET STATISTICS TIME, IO ON
SET @sTime = GETDATE();
INSERT INTO dbo.t1_seq_cache_500 (
id
,c1
)
SELECT id = (NEXT VALUE FOR dbo.Seq1_cache_500)
,c1 = 1
FROM Auxillary.Numbers
WHERE N <= @records
;
SET @eTime = GETDATE();
SET @DurSeqCache500MS = DATEDIFF(MS, @sTime, @eTime);
--SET STATISTICS TIME, IO OFF
--SET STATISTICS XML OFF
END
;

-- test sequence (cache 5000)
BEGIN
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS
--SET STATISTICS XML ON
--SET STATISTICS TIME, IO ON
SET @sTime = GETDATE();
INSERT INTO dbo.t1_seq_cache_5000 (
id
,c1
)
SELECT id = (NEXT VALUE FOR dbo.Seq1_cache_5000)
,c1 = 1
FROM Auxillary.Numbers
WHERE N <= @records
;
SET @eTime = GETDATE();
SET @DurSeqCache5000MS = DATEDIFF(MS, @sTime, @eTime);
--SET STATISTICS TIME, IO OFF
--SET STATISTICS XML OFF
END
;

-- display results
SELECT Scenario = '@DurIdentityMS'
,Duration = @DurIdentityMS
UNION ALL
SELECT Scenario = '@DurSeqNoCacheMS'
,Duration = @DurSeqNoCacheMS
UNION ALL
SELECT Scenario = '@DurSeqCache50MS'
,Duration = @DurSeqCache50MS
UNION ALL
SELECT Scenario = '@DurSeqCache500MS'
,Duration = @DurSeqCache500MS
UNION ALL
SELECT Scenario = '@DurSeqCache5000MS'
,Duration = @DurSeqCache5000MS

And here are the results (duration column is milliseconds)…

For a load size of 10k records…

For a load size of 100k records…

For a load size of 1mm records…

The main take-aways are as follows:

  1. IDENTITY is faster than a sequence object
  2. CACHE option with sequence object is a very important setting in terms of performance

The first point above isn’t too surprising.  The flexibility provided by sequence objects comes at a cost…there’s no such thing as a free-lunch.

I found observation #2 very interesting.  The CACHE option for sequence objects has a very noticeable  impact on performance.  It does suffer from the law of diminishing returns…meaning the benefit of going from CACHE 0 to CACHE 50 is much more substantial than going from CACHE 500 to CACHE 5000.

Here’s a breakdown of the numbers showing how much faster each cache option is than the next smaller size…

The values in red represent how many times faster each CACHE value is than the preceding one. So it appears that the performance characteristics of an increasing CACHE value is comparable across the load sizes tested…from 0-50, the increase is around 45x…and it drops off sharply from there.

One detail to be aware of when using the CACHE option with a sequence object is that all values stored in memory will be discarded/lost if the system goes down or needs to be rebooted. Once the system comes back up, the next value from the sequence will be the first value in the *next* cache range pulled into memory. This will almost always leave a gap in the range of values plucked from the sequence and used by your application…so not a straight forward option if the values MUST be in order.  This issue could also cause you to prematurely hit the max value for your sequence due to a couple of unexpected reboots.  For example…if you chose an INT value and set the cache for 100mm values…obviously a ridiculous example, but it proves the point. Both of these issues could be handled with a workaround script to “repair” the sequence after each reboot, so that any lost values are reclaimed before the application starts back up.

 

Update 20130814: Download updated scripts including use of sequence range stored proc here.

4 thoughts on “Sequence vs Identity – Performance Comparison

  1. David Laplante says:

    Very interresting! thanks for the insight!

    1. Bill says:

      Hi David,
      Thanks for visiting – glad you found the info helpful.

  2. Ed Lawton says:

    Good post and certainly got me thinking about our intended move to Sequence. I found if you reserve the number in advance, you get better results – extending your code I got the following:

    BEGIN
    DBCC FREEPROCCACHE
    DBCC DROPCLEANBUFFERS

    –EXEC sp_
    –SET STATISTICS XML ON
    –SET STATISTICS TIME, IO ON

    DECLARE
    @range_first_value SQL_VARIANT,
    @range_first_value_output SQL_VARIANT,
    @range_last_value_output SQL_VARIANT;

    DECLARE
    @i BIGINT,
    @j BIGINT

    SET @sTime = GETDATE();

    –EXEC sp_sequence_get_range @sequence_name = N’dbo.Seq1_cache_1000000′, @range_size = @records, @range_first_value = @range_first_value_output OUTPUT, @range_last_value = @range_last_value_output OUTPUT;
    –EXEC sp_sequence_get_range @sequence_name = N’dbo.Seq1_cache_100000′, @range_size = @records, @range_first_value = @range_first_value_output OUTPUT, @range_last_value = @range_last_value_output OUTPUT;
    –EXEC sp_sequence_get_range @sequence_name = N’dbo.Seq1_cache_50′, @range_size = @records, @range_first_value = @range_first_value_output OUTPUT, @range_last_value = @range_last_value_output OUTPUT;
    EXEC sp_sequence_get_range @sequence_name = N’dbo.Seq1_cache_none’, @range_size = @records, @range_first_value = @range_first_value_output OUTPUT, @range_last_value = @range_last_value_output OUTPUT;

    SET @i = CAST(@range_first_value_output AS BIGINT)
    SET @j = CAST(@range_last_value_output AS BIGINT)

    INSERT INTO dbo.t1_seq_Test
    ( id,
    c1
    )
    SELECT
    N,
    1
    FROM
    Numbers
    WHERE
    N >= @i
    AND N <= @j;

    SET @eTime = GETDATE();
    SET @DurSeqTestMS = DATEDIFF(MS, @sTime, @eTime);

    END;

    Using the no cache sequence and 1,000,000 gives results of

    Scenario Duration countRows
    @DurIdentityMS 2220 1000000
    @DurSeqTestMS 1936 1000000

    Thanks again

    1. Bill says:

      Ed – very cool…not sure why I didn’t initially include the get-range stored proc.

      I did a bit of testing this morning on the same laptop I used for this initial blog post. Not sure exactly why, but I’m not getting the results you show below…I posted an zip with updated scripts that include your code with the results for 1mm records shown below:

      @DurIdentityMS 2470
      @DurSeqNoCacheMS 104963
      @DurSeqCache50MS 5383
      @DurSeqCache500MS 3020
      @DurSeqCache5000MS 2366
      @DurSeqNoCacheRangeMS 2526

Leave a Reply