usp_SQLServerCarpenter_Tools_Estimate_Cardinality

/*
	Author	:	Brahmanand Shukla (SQLServerCarpenter.com)
	Date	:	04-Jul-2022
	Purpose	:	This procedure will estimate the cardinality of all the columns of the supplied table list. 
				If the table list is not supplied (or supplied with NULL, the default value) then it will estimate the cardinality for all the tables in the database.

	Example: 

	/* For specific tables with default @Column_Batch_Size */
	EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
		@TableList				=	'dbo.Parent,dbo.Child,dbo.SubChild'

	/* For specific tables with custom @Column_Batch_Size */
	EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
		@TableList				=	'dbo.Parent,dbo.Child,dbo.SubChild'
		, @Column_Batch_Size	=	50
		
	/* For all tables with default @Column_Batch_Size */
	EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality

	/* For all tables with custom @Column_Batch_Size */
	EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
		@Column_Batch_Size	=	50

	*** Compatibility Warning: ***
	This tool is compatible with SQL Server 2016 (13.x) and later since it uses the STRING_SPLIT function. 
	In case it needs to be used in versions prior to SQL Server 2016 (13.x) 
		1)	Find an alternative to STRING_SPLIT function and you can use it without any hiccups.
		2)	In case this tool is not required to be run for the specific table list, it can be used even for versions prior to SQL Server 2016 (13.x). 
			Just comment the relevant lines of code which is splitting the comma separated table names and loading it in the @tbl_InputTableList table variable.


	*** Notes: ***
	1) The usp_SQLServerCarpenter_Tools_Estimate_Cardinality uses the following physical table to avoid the repeatation of calculation of cardinality of same columns and table. 
	2) The cardinality once calculated won't be calculated again, unless the respective column entries are deleted from the tbl_Cardinality table. 

	IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name = 'tbl_Cardinality')
		BEGIN
			CREATE TABLE tbl_Cardinality
			(
				ObjectID						INT
				, SchemaName					SYSNAME
				, TableName						SYSNAME
				, TotalRows						DECIMAL
				, ColumnName					SYSNAME
				, Column_Distinct_Values_Count	DECIMAL
				, Cardinality					AS	CAST(((Column_Distinct_Values_Count / NULLIF(TotalRows, 0)) * 100) AS INT)
			)
		END
*/
CREATE OR ALTER PROCEDURE usp_SQLServerCarpenter_Tools_Estimate_Cardinality
(
	@TableList				NVARCHAR(MAX) = NULL	/*	Table Names should be in two part naming format and separated by comma. 
														For e.g. 'dbo.Parent,dbo.Child,dbo.SubChild' etc.*/
	, @Column_Batch_Size	INT = 99999				/*	This is Column Batch Size. 
														Default is 99999 which means Cardinality of All the columns of a table will be calculated in a single iteration. 
														But if the table is heavy in terms of rows and columns then this parameter can be tuned/refined to limit the number of columns 
														in an iteration of calculation of cardinality of columns of a table. 
														For example if the table has 100 columns and the @Column_Batch_Size parameter is supplied as 20 then 
														the Cardinality of 20 columns of a table will be calculated in an iteration. 
														It will take 5 iterations to complete the cardinality calculation of all the columns of the table*/
)
AS
	BEGIN
		SET NOCOUNT ON
		SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED

		IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name = 'tbl_Cardinality')
			BEGIN
				CREATE TABLE tbl_Cardinality
				(
					ObjectID						INT
					, SchemaName					SYSNAME
					, TableName						SYSNAME
					, TotalRows						DECIMAL
					, ColumnName					SYSNAME
					, Column_Distinct_Values_Count	DECIMAL
					, Cardinality					AS	CAST(((Column_Distinct_Values_Count / NULLIF(TotalRows, 0)) * 100) AS INT)
				)
			END

		/* Code to drop the temporary tables. It may be useful at the time of debugging the procedure.
		IF OBJECT_ID('tempdb..#tmp_Columns') IS NOT NULL DROP TABLE #tmp_Columns;
		IF OBJECT_ID('tempdb..#tmp_Tables') IS NOT NULL DROP TABLE #tmp_Tables;
		*/

		DECLARE @tbl_InputTableList		TABLE
		(
			[ObjectID]		SYSNAME
		)

		IF (ISNULL(@TableList, '') = '')
			BEGIN
				INSERT INTO @tbl_InputTableList ([ObjectID])
				SELECT	[object_id] AS [ObjectID]
				FROM	sys.tables
			END
		ELSE
			BEGIN
				INSERT INTO @tbl_InputTableList ([ObjectID])
				SELECT	OBJECT_ID([value]) AS [ObjectID]
				FROM	STRING_SPLIT(@TableList, ',')
			END

		SELECT C.object_id
			, T.name AS TableName
			, C.name AS ColumnName
			, ROW_NUMBER() OVER(PARTITION BY C.object_id ORDER BY C.name ASC) AS RowID
		INTO #tmp_Columns
		FROM sys.columns C
			INNER JOIN sys.tables T
				ON T.object_id = C.object_id
				AND T.type = 'U'
			INNER JOIN @tbl_InputTableList I
				ON I.ObjectID = T.object_id
			LEFT JOIN tbl_Cardinality D
				ON D.TableName = T.name
				AND D.ColumnName = C.Name
		WHERE D.TableName IS NULL

		SELECT DISTINCT C.object_id
			, S.name AS SchemaName
			, T.name AS TableName
			, IDENTITY(INT, 1, 1) AS RowID
		INTO #tmp_Tables
		FROM #tmp_Columns C
			INNER JOIN sys.tables T
				ON T.object_id = C.object_id
			INNER JOIN sys.schemas S
				ON S.schema_id = T.schema_id

		DECLARE @SchemaName							AS  SYSNAME
			, @TableName							AS  SYSNAME
			, @ColumnName                           AS  SYSNAME
			, @Object_ID                            AS  INT
			, @Table_RowID                          AS  INT
			, @Total_Rows                           AS  BIGINT
			, @Column_From_RowID                    AS  INT
			, @Column_To_RowID						AS  INT
 
		SET @Table_RowID = 1;
		WHILE EXISTS (SELECT 1 FROM #tmp_Tables WHERE RowID = @Table_RowID)
			BEGIN
				SELECT @Object_ID   =   object_id
					, @SchemaName	=	SchemaName
					, @TableName    =   TableName
				FROM #tmp_Tables 
				WHERE RowID = @Table_RowID;

				SELECT @Column_From_RowID	=	1
					, @Column_To_RowID		=	@Column_Batch_Size;

				WHILE EXISTS (SELECT 1 FROM #tmp_Columns WHERE object_id = @Object_ID AND RowID BETWEEN @Column_From_RowID AND @Column_To_RowID)
					BEGIN   
						DECLARE @SQLString NVARCHAR(MAX);  
 
						IF (@Column_From_RowID = 1)
							BEGIN
								SET @SQLString = N'	SELECT @Total_Rows = COUNT(1)
													FROM ' + QUOTENAME(@TableName) + ' WITH (NOLOCK)';

								BEGIN TRY
									EXECUTE sp_executesql @SQLString
										, N'@Total_Rows BIGINT OUTPUT'
										, @Total_Rows = @Total_Rows OUTPUT;  
								END TRY
 
								BEGIN CATCH
								END CATCH
							END

						SELECT @SQLString	=	STUFF
												(
													(
														SELECT N' UNION ALL SELECT '  
																		+ '' + CAST(@Object_ID AS VARCHAR(50)) + ' AS [ObjectID]'
																		+ ', ''' + QUOTENAME(@SchemaName) + '''' + ' AS [SchemaName]'
																		+ ', ''' + QUOTENAME(@TableName) + '''' + ' AS [TableName]'
																		+ ', ' + CAST(@Total_Rows AS VARCHAR(50)) + ' AS [TotalRows]'
																		+ ', ' + '''' + QUOTENAME(ColumnName) + '''' + ' AS [ColumnName]'
																		+ ', ' + 'CAST(COUNT(DISTINCT ' + QUOTENAME(ColumnName) + ') AS VARCHAR(500))' + ' AS [Column_Distinct_Values_Count]' 
																		+ ' FROM ' + QUOTENAME(@TableName)
														FROM #tmp_Columns 
														WHERE object_id = @Object_ID
															AND RowID BETWEEN @Column_From_RowID AND @Column_To_RowID
														FOR XML PATH ('')
													), 1, 10, ''
												)

						BEGIN TRY
							SET @SQLString = 'INSERT INTO tbl_Cardinality (ObjectID, SchemaName, TableName, TotalRows, ColumnName, Column_Distinct_Values_Count) ' 
											+ @SQLString;
							EXECUTE sp_executesql @SQLString;
						END TRY
 
						BEGIN CATCH
							SELECT @SchemaName AS [SchemaName], @TableName AS [TableName], ERROR_MESSAGE() AS [ErrorMessage]
						END CATCH
				
						SELECT @Column_From_RowID	=	@Column_To_RowID + 1
							, @Column_To_RowID		=	@Column_To_RowID + @Column_Batch_Size;
					END
 
				SET @Table_RowID = @Table_RowID + 1;
			END

		SELECT * FROM tbl_Cardinality;

		IF OBJECT_ID('tempdb..#tmp_Columns') IS NOT NULL DROP TABLE #tmp_Columns;
		IF OBJECT_ID('tempdb..#tmp_Tables') IS NOT NULL DROP TABLE #tmp_Tables;
	END

2 thoughts on “usp_SQLServerCarpenter_Tools_Estimate_Cardinality

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s