Category Archives: Database Maintenance

My Red-Gate Hyperbac + DBCC CheckDB Script

Several people have asked me to post the script I mentioned in my previous post, which automatically cycles through each database in an instance, restores a virtual HyperBac copy of it, performs consistency checks, and then e-mails out the results.

I am sorry it took me so long to get this put together; pulling it out of my environment was the tough part.  I have a lot of infrastructure built up around this process, so I had to extract the relevant pieces and consolidate them into one consistent script.  I have not tested this standalone version very thoroughly, so please use it with caution and make sure it works in your environment.  Keep in mind that you must already have HyperBac installed and set up to make proper use of this script.

Thank you for your patience:

SET NOCOUNT ON;
 
-- Constants
DECLARE @BACKUP_DAYS_THRESHOLD				INT = 8,
		@NEW_BACKUP_FILE_LOCATION			VARCHAR(256) = 'C:\Temp\BackupFileCopies\',
		@DATAFILE_RESTORATION_LOCATION		VARCHAR(256) = 'C:\Temp\DataFileRestores\',
		@DB_MAIL_PROFILENAME				SYSNAME = 'Database Administrators', 
		@EMAIL_RECIPIENTS					VARCHAR(1000) = 'yourEmail@yourDomain.com'
 
-- Retrieve List of Last Full Backups and their Backup Files
IF OBJECT_ID('TempDB..#lastBackupsTaken') IS NOT NULL
	DROP TABLE [#lastBackupsTaken]
 
CREATE TABLE [#lastBackupsTaken]
(
[serverName]		VARCHAR(256),
[databaseName]		VARCHAR(256),
[physicalFileSpec]	VARCHAR(256),
[backupFinishDate]	DATETIME
)
 
INSERT INTO
	[#lastBackupsTaken]
	(
	[serverName],
	[databaseName],
	[physicalFileSpec],
	[backupFinishDate]
	)
SELECT
	@@SERVERNAME,
	[BackupSets].[database_name],
	[MediaFamily].[physical_device_name],
	[BackupSets].[backup_finish_date]
FROM
	[msdb].[dbo].[backupset] AS BackupSets
 
		INNER JOIN (
					SELECT
						[database_name],
						MAX([backup_start_date]) AS MaxBackup_Start_Date
					FROM
						[msdb].[dbo].[backupset]
					WHERE
							[type] = 'D'
						AND	[backup_finish_date] IS NOT NULL
					GROUP BY
						[database_name]
					) AS Constrained ON
				[BackupSets].[database_name] = [Constrained].[database_name]
			AND	[BackupSets].[backup_start_date] = [Constrained].[MaxBackup_Start_Date]
			AND	[BackupSets].[type] = 'D'
 
		INNER JOIN [msdb].[dbo].[backupmediafamily] AS MediaFamily ON
			[BackupSets].[media_set_id] = [MediaFamily].[media_set_id]
 
-- Retrieve List of the Logical Files in the Databases as of the Last Full Backups
IF OBJECT_ID('TempDB..#logicalFilesForlastBackupsTaken') IS NOT NULL
	DROP TABLE [#logicalFilesForlastBackupsTaken]
 
CREATE TABLE [#logicalFilesForlastBackupsTaken]
(
[serverName]	VARCHAR(256),
[databaseName]	VARCHAR(256),
[logicalName]	VARCHAR(256)
)
 
INSERT INTO
	[#logicalFilesForlastBackupsTaken]
SELECT
	@@SERVERNAME,
	[BackupSets].[database_name],
	[LogicalFiles].[logical_name]
FROM
	[msdb].[dbo].[backupset] AS BackupSets
 
		INNER JOIN (
					SELECT
						[database_name],
						MAX([backup_start_date]) AS MaxBackup_Start_Date
					FROM
						[msdb].[dbo].[backupset]
					WHERE
							[type] = 'D'
						AND	[backup_finish_date] IS NOT NULL
					GROUP BY
						[database_name]
					) AS Constrained ON
				[BackupSets].[database_name] = [Constrained].[database_name]
			AND	[BackupSets].[backup_start_date] = [Constrained].[MaxBackup_Start_Date]
 
		INNER JOIN [msdb].[dbo].[backupfile] AS LogicalFiles ON
				[BackupSets].[backup_set_id] = [LogicalFiles].[backup_set_id]
			AND	[LogicalFiles].[is_present] = 1
 
-- Create a list of all databases we are going to restore and Consistency Check (DBCC CHECKDB)
IF OBJECT_ID('TempDB..#databasesToCheck') IS NOT NULL
	DROP TABLE [#databasesToCheck]
 
CREATE TABLE [#databasesToCheck]
(
[databaseName]		VARCHAR(256),
[executionOrder]	INT IDENTITY(1,1)
)
 
INSERT INTO
	[#databasesToCheck]
	(
	[databaseName]
	)
SELECT
	[databaseName]
FROM
	[#lastBackupsTaken]
WHERE
	[databaseName] NOT IN (
							'master',
							'tempdb'
							)
GROUP BY
	[databaseName]
 
-- For each database in the list, copy the backup files, run a Red-Gate Hyperbac Virtual Restore, Run Consistency Check, Record the results, and then drop the database
DECLARE @currentDBIdentifier			INT,
		@maxDBIdentifier				INT,
		@currentDBName					VARCHAR(256),
		@lastBackupTime					DATETIME,
		@statusMessage					VARCHAR(MAX) = '',
		@statusCode						INT = 0,
		@errorMessage					VARCHAR(MAX) = '',
		@messageSubject					VARCHAR(256),
		@sql							VARCHAR(MAX) = '',
		@backupCopyResults				VARCHAR(MAX) = '',
		@currentBackupFileIdentifier	INT,
		@maxBackupFileIdentifier		INT,
		@commandLine					VARCHAR(2000),
		@databaseWasRestored			BIT = 0
 
IF OBJECT_ID('TempDB..#backupFilesToHandle') IS NOT NULL
	DROP TABLE [#backupFilesToHandle]
 
CREATE TABLE [#backupFilesToHandle]
(
[oldFileSpec]		VARCHAR(256),
[newFileSpec]		VARCHAR(256),
[executionOrder]	INT IDENTITY(1,1)
)
 
IF OBJECT_ID('TempDB..#commandLineResults') IS NOT NULL
	DROP TABLE [#commandLineResults]
 
CREATE TABLE [#commandLineResults]
(
[outputLine]		NVARCHAR(255)
)
 
IF OBJECT_ID('TempDB..#checkDBResults') IS NOT NULL
	DROP TABLE [#checkDBResults]
 
CREATE TABLE [#checkDBResults]
(
[ServerName] [varchar](100) NULL,
[DatabaseName] [varchar](256) NULL,
[Error] [varchar](256) NULL,
[Level] [varchar](256) NULL,
[State] [varchar](256) NULL,
[MessageText] [varchar](7000) NULL,
[RepairLevel] [varchar](256) NULL,
[Status] [varchar](256) NULL,
[DbId] [varchar](256) NULL,
[Id] [varchar](256) NULL,
[IndId] [varchar](256) NULL,
[PartitionId] [varchar](256) NULL,
[AllocUnitId] [varchar](256) NULL,
[File] [varchar](256) NULL,
[Page] [varchar](256) NULL,
[Slot] [varchar](256) NULL,
[RefFile] [varchar](256) NULL,
[RefPage] [varchar](256) NULL,
[RefSlot] [varchar](256) NULL,
[Allocation] [varchar](256) NULL,
[insert_date] [datetime] NULL
)
 
-- Begin Database Loop
SELECT
	@currentDBIdentifier = MIN([executionOrder]),
	@maxDBIdentifier = MAX([executionOrder])
FROM
	[#databasesToCheck]
 
WHILE (@currentDBIdentifier <= @maxDBIdentifier)
	BEGIN
		SELECT
			@currentDBName = [databaseName]
		FROM
			[#databasesToCheck]
		WHERE
			[executionOrder] = @currentDBIdentifier

		-- Let's make sure the last database backup isn't too old (in case some third-party script clears off old database backups)
		SELECT
			@lastBackupTime = MAX([backupFinishDate])
		FROM
			[#lastBackupsTaken]
		WHERE
			[databaseName] = @currentDBName

		IF (DATEDIFF(DAY,@lastBackupTime,GETDATE()) > @BACKUP_DAYS_THRESHOLD)
			BEGIN
				-- The oldest backup for this database was taken too long ago
				SET @statusMessage =  @statusMessage + 'DBCC FOR ' + @currentDBName + ' was not properly performed (Last Backup Too Old)
'
				SET @statusCode = @statusCode + 1
			END
		ELSE
			BEGIN
				-- Prepare the Backup Files
				TRUNCATE TABLE [#backupFilesToHandle]
 
				INSERT INTO
					[#backupFilesToHandle]
					(
					[oldFileSpec],
					[newFileSpec]
					)
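				-- The SUBSTRING/REVERSE expression below pulls the bare file name (everything after the last backslash) off the old file spec and appends it to the new backup location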
				SELECT
					[physicalFileSpec],
					@NEW_BACKUP_FILE_LOCATION + SUBSTRING([physicalFileSpec],((LEN([physicalFileSpec]))-(CHARINDEX('\',REVERSE([physicalFileSpec])))+2),(CHARINDEX('\',REVERSE([physicalFileSpec])))-1)
				FROM
					[#lastBackupsTaken]
				WHERE
					[databaseName] = @currentDBName
 
				-- Start the restore script and copy the backup files
				SET @sql = 'RESTORE DATABASE [' + @currentDBName + '_Virtual] FROM
							'
 
				-- Begin Backup File Loop
				SELECT
					@currentBackupFileIdentifier = MIN([executionOrder]),
					@maxBackupFileIdentifier = MAX([executionOrder])
				FROM
					[#backupFilesToHandle]
 
				WHILE (@currentBackupFileIdentifier <= @maxBackupFileIdentifier)
					BEGIN
 
						-- Create Command Line syntax for file copy
						SELECT
							@commandLine = 'copy "' + [oldFileSpec] + '" "' + [newFileSpec] + '" /Y'
						FROM
							[#backupFilesToHandle]
						WHERE
							[executionOrder] = @currentBackupFileIdentifier
 
						-- Truncate the Command Line Results Table
						TRUNCATE TABLE [#commandLineResults]
 
						INSERT INTO
							[#commandLineResults]
						EXEC
							[master].[dbo].[xp_cmdshell] @commandLine
 
						-- Record Copy Results
						SET @backupCopyResults = @backupCopyResults + '
For command issued=' + @commandLine + '
'
 
						SELECT
							@backupCopyResults = @backupCopyResults + '
' + ISNULL([outputLine],'NULL')
						FROM
							[#commandLineResults]
 
						-- Add this file to the restore script
						SELECT
							@sql = @sql + 'DISK=N''' + [newFileSpec] + ''','
						FROM
							[#backupFilesToHandle]
						WHERE
							[executionOrder] = @currentBackupFileIdentifier
 
						SET @currentBackupFileIdentifier = @currentBackupFileIdentifier + 1
					END -- Loop to next Backup File
 
				-- Now that all backup files have been moved and we have added their new locations to the restore script,
					-- we now need to remove the trailing comma
				SET @sql = LEFT(@sql,LEN(@sql)-1) + '
				WITH 
				'
 
				-- Now we need to add the database files to the restore script
				SELECT
					@sql = @sql + 'MOVE N''' + [logicalName] + ''' TO N''' + @DATAFILE_RESTORATION_LOCATION + LEFT(@currentDBName,35) + '_Virtual_' + LEFT([logicalName],35) +'.vmdf'','
				FROM
					[#logicalFilesForlastBackupsTaken]
				WHERE
					[databaseName] = @currentDBName
 
				-- Append the restore options (the trailing comma left by the last MOVE clause separates it from these options)
				SET @sql = @sql + 'NORECOVERY, STATS=1, REPLACE
				'
				-- Now, we have the files moved and the restoration script created.  Next thing to do is to restore the database (using hyperbac)
				SET @databaseWasRestored = 0
 
				BEGIN TRY
 
					-- Restore the database
					EXEC(@sql)
 
					-- Recover the database
					SET @sql = 'RESTORE DATABASE [' + @currentDBName + '_Virtual] WITH RECOVERY, RESTRICTED_USER'
					EXEC(@sql)
 
					-- Put the virtual DB in Simple Recovery Model, since we do not need anything higher than that for the DBCC CHECKDB
					SET @sql = 'ALTER DATABASE [' + @currentDBName + '_Virtual] SET RECOVERY SIMPLE WITH NO_WAIT'
					EXEC(@sql)
 
					SET @databaseWasRestored = 1
 
				END TRY
 
				BEGIN CATCH
 
						SET @errorMessage = @errorMessage + @currentDBName + '
' + @backupCopyResults + '
 
' + 'Error Number: ' + CONVERT(VARCHAR,ERROR_NUMBER()) + ', Error Message: ' + ERROR_MESSAGE() + '
'
						SET	@statusMessage = @statusMessage + 'DBCC FOR ' + @currentDBName + ' was not restored properly (error message below). 
'
						SET @statusCode = @statusCode + 1	
 
				END CATCH
 
				-- Only continue if the database was properly restored
				IF (@databaseWasRestored = 1)
					BEGIN
 
						-- Run DBCC CHECKDB and Save the results to a table
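						-- Note: this column list matches the WITH TABLERESULTS output of SQL Server 2008-era builds; newer versions return additional columns, so adjust [#checkDBResults] and this insert list to match your version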
						INSERT INTO
							[#checkDBResults]
							(
							[Error],
							[Level],
							[State],
							[MessageText],
							[RepairLevel],
							[Status],
							[DbId],
							[Id],
							[IndId],
							[PartitionId],
							[AllocUnitId],
							[File],
							[Page],
							[Slot],
							[RefFile],
							[RefPage],
							[RefSlot],
							[Allocation]
							)
						EXEC('DBCC CHECKDB(''' + @currentDBName + '_Virtual'') WITH TABLERESULTS')
 
						-- Fill in missing information
						UPDATE
							[#checkDBResults]
						SET
							[ServerName] = @@SERVERNAME,
							[DatabaseName] = @currentDBName
						WHERE
							[ServerName] IS NULL
 
						-- Drop the restored database
						EXEC('DROP DATABASE ['+ @currentDBName + '_Virtual]')
 
						-- analyze all DBCC checkdb results, e-mail out when an error is encountered
						IF EXISTS (
									SELECT
										[ServerName]
									FROM
										[#checkDBResults]
									WHERE
											[ServerName] = @@SERVERNAME
										AND	[DatabaseName] = @currentDBName
										AND [MessageText] LIKE 'CHECKDB found 0 allocation errors and 0 consistency errors in database %'
									)
							BEGIN
								SET	@statusMessage = @statusMessage + 'DBCC FOR ' + @currentDBName + ' Passed.
'
							END	-- Condition: A passing entry for this DB in DBCC Results
						ELSE IF EXISTS (
										SELECT
											[ServerName]
										FROM
											[#checkDBResults]
										WHERE
												[ServerName] = @@SERVERNAME
											AND	[DatabaseName] = @currentDBName
										)
							BEGIN
								SET	@statusMessage = @statusMessage + 'DBCC FOR ' + @currentDBName + ' Failed! (Check the [#checkDBResults] table)
'
								SET @statusCode = @statusCode + 1
							END	-- Condition: No passing entry for this DB in DBCC Results
						ELSE
							BEGIN
								SET	@statusMessage = @statusMessage + 'DBCC FOR ' + @currentDBName + ' was not properly performed (Check Configuration)
'
								SET @statusCode = @statusCode + 1
							END	-- Condition: No entry whatsoever for this DB in DBCC Results
 
					END -- End of "Database was properly restored"
 
			END -- End of "Check if last backup is too old"

		-- Advance to the next database regardless of which branch was taken above (otherwise a too-old backup would loop forever)
		SET @currentDBIdentifier = @currentDBIdentifier + 1
 
	END -- Loop to next Database
 
SET	@statusMessage = @statusMessage + '
DBCC CheckDB Process has completed for ' + @@SERVERNAME + ' at ' + CONVERT(VARCHAR,GETDATE(),120) + '
'
 
IF @statusCode = 0
	BEGIN
		SET @messageSubject = 'SUCCESS - DBCC CheckDB for ' + @@SERVERNAME
	END	-- Condition: There were no errors or failures in the consistency checking of this instance
ELSE
	BEGIN
		SET @messageSubject = 'FAILURE - DBCC CheckDB for ' + @@SERVERNAME
		SET @statusMessage = @statusMessage + @errorMessage
	END	-- Condition: At least one consistency check either failed or resulted in an error
 
EXEC [msdb].[dbo].[sp_send_dbmail]
	@profile_name = @DB_MAIL_PROFILENAME, 
	@recipients = @EMAIL_RECIPIENTS,
	@body = @statusMessage,
	@subject = @messageSubject,
	@body_format = 'HTML';
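
One prerequisite worth calling out: the file copies rely on xp_cmdshell, which is disabled by default.  If it is off in your environment, a sysadmin can enable it first; a minimal sketch:

EXEC sp_configure 'show advanced options', 1;
RECONFIGURE;
EXEC sp_configure 'xp_cmdshell', 1;
RECONFIGURE;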

Monitoring Virtual Log File Density in SQL Server

The SQL Server Transaction Log is composed of Virtual Log Files (VLFs), which vary in size depending on how you have grown your Transaction Log over time.  Each transaction that occurs on your SQL Server instance is written to the Transaction Log before being hardened to your data files.  SQL Server starts writing to one VLF and moves on to the next as it progresses through the Transaction Log.  This write-ahead logging is how SQL Server maintains the “D” in ACID (Durability), and it is also what makes rollback, and thus Atomicity, possible.  There is much more to be said about the Transaction Log, but I will not go into the details in this post.

If a transaction runs for a very long time, if a log backup has not been taken in a while, or if a VLF cannot be cleared for any other reason (a database mirror might be down, for instance), the number of available VLFs will start to diminish.  If the Transaction Log ever runs out of available VLFs, SQL Server must either grow the Transaction Log or halt the database: without space to record transactions, the database cannot remain available to users and still preserve ACIDity.  Neither eventuality is optimal: you do not want your database to become unavailable, nor do you want your Transaction Log to grow if you can avoid it.  When a Transaction Log grows, SQL Server must zero out the entirety of the added disk space (Instant File Initialization does not apply to log files, even when it is configured properly).
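
As an aside, when the log is not clearing, the log_reuse_wait_desc column of sys.databases tells you why (LOG_BACKUP, ACTIVE_TRANSACTION, DATABASE_MIRRORING, and so on); a quick check:

SELECT
      [name],
      [log_reuse_wait_desc]
FROM
      [sys].[databases]
ORDER BY
      [name]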

To avoid these sorts of scenarios, I monitor the density of my active VLFs, to try to proactively handle any situation that might arise, before it becomes a crisis.  Below, I will provide the script that I use to check each Transaction Log in the instance.

This script runs “DBCC LogInfo” against each database in the instance, records the number of VLFs, how many are active, and how many are available, then calculates the density as a percentage, and displays the results:

DECLARE @databaseList TABLE
(
      [database]        VARCHAR(256),
      [executionOrder]  INT IDENTITY(1,1) NOT NULL
)
 
DECLARE @vlfDensity     TABLE
(
      [server]          VARCHAR(256),
      [database]        VARCHAR(256),
      [density]         DECIMAL(7,2),
      [unusedVLF]       INT,
      [usedVLF]         INT,
      [totalVLF]        INT
)
 
DECLARE @logInfoResult TABLE
(
      [FileId]      INT NULL,
      [FileSize]    BIGINT NULL,
      [StartOffset] BIGINT NULL,
      [FSeqNo]      INT NULL,
      [Status]      INT NULL,
      [Parity]      TINYINT NULL,
      [CreateLSN]   NUMERIC(25, 0) NULL
)
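-- Note: on SQL Server 2012 and later, DBCC LOGINFO returns an additional leading
-- [RecoveryUnitId] column; if you are on a newer version, add it (INT NULL) as the
-- first column of @logInfoResult above so the INSERT...EXEC lines up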
 
DECLARE
    @currentDatabaseID      INT,
    @maxDatabaseID          INT,
    @dbName                 VARCHAR(256),
    @density                DECIMAL(7,2),
    @unusedVLF              INT,
    @usedVLF                INT,
    @totalVLF               INT
 
INSERT INTO
      @databaseList
      (
      [database]
      )
SELECT
      [name]
FROM
      [sys].[sysdatabases]
 
SELECT
      @currentDatabaseID = MIN([executionOrder]),
      @maxDatabaseID = MAX([executionOrder])
FROM
      @databaseList
 
WHILE @currentDatabaseID <= @maxDatabaseID
      BEGIN
 
            SELECT
                  @dbName = [database]
            FROM
                  @databaseList
            WHERE
                  [executionOrder] = @currentDatabaseID
 
            DELETE FROM
                  @logInfoResult
 
            INSERT INTO
                  @logInfoResult
            EXEC('DBCC LOGINFO([' + @dbName + '])')
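            -- DBCC LOGINFO reports [Status] = 0 for reusable VLFs and [Status] = 2 for active ones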
 
            SELECT
                  @unusedVLF = COUNT(*)
            FROM
                  @logInfoResult
            WHERE
                  [Status] = 0
 
            SELECT
                  @usedVLF = COUNT(*)
            FROM
                  @logInfoResult
            WHERE
                  [Status] = 2
 
            SELECT
                  @totalVLF = COUNT(*)
            FROM
                  @logInfoResult
 
            SELECT
                  @density = CONVERT(DECIMAL(7,2),@usedVLF) / CONVERT(DECIMAL(7,2),@totalVLF) * 100
 
            INSERT INTO
                  @vlfDensity
                  (
                  [server],
                  [database],
                  [density],
                  [unusedVLF],
                  [usedVLF],
                  [totalVLF]
                  )
            VALUES
                  (
                  @@SERVERNAME,
                  @dbName,
                  @density,
                  @unusedVLF,
                  @usedVLF,
                  @totalVLF
                  )
 
            SET @currentDatabaseID = @currentDatabaseID + 1
      END
 
SELECT
    [server],
    [database],
    [density],
    [unusedVLF],
    [usedVLF],
    [totalVLF]
FROM
      @vlfDensity
ORDER BY
      [density] DESC
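
As a side note, on SQL Server 2016 SP2 and later, the sys.dm_db_log_info DMV exposes the same VLF information without the INSERT...EXEC gymnastics; a minimal sketch of an equivalent density query:

SELECT
      @@SERVERNAME AS [server],
      DB_NAME(li.[database_id]) AS [database],
      CONVERT(DECIMAL(7,2),SUM(CASE WHEN li.[vlf_active] = 1 THEN 1 ELSE 0 END))
            / CONVERT(DECIMAL(7,2),COUNT(*)) * 100 AS [density],
      SUM(CASE WHEN li.[vlf_active] = 0 THEN 1 ELSE 0 END) AS [unusedVLF],
      SUM(CASE WHEN li.[vlf_active] = 1 THEN 1 ELSE 0 END) AS [usedVLF],
      COUNT(*) AS [totalVLF]
FROM
      [sys].[databases] AS d
      CROSS APPLY [sys].[dm_db_log_info](d.[database_id]) AS li
WHERE
      d.[state_desc] = 'ONLINE' -- offline databases cannot be probed
GROUP BY
      li.[database_id]
ORDER BY
      [density] DESC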

I sometimes run this script across all of my instances using Red-Gate MultiScript to see how my VLF densities are doing across my entire enterprise.  This is particularly useful when our DR site is down for some reason; it helps me keep track of which databases, if any, are in danger of being impacted by VLFs that are unable to clear.

Perform DBCC CheckDB Weekly using Red-Gate SQL Virtual Restore

There are many articles on the Internet explaining why it is important to check the consistency of all of your databases on a regular basis.  If you are not made aware of corruption in your system as soon as possible, you are likely to miss a growing problem until you are in a crisis.  However, even though we know consistency checking is important, it can be hard to find a time when you can afford the large performance hit on your production machines, given how resource-intensive CheckDB is.

So, rather than run CheckDB on my production boxes, I made the choice to take the latest full backup and restore it to a test server, and run CheckDB against that every week.  While this does not avoid the possibility of corruption happening since the last full backup, it does let me know that, at the very least, my last full backup is safe.  I also get the added built-in benefit of testing my restore strategy each week.

However, this poses other problems.  Some of my databases are quite large, and doing a full restore takes a considerable amount of time and disk space.  This is where Red-Gate comes in.

Red-Gate recently added a tool to their arsenal called SQL Virtual Restore.  This tool uses Red-Gate’s HyperBac Filter Driver to let a DBA restore a virtual copy of a database using far less space and far less time, all with native SQL backup and restore syntax.  SQL Virtual Restore references the backups you supply to it, creates custom “indexes” on your local disk (to help Red-Gate traverse your backups quickly), and creates small, empty structures for your files (virtual database files).  This allows an unaltered database backup to serve your reads while the virtual database files serve your writes.  The savings in time and space are truly amazing.
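
To make that concrete, here is a minimal sketch of what a virtual restore looks like (the database name and paths are illustrative; the .vmdf extension is what cues the HyperBac driver to create virtual files):

RESTORE DATABASE [MyDatabase_Virtual]
FROM DISK = N'C:\Temp\BackupFileCopies\MyDatabase.bak'
WITH
	MOVE N'MyDatabase_Data' TO N'C:\Temp\DataFileRestores\MyDatabase_Virtual_Data.vmdf',
	MOVE N'MyDatabase_Log' TO N'C:\Temp\DataFileRestores\MyDatabase_Virtual_Log.vmdf',
	RECOVERY, STATS=1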

With this tool at hand, I next needed to get my scripts written.

I already have a harness for this sort of maintenance: an instance dedicated to DBA tasks.  Running on this instance are jobs that perform tasks at different intervals (every 15 minutes, every hour, day, week, and so forth).  These jobs fire off SSIS packages which collect information about the instances in my environment, such as which backups ran successfully in the last week, what security changes have occurred, which databases have data files that are going to grow soon, which indexes need attention, and which jobs are running longer than usual; all based off of a list of instances that I update whenever I add or modify a SQL instance in my environment.

So, since I already have all of the information I will need to write my restore and CheckDB scripts, I am all set.  I created a stored procedure that would:

  • Loop through my list of Instances
  • For each instance, loop through each database
    • For each database:
      • Gather the list of the backup files from the latest successful full backup
      • Verify that the most recent backup is not “too old” (comparing against a configurable value)
      • Gather all datafile logical names (as of the time the backup was taken)
      • Copy those backup files locally
      • Create a native syntax restore script, using dynamic SQL, from the list of backup files and specifying a local location for each virtual data file (one VDF for each logical file in the backup)
      • Attempt to execute the dynamic SQL restore script, noting any errors that might occur
      • If the restore was successful, execute DBCC CheckDB against the newly restored database, saving all results to a table
      • Once the DBCC CheckDB has been performed, I check the results in the table to discover if there were any problems found, noting the result
      • I then drop the Virtual Database and the local copy of the backups
    • Once every database in the instance has been checked, the script e-mails me the results: each database is listed along with whether its check was successful, found issues that I need to look into, was skipped because the latest backup was too old, or failed to restore (along with any errors)

Using this method, I am able to consistency check all of my databases every week without ever having to lift a finger.  I look over the e-mails I get from the script for each instance and make sure everything was successful.  If an error ever pops up, I know that I only have to spend my time on the databases that are actually having problems.

This has been a huge time-saver for me and has given me additional peace of mind.

Moving a Filegroup to a New Disk Array with No Downtime

We just bought a new SAN, and I am in the midst of migrating our SQL Database Instances to it.  One of the challenges I am facing is that I have several very large tables (billions of rows), and some of them contain LOB data.  So, simply dropping the non-clustered indexes and then recreating the clustered index online is not an option, and, of course, all of the systems have to remain online during the migration.  I certainly cannot get a downtime window long enough (hours) to move some of my largest tables.  So, the challenge I have been facing is how to move these tables with no (or at least minimal) downtime, and preferably with very little impact on production performance.

Well, I have one thing really going for me – I already have my filegroups set up exactly the way I want them.  My partitions are stretched across all the right filegroups, my tables are separated off into certain groups according to various logic in each database, and I am not really looking to change any of that; at least not with this migration.  So, ultimately, I just need to figure out how to move each filegroup to the new array without interrupting anything.

So, let’s look at one of my filegroups: it consists of 16 files (File_01 through File_16), each 128 GB in size, for a total of 2 TB.

Now, my original idea was to create 16 new files in the same filegroup, but on the new array, and then empty and remove the old files one by one until the filegroup was completely on the new SAN.  While this would technically work, it causes a huge problem that I learned about from one of Paul Randal’s (Blog | Twitter) lessons during the SQLskills Immersion Event 1: Internals and Performance.  Paul also discusses the problem in a SQL Server Pro article from August 2011.  Basically, due to the “proportional fill” method of writing to data files in a filegroup, you will cause your files to be filled unevenly, resulting in a hot-spot at whichever file(s) have the most free space at the end of your process.
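
A quick way to see how unevenly the files in a database are currently filled (and therefore where proportional fill will concentrate new writes) is to compare each file’s size against its used space; a small sketch:

SELECT
	[name],
	[size] / 128 AS [sizeMB],
	FILEPROPERTY([name], 'SpaceUsed') / 128 AS [usedMB],
	([size] - FILEPROPERTY([name], 'SpaceUsed')) / 128 AS [freeMB]
FROM
	[sys].[database_files]
WHERE
	[type_desc] = 'ROWS'
ORDER BY
	[freeMB] DESC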

So, to avoid this in my environment, I decided to first grow one of the files in my filegroup to a size large enough to hold all of the data in that filegroup.  Assuming that each of the files in this filegroup is almost full, I will grow File_16 to 2 TB.

Now, my plan is to empty and remove each file in the filegroup except File_16, which will cause all of the data to be moved down to that one file.  Emptying each file has the potential to be very intensive; however, I have found it to have minimal effect on my servers.  The process can take a very long time, but since I have to move these data files online, this is the best approach I have found for this sort of scenario.  Test it first in a non-production environment.  I would write and issue a script similar to this:

DBCC SHRINKFILE(File_01, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_01]
DBCC SHRINKFILE(File_02, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_02]
DBCC SHRINKFILE(File_03, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_03]
DBCC SHRINKFILE(File_04, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_04]
DBCC SHRINKFILE(File_05, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_05]
DBCC SHRINKFILE(File_06, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_06]
DBCC SHRINKFILE(File_07, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_07]
DBCC SHRINKFILE(File_08, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_08]
DBCC SHRINKFILE(File_09, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_09]
DBCC SHRINKFILE(File_10, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_10]
DBCC SHRINKFILE(File_11, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_11]
DBCC SHRINKFILE(File_12, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_12]
DBCC SHRINKFILE(File_13, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_13]
DBCC SHRINKFILE(File_14, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_14]
DBCC SHRINKFILE(File_15, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_15]
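
For what it is worth, rather than typing out each of those pairs by hand, you could generate them from the catalog; a sketch, assuming the file names above and that File_16 is the file being kept:

DECLARE @emptyScript VARCHAR(MAX) = ''

SELECT
	@emptyScript = @emptyScript
		+ 'DBCC SHRINKFILE(' + [name] + ', EMPTYFILE)' + CHAR(13) + CHAR(10)
		+ 'ALTER DATABASE [DatabaseName]  REMOVE FILE [' + [name] + ']' + CHAR(13) + CHAR(10)
FROM
	[sys].[database_files]
WHERE
		[name] LIKE 'File[_]%'	-- the files in the filegroup being migrated
	AND	[name] <> 'File_16'		-- keep the file that will absorb the data

PRINT @emptyScript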

At this point, I add the 16 new files to the new SAN, and we end up with one large file on the old SAN and 16 new files on the new SAN.

Now, all that is left to do is issue one final empty command (and remove the old file) to stripe that 2 TB of data across the 16 new files:

DBCC SHRINKFILE(File_16, EMPTYFILE)
ALTER DATABASE [DatabaseName]  REMOVE FILE [File_16]

After this command has finished, the data will be nearly perfectly striped across all 16 files on the new SAN, all accomplished with no downtime and, in my experience, very little impact on production performance.  Your particular environment may see more of a performance impact, so please test in a non-production environment before trying this on a live system.