November 21, 2010

Strip/Remove HTML from text in SQL Server

I used the below function, because HTML contents are not fully supported in crystal report.

Ref:
http://blog.sqlauthority.com/2007/06/16/sql-server-udf-user-defined-function-to-strip-html-parse-html-no-regular-expression/

http://lazycoders.blogspot.com/2007/06/stripping-html-from-text-in-sql-server.html


CREATE FUNCTION [dbo].[StripHTML]

(
@HTMLText varchar(MAX)
)
RETURNS varchar(MAX)
AS
BEGIN
DECLARE @Start int
DECLARE @End int
DECLARE @Length int
-- Replace the HTML entity & with the '&' character (this needs to be done first, as
-- '&' might be double encoded as '&')
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity < with the '<' character
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '<')
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace the HTML entity > with the '>' character
SET @Start = CHARINDEX('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '>')
SET @Start = CHARINDEX('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END


-- Replace the HTML entity & with the '&' character
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END


-- Replace the HTML entity   with the ' ' character
SET @Start = CHARINDEX(' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ' ')
SET @Start = CHARINDEX(' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END

-- Replace any
tags with a newline

SET @Start = CHARINDEX('
', @HTMLText)

SET @End = @Start + 3
SET @Length = (@End - @Start) + 1


WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, CHAR(13) + CHAR(10))
SET @Start = CHARINDEX('
', @HTMLText)



SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline

SET @Start = CHARINDEX('
', @HTMLText)

SET @End = @Start + 4
SET @Length = (@End - @Start) + 1


WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX('
', @HTMLText)

SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Replace any
tags with a newline

SET @Start = CHARINDEX('
', @HTMLText)

SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX('
', @HTMLText)

SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Remove anything between tags
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0) BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
SET @Length = (@End - @Start) + 1
END
WHILE CHARINDEX(' ', @HTMLText) > 0
SET @HTMLText = REPLACE(@HTMLText, ' ', ' ')


RETURN LTRIM(RTRIM(@HTMLText))


END

No comments:

Post a Comment