21
Dec

Remove Html Tages form string

   Posted by: admin   in .Net Emperor

Hello Friends,

Here is the code which will remove html tags form the string in vb .net.

Please check

Code 1 :

Dim str As String = "put html string here"
Str = Regex.Replace(Str, "\<[^\>]*\>", String.Empty, RegexOptions.IgnoreCase)
Str = Regex.Replace(Str, "[ \t\r\n]", " ")

Code 2 :

Public Shared Function RemoveHTML(ByVal strText As String) As String
Dim TAGLIST As String
TAGLIST = ";!--;!DOCTYPE;A;ACRONYM;ADDRESS;APPLET;AREA;B;BASE;BASEFONT;" _
& " BGSOUND;BIG;BLOCKQUOTE;BODY;BR;BUTTON;CAPTION;CENTER;CITE;CODE;" _
& "COL;COLGROUP;COMMENT;DD;DEL;DFN;DIR;DIV;DL;DT;EM;EMBED;FIELDSET;" _
& "FONT;FORM;FRAME;FRAMESET;HEAD;H1;H2;H3;H4;H5;H6;HR;HTML;I;IFRAME;IMG;" _
& "INPUT;INS;ISINDEX;KBD;LABEL;LAYER;LAGEND;LI;LINK;LISTING;MAP;MARQUEE;" _
& "MENU;META;NOBR;NOFRAMES;NOSCRIPT;OBJECT;OL;OPTION;P;PARAM;PLAINTEXT;" _
& "PRE;Q;S;SAMP;SCRIPT;SELECT;SMALL;SPAN;STRIKE;STRONG;STYLE;SUB;SUP;" _
& "TABLE;TBODY;TD;TEXTAREA;TFOOT;TH;THEAD;TITLE;TR;TT;U;UL;VAR;WBR;XMP;"

Const BLOCKTAGLIST As String = ";APPLET;EMBED;FRAMESET;HEAD;NOFRAMES;NOSCRIPT;OBJECT;SCRIPT;STYLE;"
Dim nPos1 As Integer
Dim nPos2 As Integer
Dim nPos3 As Integer
Dim strResult As String
Dim strTagName As String
Dim bRemove As Boolean
Dim bSearchForBlock As Boolean

nPos1 = InStr(strText, "<")
Do While nPos1 > 0
nPos2 = InStr(nPos1 + 1, strText, ">")
If nPos2 > 0 Then
strTagName = Mid(strText, nPos1 + 1, nPos2 - nPos1 - 1)
strTagName = Replace(Replace(strTagName, vbCr, " "), vbLf, " ")

nPos3 = InStr(strTagName, " ")
If nPos3 > 0 Then
strTagName = Left(strTagName, nPos3 - 1)
End If

If Left(strTagName, 1) = "/" Then
strTagName = Mid(strTagName, 2)
bSearchForBlock = False
Else
bSearchForBlock = True
End If

If InStr(1, TAGLIST, ";" & strTagName & ";", vbTextCompare) > 0 Then
bRemove = True
If bSearchForBlock Then
If InStr(1, BLOCKTAGLIST, ";" & strTagName & ";", vbTextCompare) > 0 Then
nPos2 = Len(strText)
nPos3 = InStr(nPos1 + 1, strText, " If nPos3 > 0 Then
nPos3 = InStr(nPos3 + 1, strText, ">")
End If

If nPos3 > 0 Then
nPos2 = nPos3
End If
End If
End If
Else
bRemove = False
End If

If bRemove Then
strResult = strResult & Left(strText, nPos1 - 1)
strText = Mid(strText, nPos2 + 1)
Else
strResult = strResult & Left(strText, nPos1)
strText = Mid(strText, nPos1 + 1)
End If
Else
strResult = strResult & strText
strText = ""
End If

nPos1 = InStr(strText, "<")
Loop
strResult = strResult & strText

RemoveHTML = strResult
End Function

Please check and if it will work for you please comment on site.

Tags: , ,

This entry was posted on Monday, December 21st, 2009 at 5:41 am and is filed under .Net Emperor. You can follow any responses to this entry through the RSS 2.0 feed. You can leave a response, or trackback from your own site.

Leave a reply

Name (*)
Mail (will not be published) (*)
URI
Comment