I know we can use a regular expression to remove all HTML tags from a string. By the same token, I wonder if there's a similarly elegant way to automatically detect and remove all the JavaScript code from a given string.
Thanks.
Below is a CFScript function I found somewhere that will clean a string of HTML, JavaScript and more.
/**
 * Strips HTML markup (including script/style/embed-type blocks and comments)
 * from a string and normalises the remaining whitespace and punctuation.
 *
 * Steps, in order:
 *   1. Remove whole "active content" blocks (script, applet, embed, ilayer,
 *      frame, object, iframe, style) together with their contents, plus
 *      non-self-closed meta/link/script tags.
 *   2. Remove any stray opening/closing tags of those kinds.
 *   3. Replace HTML comments with a single space.
 *   4. Remove every remaining tag (this also covers <!DOCTYPE ...>).
 *   5. Turn tabs into spaces, collapse runs of blanks, collapse runs of
 *      line breaks into one CRLF.
 *   6. Replace leftover angle brackets and quotes with typographic
 *      equivalents, and non-breaking spaces / &nbsp; / &#39; with plain
 *      characters.
 *
 * @str The text to clean.
 * @return The cleaned text.
 */
function tagStripper(str) {
    // 1. Blocks whose *content* must go as well as the tags themselves.
    str = REReplaceNoCase(str, "<script.*?</*.script*.>|<applet.*?</*.applet*.>|<embed.*?</*.embed*.>|<ilayer.*?</*.ilayer*.>|<frame.*?</*.frame*.>|<object.*?</*.object*.>|<iframe.*?</*.iframe*.>|<style.*?</*.style*.>|<meta([^>]*[^/])>|<link([^>]*[^/])>|<script([^>]*[^/])>", "", "all");

    // 2. Unpaired leftovers of the same tag families.
    str = REReplaceNoCase(str, "</?(script|applet|embed|ilayer|frame|iframe|frameset|style|link)[^>]*>", "", "all");

    // 3. Comments must be handled BEFORE the generic tag strip: a comment
    //    that contains ">" would otherwise be cut at that first ">" and its
    //    tail (e.g. " b -->") would leak into the output.
    str = REReplaceNoCase(str, "<!--(.*?)-->", " ", "all");

    // 4. Every remaining tag. After this pass no "<" that is followed by a
    //    ">" can exist, so no further tag-matching passes are needed.
    str = REReplaceNoCase(str, "<[^>]*>", "", "all");

    // 5. Whitespace normalisation.
    str = Replace(str, Chr(9), " ", "all");
    str = REReplace(str, "[[:blank:]]{2,}", " ", "all");
    // Character class holds only CR and LF; a "|" inside a class is a
    // literal pipe and would make "||" collapse into a line break.
    str = REReplace(str, "[\r\n]{2,}", Chr(13) & Chr(10), "all");

    // 6. Neutralise any characters that could still be read as markup.
    str = Replace(str, "<", "«", "all");
    str = Replace(str, ">", "»", "all");
    str = Replace(str, Chr(34), "”", "all");
    str = Replace(str, "'", "’", "all");
    str = Replace(str, Chr(160), " ", "all");
    // NOTE(review): the snippet as found replaced " " with " " (a no-op);
    // presumably the original literal was the &nbsp; entity — confirm.
    str = Replace(str, "&nbsp;", " ", "all");
    // "##" is an escaped "#" inside a CFML string, i.e. this matches &#39;
    str = Replace(str, "&##39;", "’", "all");

    return str;
}
4 Likes
Many thanks. Does it retain new line breaks as well?