清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
一个较为理想的 PHP 函数：去除 HTML 标签、空白字符、CSS 样式和 JS 脚本。
// Strip HTML markup, CSS and JavaScript from $descclear, in place.
//
// Input:  $descclear must already hold the HTML string to clean.
// Output: $descclear holds the cleaned text; $pat is left holding the final
//         catch-all tag pattern.
//
// NOTE(review): this is a regex blacklist, not an HTML parser. Tags that no
// pattern below names (for example <p> or <h1>) pass through untouched, so the
// result must not be treated as safe HTML without further escaping.

// --- Whitespace -------------------------------------------------------------
// Remove CR, LF and TAB outright (CRLF pairs disappear with them), then
// collapse any remaining whitespace run into a single space.
$descclear = str_replace(array("\r", "\n", "\t"), "", $descclear);
$descclear = preg_replace("/\s+/", " ", $descclear);

// Normalise "<   tag" to "<tag" so the tag patterns below can match.
$descclear = preg_replace("/<[ ]+/si", "<", $descclear);

// --- Comments and declarations ----------------------------------------------
// Comments go first: a comment body may contain ">" which would otherwise cut
// the generic "<!...>" pattern short.
$descclear = preg_replace("/<\!--.*?-->/si", "", $descclear); // HTML comments
$descclear = preg_replace("/<(\!.*?)>/si", "", $descclear);   // DOCTYPE and other <!...> declarations

// --- Structural tags: drop the tag itself, keep whatever it encloses --------
$descclear = preg_replace("/<(\/?html.*?)>/si", "", $descclear); // <html>, </html>
$descclear = preg_replace("/<(\/?head.*?)>/si", "", $descclear); // <head>, </head> (also <header ...>)
$descclear = preg_replace("/<(\/?meta.*?)>/si", "", $descclear); // <meta>
$descclear = preg_replace("/<(\/?body.*?)>/si", "", $descclear); // <body>, </body>
$descclear = preg_replace("/<(\/?link.*?)>/si", "", $descclear); // <link>
$descclear = preg_replace("/<(\/?form.*?)>/si", "", $descclear); // <form>, </form>

// Upper-case every occurrence of "cookie" (any case), including in plain text.
$descclear = preg_replace("/cookie/si", "COOKIE", $descclear);

// --- Elements removed together with their content ---------------------------
// For each element the paired form (open tag + content + close tag) is removed
// first; the second pattern then removes any unpaired tag that is left over.
$descclear = preg_replace("/<(applet.*?)>(.*?)<(\/applet.*?)>/si", "", $descclear);     // <applet>...</applet>
$descclear = preg_replace("/<(\/?applet.*?)>/si", "", $descclear);                      // stray <applet> / </applet>
$descclear = preg_replace("/<(style.*?)>(.*?)<(\/style.*?)>/si", "", $descclear);       // <style>...</style>
$descclear = preg_replace("/<(\/?style.*?)>/si", "", $descclear);                       // stray <style> / </style>
$descclear = preg_replace("/<(title.*?)>(.*?)<(\/title.*?)>/si", "", $descclear);       // <title>...</title>
$descclear = preg_replace("/<(\/?title.*?)>/si", "", $descclear);                       // stray <title> / </title>
$descclear = preg_replace("/<(object.*?)>(.*?)<(\/object.*?)>/si", "", $descclear);     // <object>...</object>
$descclear = preg_replace("/<(\/?objec.*?)>/si", "", $descclear);                       // stray <object> / </object> (prefix match on "objec")
$descclear = preg_replace("/<(noframes.*?)>(.*?)<(\/noframes.*?)>/si", "", $descclear); // <noframes>...</noframes>
$descclear = preg_replace("/<(\/?noframes.*?)>/si", "", $descclear);                    // stray <noframes> / </noframes>
$descclear = preg_replace("/<(i?frame.*?)>(.*?)<(\/i?frame.*?)>/si", "", $descclear);   // <frame>/<iframe> with content
$descclear = preg_replace("/<(\/?i?frame.*?)>/si", "", $descclear);                     // stray <frame>/<iframe> (also <frameset ...>)
$descclear = preg_replace("/<(script.*?)>(.*?)<(\/script.*?)>/si", "", $descclear);     // <script>...</script>
$descclear = preg_replace("/<(\/?script.*?)>/si", "", $descclear);                      // stray <script> / </script>

// --- Script-related keywords ------------------------------------------------
// These three only change letter case (and drop whitespace before "=").
// NOTE(review): HTML attribute names and URI schemes are generally matched
// case-insensitively, so this probably does not disable handlers or
// "javascript:" URLs -- confirm before relying on it for security.
// The "on...=" rule also touches ordinary attributes such as content= -> cOntent=.
$descclear = preg_replace("/javascript/si", "Javascript", $descclear);
$descclear = preg_replace("/vbscript/si", "Vbscript", $descclear);
$descclear = preg_replace("/on([a-z]+)\s*=/si", "On\\1=", $descclear);

// NOTE(review): pattern and replacement are identical, so this statement is a
// no-op as written. The original comment says it should defuse entity-encoded
// scripts (e.g. javAsCript:alert()); the intended replacement text appears to
// have been lost -- confirm against the original source.
$descclear = preg_replace("/&#/si", "&#", $descclear);

// --- Final catch-all --------------------------------------------------------
// Remove opening/closing tags whose name starts with one of the listed
// alternatives, plus <? ...> and <% ...> openers. The short names (i, b, a, u,
// li, ...) are prefixes, so e.g. <input>, <br>, <article> and <ul> are removed
// too. Only the tags are dropped; enclosed text is kept.
$pat = "/<(\/?)(script|i?frame|style|html|body|li|i|map|title|img|link|span|u|font|table|tr|b|marquee|td|strong|div|a|meta|\?|\%)([^>]*?)>/isU";
$descclear = preg_replace($pat, "", $descclear);