中国作训警服图片:把WORD转化为HTML 去除垃圾代码

来源:百度文库 编辑:查人人中国名人网 时间:2024/04/27 23:35:13
把WORD转化为HTML时,会生成不少垃圾代码,怎么样又省时又省力的彻底清楚HTML里的垃圾代码

// 清除WORD冗余格式并粘贴
function cleanAndPaste( html ) {
// Remove all SPAN tags
html = html.replace(/<\/?SPAN[^>]*>/gi, "" );
// Remove Class attributes
html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3") ;
// Remove Style attributes
html = html.replace(/<(\w[^>]*) style="([^"]*)"([^>]*)/gi, "<$1$3") ;
// Remove Lang attributes
html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3") ;
// Remove XML elements and declarations
html = html.replace(/<\\?\?xml[^>]*>/gi, "") ;
// Remove Tags with XML namespace declarations: <o:p></o:p>
html = html.replace(/<\/?\w+:[^>]*>/gi, "") ;
// Replace the  
html = html.replace(/ /, " " );
// Transform <P> to <DIV>
var re = new RegExp("(<P)([^>]*>.*?)(<\/P>)","gi") ; // Different because of a IE 5.0 error
html = html.replace( re, "<div$2</div>" ) ;

insertHTML( html ) ;
}