from lxml import etree

# Parse the raw HTML with a parser that is explicitly told the input is UTF-8.
# When the source is processed without a specified encoding, the parser falls
# back to a default encoding, which conflicts with UTF-8 and produces garbled
# text.
# NOTE: `resHtml` is expected to be defined before this point (not shown here).
# API reference: http://lxml.de/api/index.html
html = etree.HTML(resHtml, parser=etree.HTMLParser(encoding='utf-8'))

# See also: XML parsing; DBF reading/writing; custom sorting in Python 3;
# Decode HTML entities; undefined symbol PyUnicodeUCS2_FromUnicode