XML解析

Categories: Python
# ---------------------------------------------------------------------------
# XML (standard library: xml.etree.ElementTree)
# ---------------------------------------------------------------------------
# document.xml is the main document part extracted from a .docx file.
from xml.etree.ElementTree import parse
from xml.etree.ElementTree import XMLParser

# WordprocessingML main namespace; every <w:t> (text run) element lives in it.
W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'

# Open in binary mode so the XML parser decodes the bytes itself instead of
# relying on the platform's default text encoding; the context manager makes
# sure the handle is closed once parsing is done.
with open('document.xml', 'rb') as f:
    doc = parse(f, XMLParser(encoding="utf-8"))

# 1 - fully qualified tag name in Clark notation: {namespace-uri}localname
t_elems = doc.findall('.//{%s}t' % W_NS)

# 2 - prefixed tag name, resolved through a prefix -> URI mapping
ns = {'w': W_NS}
t_elems = doc.findall('.//w:t', ns)

# 3 - iterate over every matching element in the tree (lazy iterator)
t_elems = doc.iter('{%s}t' % W_NS)

for elem in t_elems:
    print(elem.text)

# ---------------------------------------------------------------------------
# LXML (third-party: lxml.etree)
# ---------------------------------------------------------------------------
from lxml import etree

with open('document.xml', 'rb') as f:
    doc_lxml = etree.parse(f, etree.XMLParser(encoding="utf-8"))

# 1 - iterate using the fully qualified tag name
p_lxml = doc_lxml.iter('{%s}t' % W_NS)

# 2 - prefixed tag name with an explicit namespace mapping; the relative
# './/' form is used so the same expression matches the stdlib example above.
ns = {'w': W_NS}
find_results = doc_lxml.findall('.//w:t', ns)

# Namespace prefix->URI mapping known in the context of this Element.
# NOTE(review): the original snippet is cut off right after this comment; it
# presumably goes on to show an element's `nsmap` attribute - confirm against
# the full article.

Read More →