源码网商城,靠谱的源码在线交易网站 我的订单 购物车 帮助

源码网商城

python解析xml模块封装代码

  • 时间:2022-11-18 13:27 编辑: 来源: 阅读:
  • 扫一扫,手机访问
摘要:python解析xml模块封装代码
有如下的xml文件:
[u]复制代码[/u] 代码如下:
<?xml version="1.0" encoding="utf-8" ?>  <root>  <childs>  <child name='first' >1</child>  <child value="2">2</child>  </childs>  </root>
下面介绍python解析xml文件的几种方法,使用python模块实现。 方式1,python模块实现自动遍历所有节点:
[u]复制代码[/u] 代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse


class TestHandle(ContentHandler):
    """SAX handler that traces every event and collects character data.

    Each element start, element end and character chunk is printed as it is
    reported by the parser; character chunks are additionally appended to
    the list supplied by the caller.
    """

    def __init__(self, inlist):
        """Store the output list.

        Args:
            inlist: List that receives every character chunk, in document
                order.  It is mutated in place.
        """
        # Initialise the base class explicitly so its own state is set up
        # even before a parser attaches a locator.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # Single-argument print() renders identically on Python 2 and 3.
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element being closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and record it in ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)


if __name__ == '__main__':
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)
结果: [html] view plaincopy name: root attrs: []  chars   name: childs attrs: []  chars   name: child attrs: [u'name']  chars 1  endname child  chars   name: child attrs: [u'value']  chars 2  endname child  chars   endname childs  chars   endname root  [u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n'] 方式2,python模块实现获取根节点,按需查找指定节点:
[u]复制代码[/u] 代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom import minidom

# The XML declaration must be the very first characters of the document,
# otherwise the parser rejects it.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
    <request name='first'>/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
'''


def doxml(xmlstr, show_help=True):
    """Parse an XML string with minidom and print a walk of the root's children.

    The whole document and the root element are printed first.  Every direct
    child of the root is then printed; for element children the ``name``
    attribute, node name, number of child nodes and leading text are printed
    as well.

    Args:
        xmlstr: XML document as a string.
        show_help: When true (the default), print the built-in help for every
            attribute of each element child.  Pass ``False`` to skip this
            very verbose dump.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when the document starts
    # with a comment or doctype, which firstChild would return instead.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; skip text, comments and the like.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # An element may be empty or start with a nested element, in which
        # case there is no leading text to show.
        first = child.firstChild
        has_text = first is not None and first.nodeType in (
            first.TEXT_NODE, first.CDATA_SECTION_NODE)
        print('child data: %s' % (first.data if has_text else ''))
        print('=======================================')
        print('more help info to see:')
        if show_help:
            for med in dir(child):
                # Document the attribute itself, not the string holding
                # its name.
                help(getattr(child, med))


if __name__ == '__main__':
    doxml(xmlstr)
结果: [html] view plaincopy Dom:  <?xml version="1.0" ?><hash>      <request name="first">/2/photos/square/type.xml</request>      <error_code>21301</error_code>      <error>auth faild!</error>  </hash>  root:  <hash>      <request name="first">/2/photos/square/type.xml</request>      <error_code>21301</error_code>      <error>auth faild!</error>  </hash>  <request name="first">/2/photos/square/type.xml</request>  child node attribute name: first  child node name: request  child node len: 1  child data: /2/photos/square/type.xml  =======================================  more help info to see:  两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。 =====补充分割线================ 实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而其xml文件的头部声明也必须是utf-8,为其它编码会报错误。 网上的解决办法都是替换xml文件头部的编码声明,然后转换编码为utf-8再用minidom解码,实际测试为可行,不过有点累赘的感觉。 本节是 python解析xml模块封装代码 的第二部分。 ====写xml内容的分割线=========
[u]复制代码[/u] 代码如下:
#!/usr/bin/env python
# encoding: utf-8
from xml.dom import minidom


class xmlwrite:
    """Build a small XML report with minidom and save it to a file.

    The document has a single ``<api>`` root element.  New elements are
    added with :meth:`write_node`; their parent is addressed by a simplified
    path such as ``'/Case[1]/'``.  Path steps are separated by ``/``, are
    resolved starting from the children of the root element, and may carry a
    zero-based ``[index]`` selecting among same-named siblings.  Matching on
    attributes is not supported.
    """

    def __init__(self, resultfile):
        """Create an empty document.

        Args:
            resultfile: Path of the file written by :meth:`save_xml`.
        """
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Resolve a simplified path to a node below the root element.

        Args:
            xpath: Path such as ``'/'`` (the root itself), ``'/Case/'`` or
                ``'/Case[1]/No'``.  Empty steps are ignored.

        Returns:
            The addressed node, or ``None`` when any step does not exist.

        Raises:
            ValueError: If a step has a malformed ``[index]`` suffix.
        """
        current = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                continue
            bracket = step.find('[')
            if bracket > -1:
                if not step.endswith(']'):
                    raise ValueError('malformed path step: %r' % step)
                name = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # Without an index the whole step is the node name; slicing
                # with the -1 returned by find() would drop its last letter.
                name = step
                index = 0
            if index < 0:
                return None
            matches = [child for child in current.childNodes
                       if child.nodeName == name]
            if index >= len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element below the node addressed by ``parent``.

        Args:
            parent: Simplified path of the parent node (see the class
                docstring).  When it cannot be resolved, a message is
                printed and the element is appended to the root instead.
            nodename: Tag name of the new element.
            value: Text content; a falsy value creates an empty element.
            attribute: Optional dict of attribute names to values.
            CDATA: When true, store ``value`` in a CDATA section instead of
                a plain text node.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        # Attributes apply to empty elements too, so this is not nested
        # under the value check.
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, ValueError):
            # Malformed path or a non-string parent: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append an empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the ``Case`` with the given index."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the ``Case`` with the given index."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the ``Case`` with the given index."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the ``Case`` with the given index."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a CDATA ``DBSQL`` element to the ``Case`` with the given index."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the ``Case`` with the given index."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Write the document to ``self.resultfile`` encoded as UTF-8."""
        # Serialise to bytes and write in binary mode so the bytes on disk
        # always match the encoding named in the XML declaration.
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(self.dom.toxml(encoding='utf-8'))


if __name__ == '__main__':
    xr = xmlwrite(r'D:test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()
以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。 路径从根节点的子节点开始匹配,不包含根节点本身,索引从0开始。 比如:/Case[1]/, 表示根节点下的第2个Case节点。
  • 全部评论(0)
联系客服
客服电话:
400-000-3129
微信版

扫一扫进微信版
返回顶部