import string
import xmllib
class QuotationParser(xmllib.XMLParser):
    """Crude xmllib extractor for quotations.dtd document.

    Text is accumulated in ``self.thisquote``.  When a ``quotation``
    element ends, a whitespace-normalised preview of the first 230
    characters and the total accumulated length are printed, and the
    accumulator is reset.  Tags without a dedicated handler are recorded
    as ``{`` / ``}``, unknown character references as ``?`` and unknown
    entity references as ``#``.
    """

    def __init__(self):
        xmllib.XMLParser.__init__(self)
        self.thisquote = ''  # quotation accumulator

    def handle_data(self, data):
        """Append a chunk of character data to the accumulator."""
        self.thisquote = self.thisquote + data

    def syntax_error(self, message):
        """Ignore the reported syntax error (deliberately best-effort)."""
        pass

    def start_quotations(self, attrs):
        """Announce the start of the document (top level tag)."""
        print('--- Begin Document ---')

    def start_quotation(self, attrs):
        """Print a heading for the quotation that is starting."""
        print('QUOTATION:')

    def end_quotation(self):
        """Print the preview and size of the finished quotation, then reset."""
        # Splitting and re-joining collapses every run of whitespace
        # (including newlines) into a single space.
        preview = ' '.join(self.thisquote[:230].split()) + '...'
        # A single one-argument print() writes the same text under both
        # Python 2 and Python 3; the trailing '\n' leaves a blank line
        # after each quotation.
        print(preview + ' (' + str(len(self.thisquote)) + ' bytes)\n')
        self.thisquote = ''

    def unknown_starttag(self, tag, attrs):
        """Mark the opening of a tag that has no dedicated handler."""
        self.thisquote = self.thisquote + '{'

    def unknown_endtag(self, tag):
        """Mark the closing of a tag that has no dedicated handler."""
        self.thisquote = self.thisquote + '}'

    def unknown_charref(self, ref):
        """Substitute '?' for a character reference that was not resolved."""
        self.thisquote = self.thisquote + '?'

    def unknown_entityref(self, ref):
        """Substitute '#' for an entity reference that was not resolved."""
        self.thisquote = self.thisquote + '#'
if __name__ == '__main__':
    # Script entry point: run the xmllib-based extractor over sample.xml.
    parser = QuotationParser()
    # Read the document inside a ``with`` block so the file handle is
    # closed promptly instead of being left to the garbage collector.
    with open("sample.xml") as xml_file:
        document = xml_file.read()
    # Feed the text in one call rather than one character at a time.
    parser.feed(document)
    parser.close()
import xml.sax

# Ask xml.sax for a parser object.
parser = xml.sax.make_parser()
"Simple SAX example, updated for Python 2.0+"
import string
import xml.sax
# The star import is what brings ContentHandler into scope for the
# handler class defined below.
from xml.sax.handler import *
class QuotationHandler(ContentHandler):
    """Crude extractor for quotations.dtd compliant XML document.

    While inside a ``quotation`` element, character data is accumulated
    in ``self.thisquote`` and every nested element is recorded as ``{``
    (start) or ``}`` (end).  When the ``quotation`` element ends, a
    whitespace-normalised preview of the first 230 characters and the
    total accumulated length are printed, and the state is reset.
    Markup and text outside a ``quotation`` element are ignored.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.in_quote = 0    # true while inside a <quotation> element
        self.thisquote = ''  # text accumulated for the current quotation

    def startDocument(self):
        """Announce the start of the document."""
        print('--- Begin Document ---')

    def startElement(self, name, attrs):
        """Open a quotation, or mark a nested element with '{'."""
        if name == 'quotation':
            print('QUOTATION:')
            self.in_quote = 1
        elif self.in_quote:
            # Only elements nested inside a quotation are recorded, so
            # enclosing tags do not leak a marker into the next quotation.
            self.thisquote = self.thisquote + '{'

    def endElement(self, name):
        """Close a quotation and print it, or mark a nested element with '}'."""
        if name == 'quotation':
            # Splitting and re-joining collapses every run of whitespace
            # (including newlines) into a single space.
            preview = ' '.join(self.thisquote[:230].split()) + '...'
            # A single one-argument print() writes the same text under
            # both Python 2 and Python 3; the trailing '\n' leaves a
            # blank line after each quotation.
            print(preview + ' (' + str(len(self.thisquote)) + ' bytes)\n')
            self.thisquote = ''
            self.in_quote = 0
        elif self.in_quote:
            self.thisquote = self.thisquote + '}'

    def characters(self, ch):
        """Accumulate character data, but only inside a quotation."""
        if self.in_quote:
            self.thisquote = self.thisquote + ch
if __name__ == '__main__':
    # Script entry point: parse sample.xml with a SAX parser whose
    # content events are delivered to a QuotationHandler instance.
    parser = xml.sax.make_parser()
    handler = QuotationHandler()
    parser.setContentHandler(handler)
    parser.parse("sample.xml")
from xml.dom.minidom import parse, parseString

dom1 = parse('mydata.xml')  # parse an XML file by name
# Walk the direct children of dom_node, keeping the text of '#text'
# children and collecting <spam> children.  dom_node and spam_node_list
# are not defined in this snippet; they must already exist.
for node in dom_node.childNodes:
    if node.nodeName == '#text':
        # PCDATA is a kind of node, but not a new subtag
        PCDATA = node.nodeValue
    elif node.nodeName == 'spam':
        # Create list of <spam> nodes
        spam_node_list.append(node)
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有