1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
18 I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
19
20 import xml.parsers.expat
21
def XMLescape(txt):
    """Return "txt" with the symbols & < > " replaced by their XML entities.

    The ampersand is replaced first so that the ampersands introduced by the
    other three replacements are not escaped a second time."""
    return txt.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
25
ENCODING='utf-8'
def ustr(what):
    """Convert object "what" to a unicode string.

    A unicode string (or an instance of a unicode subclass) is returned as is.
    Otherwise the object's own __str__ method is used when it is accessible and
    str() is used as the fallback.  A result that is not unicode yet is decoded
    using the module-level ENCODING."""
    text_type = type(u'')
    # isinstance (not an exact type comparison) so that unicode subclasses are
    # not pushed through __str__, which may fail on non-ASCII content.
    if isinstance(what, text_type): return what
    try: r = what.__str__()
    except AttributeError: r = str(what)
    if not isinstance(r, text_type): return unicode(r, ENCODING)
    return r
34
36 """ Node class describes syntax of separate XML Node. It have a constructor that permits node creation
37 from set of "namespace name", attributes and payload of text strings and other nodes.
38 It does not natively support building node from text string and uses NodeBuilder class for that purpose.
39 After creation node can be mangled in many ways so it can be completely changed.
40 Also node can be serialised into string in one of two modes: default (where the textual representation
41 of node describes it exactly) and "fancy" - with whitespace added to make indentation and thus make
42 result more readable by human.
43
44 Node class have attribute FORCE_NODE_RECREATION that is defaults to False thus enabling fast node
45 replication from the some other node. The drawback of the fast way is that new node shares some
46 info with the "original" node that is changing the one node may influence the other. Though it is
47 rarely needed (in xmpppy it is never needed at all since I'm usually never using original node after
48 replication (and using replication only to move upwards on the classes tree).
49 """
50 FORCE_NODE_RECREATION=0
def __init__(self, tag=None, attrs=None, payload=None, parent=None, node=None):
    """ Build a node from its parts and/or from an already existing node.

        tag     -- name of the node, optionally prepended by the namespace and
                   separated from it by a space.
        attrs   -- dictionary of attributes; its items are copied into the node.
        payload -- a text string, or a list of text strings and child nodes
                   that this node carries within itself.
        parent  -- node that this one is the child of (only this node's
                   "parent" reference is set here).
        node    -- either another Node instance to replicate or data that is
                   handed to NodeBuilder for parsing (e.g. a text string
                   containing exactly one node).

        If both "node" and other arguments are provided then the node is
        initially created as a replica of "node" and then modified to be
        compliant with the other arguments. """
    # None sentinels instead of mutable default values shared between calls.
    if attrs is None: attrs = {}
    if payload is None: payload = []
    if node:
        # isinstance instead of comparing type() results, so that replication
        # also works between Node and its subclasses.
        if self.FORCE_NODE_RECREATION and isinstance(node, Node): node = str(node)
        if not isinstance(node, Node):
            # NodeBuilder receives this instance as its initial node.
            node = NodeBuilder(node, self)
        else:
            # Fast replication: the containers are new, the items are shared.
            self.name, self.namespace, self.parent = node.name, node.namespace, node.parent
            self.attrs = dict(node.attrs)
            self.data = list(node.data)
            self.kids = list(node.kids)
    else:
        self.name, self.namespace, self.attrs, self.data, self.kids, self.parent = 'tag', '', {}, [], [], None

    # "namespace name" -> both parts; bare "name" keeps the current namespace.
    if tag: self.namespace, self.name = ([self.namespace] + tag.split())[-2:]
    if parent: self.parent = parent
    # A node without a namespace of its own inherits the one of its parent.
    if self.parent and not self.namespace: self.namespace = self.parent.namespace
    self.attrs.update(attrs)
    if isinstance(payload, (type(''), type(u''))): payload = [payload]
    for item in payload:
        if isinstance(item, Node): self.addChild(node=item)
        else: self.addData(item)
78
80 """ Method used to dump node into textual representation.
81 if "fancy" argument is set to True produces indented output for readability."""
82 s = (fancy-1) * 2 * ' ' + "<" + self.name
83 if self.namespace:
84 if not self.parent or self.parent.namespace!=self.namespace:
85 s = s + ' xmlns="%s"'%self.namespace
86 for key in self.attrs.keys():
87 val = ustr(self.attrs[key])
88 s = s + ' %s="%s"' % ( key, XMLescape(val) )
89 s = s + ">"
90 cnt = 0
91 if self.kids:
92 if fancy: s = s + "\n"
93 for a in self.kids:
94 if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt])
95 elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip())
96 if a: s = s + a.__str__(fancy and fancy+1)
97 cnt=cnt+1
98 if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
99 elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip())
100 if not self.kids and s[-1:]=='>':
101 s=s[:-1]+' />'
102 if fancy: s = s + "\n"
103 else:
104 if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
105 s = s + "</" + self.name + ">"
106 if fancy: s = s + "\n"
107 return s
def getCDATA(self):
    """ Serialise node, dropping all tags and leaving CDATA intact.
        That effectively kills all formatting, leaving only the text that was
        contained in the XML.

        The CDATA chunk with index i is emitted before the child with index i;
        one more chunk (if present) is emitted after the last child. """
    chunks = []
    data = self.data
    kids_count = len(self.kids)
    for idx, kid in enumerate(self.kids):
        # "data" may be shorter than "kids": guard the index instead of
        # raising IndexError.
        if idx < len(data): chunks.append(data[idx])
        # Empty slots (None) in the kids list carry no nested CDATA.
        if kid: chunks.append(kid.getCDATA())
    if kids_count < len(data): chunks.append(data[kids_count])
    return "".join(chunks)
def addChild(self, name=None, attrs=None, payload=None, namespace=None, node=None):
    """ If "node" argument is provided, adds it as child node. Else creates new
        node from the other arguments' values and adds it as well.

        Returns the node that was added.
        Raises AttributeError if "attrs" contains an 'xmlns' key: the namespace
        has to be passed through the "namespace" argument. """
    # None sentinels instead of mutable default values shared between calls.
    if attrs is None: attrs = {}
    if payload is None: payload = []
    if 'xmlns' in attrs:
        raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
    if node:
        newnode = node
        node.parent = self
    else:
        # The "namespace name" tag is only needed when a node is created, so an
        # already built node can be added without passing "name".
        if namespace: name = namespace + ' ' + name
        newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
    self.kids.append(newnode)
    # Every child gets a matching (empty) CDATA slot in the data list.
    self.data.append(u'')
    return newnode
135 """ Adds some CDATA to node. """
136 self.data.append(ustr(data))
137 self.kids.append(None)
139 """ Removes all CDATA from the node. """
140 self.data=[]
142 """ Deletes an attribute "key" """
143 del self.attrs[key]
145 """ Deletes the "node" from the node's childs list, if "node" is an instance.
146 Else deletes the first node that have specified name and (optionally) attributes. """
147 if type(node)<>type(self): node=self.getTag(node,attrs)
148 self.kids[self.kids.index(node)]=None
149 return node
151 """ Returns all node's attributes as dictionary. """
152 return self.attrs
154 """ Returns value of specified attribute. """
155 try: return self.attrs[key]
156 except: return None
158 """ Returns all node's child nodes as list. """
159 return self.kids
161 """ Returns all node CDATA as string (concatenated). """
162 return ''.join(self.data)
164 """ Returns the name of node """
165 return self.name
167 """ Returns the namespace of node """
168 return self.namespace
170 """ Returns the parent of node (if present). """
171 return self.parent
173 """ Return the payload of node i.e. list of child nodes and CDATA entries.
174 F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list:
175 ['text1', <nodea instance>, <nodeb instance>, ' text2']. """
176 ret=[]
177 for i in range(max(len(self.data),len(self.kids))):
178 if i < len(self.data) and self.data[i]: ret.append(self.data[i])
179 if i < len(self.kids) and self.kids[i]: ret.append(self.kids[i])
180 return ret
def getTag(self, name, attrs=None, namespace=None):
    """ Filters all child nodes using specified arguments as filter.
        Returns the first found or None if not found.

        Delegates to getTags with one=1. """
    # None sentinel instead of a mutable default value shared between calls.
    if attrs is None: attrs = {}
    return self.getTags(name, attrs, namespace, one=1)
186 """ Returns attribute value of the child with specified name (or None if no such attribute)."""
187 try: return self.getTag(tag).attrs[attr]
188 except: return None
190 """ Returns cocatenated CDATA of the child with specified name."""
191 try: return self.getTag(tag).getData()
192 except: return None
207 """ Sets attribute "key" with the value "val". """
208 self.attrs[key]=val
210 """ Sets node's CDATA to provided string. Resets all previous CDATA!"""
211 self.data=[ustr(data)]
213 """ Changes the node name. """
214 self.name = val
216 """ Changes the node namespace. """
217 self.namespace=namespace
219 """ Sets node's parent to "node". WARNING: do not checks if the parent already present
220 and not removes the node from the list of childs of previous parent. """
221 self.parent = node
223 """ Sets node payload according to the list specified. WARNING: completely replaces all node's
224 previous content. If you wish just to add child or CDATA - use addData or addChild methods. """
225 if type(payload) in (type(''),type(u'')): payload=[payload]
226 if add: self.kids+=payload
227 else: self.kids=payload
def setTag(self, name, attrs=None, namespace=None):
    """ Same as getTag but if the node with specified namespace/attributes is
        not found, creates such node (via addChild) and returns it. """
    # None sentinel instead of a mutable default value shared between calls.
    if attrs is None: attrs = {}
    node = self.getTags(name, attrs, namespace=namespace, one=1)
    if node: return node
    return self.addChild(name, attrs, namespace=namespace)
235 """ Creates new node (if not already present) with name "tag"
236 and sets it's attribute "attr" to value "val". """
237 try: self.getTag(tag).attrs[attr]=val
238 except: self.addChild(tag,attrs={attr:val})
240 """ Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs"
241 and sets it's CDATA to string "val". """
242 try: self.getTag(tag,attrs).setData(ustr(val))
243 except: self.addChild(tag,attrs,payload=[ustr(val)])
245 """ Checks if node have attribute "key"."""
246 return self.attrs.has_key(key)
248 """ Returns node's attribute "item" value. """
249 return self.getAttr(item)
251 """ Sets node's attribute "item" value. """
252 return self.setAttr(item,val)
254 """ Deletes node's attribute "item". """
255 return self.delAttr(item)
257 """ Reduce memory usage caused by T/NT classes - use memory only when needed. """
258 if attr=='T':
259 self.T=T(self)
260 return self.T
261 if attr=='NT':
262 self.NT=NT(self)
263 return self.NT
264 raise AttributeError
265
267 """ Auxiliary class used to quick access to node's child nodes. """
def __init__(self, node):
    """ Remember the node whose child nodes this helper gives access to. """
    # Stored straight into the instance dictionary - presumably to bypass the
    # attribute-assignment hook of this helper class (TODO confirm).
    vars(self)['node'] = node
274
276 """ Auxiliary class used to quick create node's child nodes. """
279 if isinstance(val,Node): self.node.addChild(attr,node=val)
280 else: return self.node.addChild(attr,payload=[val])
281
282 DBG_NODEBUILDER = 'nodebuilder'
284 """ Builds a Node class minidom from data parsed to it. This class used for two purposes:
285 1. Creation an XML Node from a textual representation. F.e. reading a config file. See an XML2Node method.
286 2. Handling an incoming XML stream. This is done by mangling
287 the __dispatch_depth parameter and redefining the dispatch method.
288 You do not need to use this class directly if you do not designing your own XML handler."""
def __init__(self,data=None,initial_node=None):
    """ Set up the expat parser and the builder state.

        data         -- optional; when provided (and non-empty) it is fed to the
                        parser as a complete document right after the set-up.
        initial_node -- optional; stored as the node the builder starts with
                        (a kind of "node upgrade"). Defaults to None.
    """
    self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')

    # Namespace-aware parser: the namespace and the local name are reported
    # joined by a single space.
    expat_parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
    self._parser = expat_parser
    expat_parser.StartElementHandler = self.starttag
    expat_parser.EndElementHandler = self.endtag
    expat_parser.CharacterDataHandler = self.handle_data
    expat_parser.StartNamespaceDeclHandler = self.handle_namespace_start
    # Expose the parser's feed method directly on the builder.
    self.Parse = expat_parser.Parse

    xml_namespace = "http://www.w3.org/XML/1998/namespace"
    self.__depth = 0
    self._dispatch_depth = 1
    self._document_attrs = None
    self._mini_dom = initial_node
    self.last_is_data = 1
    self._ptr = None
    self.namespaces = {xml_namespace: 'xml:'}
    self.xmlns = xml_namespace

    # Second argument 1: this is the final piece of data.
    if data: expat_parser.Parse(data, 1)
314
316 """ Method used to allow class instance to be garbage-collected. """
317 self._parser.StartElementHandler = None
318 self._parser.EndElementHandler = None
319 self._parser.CharacterDataHandler = None
320 self._parser.StartNamespaceDeclHandler = None
321
323 """XML Parser callback. Used internally"""
324 attlist=attrs.keys()
325 for attr in attlist:
326 sp=attr.rfind(" ")
327 if sp==-1: continue
328 ns=attr[:sp]
329 attrs[self.namespaces[ns]+attr[sp+1:]]=attrs[attr]
330 del attrs[attr]
331 self.__depth += 1
332 self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`), 'down')
333 if self.__depth == self._dispatch_depth:
334 if not self._mini_dom : self._mini_dom = Node(tag=tag, attrs=attrs)
335 else: Node.__init__(self._mini_dom,tag=tag, attrs=attrs)
336 self._ptr = self._mini_dom
337 elif self.__depth > self._dispatch_depth:
338 self._ptr.kids.append(Node(tag=tag,parent=self._ptr,attrs=attrs))
339 self._ptr = self._ptr.kids[-1]
340 if self.__depth == 1:
341 self._document_attrs = attrs
342 ns, name = (['']+tag.split())[-2:]
343 self.stream_header_received(ns, name, attrs)
344 if not self.last_is_data and self._ptr.parent: self._ptr.parent.data.append('')
345 self.last_is_data = 0
346
348 """XML Parser callback. Used internally"""
349 self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
350 if self.__depth == self._dispatch_depth:
351 self.dispatch(self._mini_dom)
352 elif self.__depth > self._dispatch_depth:
353 self._ptr = self._ptr.parent
354 else:
355 self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
356 self.__depth -= 1
357 self.last_is_data = 0
358 if self.__depth == 0: self.stream_footer_received()
359
361 """XML Parser callback. Used internally"""
362 self.DEBUG(DBG_NODEBUILDER, data, 'data')
363 if not self._ptr: return
364 if self.last_is_data:
365 self._ptr.data[-1] += data
366 else:
367 self._ptr.data.append(data)
368 self.last_is_data = 1
369
371 """XML Parser callback. Used internally"""
372 if prefix: self.namespaces[uri]=prefix+':'
373 else: self.xmlns=uri
def DEBUG(self, level, text, comment=None):
    """ Receives all NodeBuilder walking events; does nothing by default.
        Can be redefined to get debugging output. """
    pass
377 """ Returns just built Node. """
378 return self._mini_dom
380 """ Gets called when the NodeBuilder reaches some level of depth on it's way up with the built
381 node as argument. Can be redefined to convert incoming XML stanzas to program events. """
383 """ Method called when stream just opened. """
386
388 """ Converts supplied textual string into XML node. Handy f.e. for reading configuration file.
389 Raises xml.parser.expat.parsererror if provided string is not well-formed XML. """
390 return NodeBuilder(xml).getDom()
391
393 """ Converts supplied textual string into XML node. Survives if xml data is cutted half way round.
394 I.e. "<html>some text <br>some more text". Will raise xml.parser.expat.parsererror on misplaced
395 tags though. F.e. "<b>some text <br>some more text</b>" will not work."""
396 return NodeBuilder(xml).getDom()
397