Package xmpp :: Module simplexml
[hide private]
[frames] | [no frames]

Source Code for Module xmpp.simplexml

  1  ##   simplexml.py based on Matthew Allum's xmlstream.py 
  2  ## 
  3  ##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov 
  4  ## 
  5  ##   This program is free software; you can redistribute it and/or modify 
  6  ##   it under the terms of the GNU General Public License as published by 
  7  ##   the Free Software Foundation; either version 2, or (at your option) 
  8  ##   any later version. 
  9  ## 
 10  ##   This program is distributed in the hope that it will be useful, 
 11  ##   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13  ##   GNU General Public License for more details. 
 14   
 15  # $Id: simplexml.py,v 1.33 2007/09/11 12:46:16 normanr Exp $ 
 16   
 17  """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams. 
 18  I'm personally using it in many other separate projects. It is designed to be as standalone as possible.""" 
 19   
 20  import xml.parsers.expat 
 21   
def XMLescape(txt):
    """Return txt with the symbols & < > " replaced by their XML entities."""
    # The ampersand has to be processed first: otherwise the '&' introduced by
    # the other entities would be escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
    )
    escaped = txt
    for symbol, entity in substitutions:
        escaped = escaped.replace(symbol, entity)
    return escaped
# Codec ustr() uses to decode byte strings into unicode strings.
ENCODING='utf-8'

def ustr(what):
    """Convert object "what" to a unicode string.

    Unicode strings (including instances of unicode subclasses) are returned
    unchanged. For any other object its own __str__ method is used, falling
    back to str() when the object has no such method. A result that is not
    unicode yet is decoded using ENCODING.
    """
    text_type = type(u'')
    # isinstance (not an exact type comparison) so that unicode subclasses are
    # passed through instead of being pushed through __str__ / re-decoded.
    if isinstance(what, text_type): return what
    try: r = what.__str__()
    except AttributeError: r = str(what)
    if not isinstance(r, text_type): return unicode(r, ENCODING)
    return r
34
class Node:
    """ Node class describes the syntax of a separate XML node. Its constructor permits node creation
        from a "namespace name" tag, attributes and a payload of text strings and other nodes.
        It does not natively support building a node from a text string and uses the NodeBuilder class
        for that purpose. After creation the node can be modified in many ways.
        A node can be serialised into a string in one of two modes: default (where the textual
        representation describes the node exactly) and "fancy" - with whitespace added for indentation
        to make the result more readable by a human.

        Internally the content is kept in two parallel lists: "data" (CDATA chunks) and "kids"
        (child nodes, with None used as a placeholder). data[i] is serialised before kids[i].

        The class attribute FORCE_NODE_RECREATION defaults to false, which enables fast node replication
        from another node. The drawback of the fast way is that the new node shares the child node
        objects (and parent) with the original node, so changing one node may influence the other.
        When the attribute is true the source node is serialised and parsed again instead.
    """
    FORCE_NODE_RECREATION=0

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, node=None):
        """ Takes "tag" argument as the name of the node (prepended by the namespace, if needed, and
            separated from it by a space), "attrs" dictionary as the set of attributes, "payload" list
            as the set of text strings and child nodes this node carries, and "parent" as the node
            this one will be the child of. "node" may be either a text string containing exactly one
            XML node or another Node instance to start from. If both "node" and other arguments are
            provided, the node is first created as a replica of "node" and then modified according
            to the other arguments.
            The "attrs" and "payload" defaults are only read, never modified. """
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node): node=str(node)
            if not isinstance(node, Node):
                # Textual source: the builder initialises this very instance while parsing.
                node=NodeBuilder(node,self)
            else:
                self.name,self.namespace,self.parent = node.name,node.namespace,node.parent
                # Shallow copies: the containers are new, the child node objects are shared.
                self.attrs=dict(node.attrs)
                self.data=list(node.data)
                self.kids=list(node.kids)
        else: self.name,self.namespace,self.attrs,self.data,self.kids,self.parent = 'tag','',{},[],[],None

        # "namespace name" -> (namespace, name); a bare name keeps the current namespace.
        if tag: self.namespace, self.name = ([self.namespace]+tag.split())[-2:]
        if parent: self.parent = parent
        if self.parent and not self.namespace: self.namespace=self.parent.namespace
        for attr in attrs.keys():
            self.attrs[attr]=attrs[attr]
        if isinstance(payload, (type(''),type(u''))): payload=[payload]
        for i in payload:
            if isinstance(i, Node): self.addChild(node=i)
            else: self.addData(i)

    def __str__(self,fancy=0):
        """ Dumps the node into its textual representation.
            If the "fancy" argument is true, produces indented output for readability
            ("fancy" doubles as the 1-based nesting level used for the indentation). """
        def chunk(index):
            # Escaped CDATA chunk number "index" ('' if there is none); stripped in fancy mode.
            if index >= len(self.data): return ''
            text = self.data[index]
            if fancy: text = text.strip()
            return XMLescape(text)

        s = (fancy-1) * 2 * ' ' + "<" + self.name
        # The namespace declaration is emitted only when it differs from the parent's one.
        if self.namespace and (not self.parent or self.parent.namespace!=self.namespace):
            s = s + ' xmlns="%s"'%self.namespace
        for key in self.attrs.keys():
            s = s + ' %s="%s"' % ( key, XMLescape(ustr(self.attrs[key])) )
        s = s + ">"
        cnt = 0
        if self.kids:
            if fancy: s = s + "\n"
            for a in self.kids:
                s = s + chunk(cnt)
                if a: s = s + a.__str__(fancy and fancy+1)
                cnt = cnt + 1
        # One CDATA chunk may follow the last child.
        s = s + chunk(cnt)
        if not self.kids and s[-1:]=='>':
            # Nothing was written after the opening tag: collapse it to the short form.
            s = s[:-1]+' />'
            if fancy: s = s + "\n"
        else:
            if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
            s = s + "</" + self.name + ">"
            if fancy: s = s + "\n"
        return s

    def getCDATA(self):
        """ Serialises the node, dropping all tags and leaving CDATA intact.
            That effectively kills all formatting, leaving only the text that was contained in XML.
            Missing CDATA chunks (e.g. after clearData or setData) are treated as empty. """
        s = ""
        cnt = 0
        for a in self.kids:
            if cnt < len(self.data): s = s + self.data[cnt]
            if a: s = s + a.getCDATA()
            cnt = cnt + 1
        if cnt < len(self.data): s = s + self.data[cnt]
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """ If the "node" argument is provided, adds it as a child node (and makes this node its
            parent). Else creates a new node from the other arguments' values and adds it as well.
            Returns the added node. Raises AttributeError if "attrs" contains an 'xmlns' key. """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        if namespace: name=namespace+' '+name
        if node:
            newnode=node
            node.parent = self
        else: newnode=Node(tag=name, parent=self, attrs=attrs, payload=payload)
        self.kids.append(newnode)
        # Keep "data" aligned with "kids": an empty CDATA chunk precedes the new child.
        self.data.append(u'')
        return newnode

    def addData(self, data):
        """ Adds some CDATA to the node. """
        self.data.append(ustr(data))
        # Placeholder that keeps "kids" aligned with "data".
        self.kids.append(None)

    def clearData(self):
        """ Removes all CDATA from the node. """
        self.data=[]

    def delAttr(self, key):
        """ Deletes the attribute "key". Raises KeyError if there is no such attribute. """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """ Deletes "node" from the node's children list, if "node" is a Node instance.
            Else deletes the first child that has the specified name and (optionally) attributes.
            The child's slot is replaced by None; the removed node is returned.
            NOTE: when a name is given and no child matches, the lookup yields None and the
            list.index(None) call below either hits a CDATA placeholder (nothing is removed,
            None is returned) or raises ValueError. """
        if not isinstance(node, Node): node=self.getTag(node,attrs)
        self.kids[self.kids.index(node)]=None
        return node

    def getAttrs(self):
        """ Returns all node's attributes as a dictionary (the live one, not a copy). """
        return self.attrs

    def getAttr(self, key):
        """ Returns the value of the specified attribute, or None if it is not set. """
        try: return self.attrs[key]
        except (KeyError, TypeError): return None

    def getChildren(self):
        """ Returns the node's children list (the live list; it may contain None placeholders). """
        return self.kids

    def getData(self):
        """ Returns all node CDATA as a string (concatenated). """
        return ''.join(self.data)

    def getName(self):
        """ Returns the name of the node. """
        return self.name

    def getNamespace(self):
        """ Returns the namespace of the node. """
        return self.namespace

    def getParent(self):
        """ Returns the parent of the node (if present). """
        return self.parent

    def getPayload(self):
        """ Returns the payload of the node i.e. the list of child nodes and CDATA entries.
            F.e. for "<node>text1<nodea/><nodeb/> text2</node>" the returned list will be:
            ['text1', <nodea instance>, <nodeb instance>, ' text2'].
            Empty CDATA chunks and placeholders are skipped. """
        ret=[]
        for i in range(max(len(self.data),len(self.kids))):
            if i < len(self.data) and self.data[i]: ret.append(self.data[i])
            if i < len(self.kids) and self.kids[i]: ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """ Filters all child nodes using the specified arguments as filter.
            Returns the first one found or None if not found. """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self,tag,attr):
        """ Returns the attribute value of the child with the specified name
            (or None if there is no such child or attribute). """
        node=self.getTag(tag)
        if node is None: return None
        try: return node.attrs[attr]
        except (KeyError, TypeError): return None

    def getTagData(self,tag):
        """ Returns the concatenated CDATA of the child with the specified name
            (or None if there is no such child). """
        node=self.getTag(tag)
        if node is None: return None
        return node.getData()

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """ Filters all child nodes using the specified arguments as filter.
            Returns the list of nodes found. If "one" is true, returns the first
            matching node instead (or None if nothing matches). """
        nodes=[]
        for node in self.kids:
            if not node: continue
            if namespace and namespace!=node.getNamespace(): continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key]!=attrs[key]: break
                else: nodes.append(node)
            if one and nodes: return nodes[0]
        if not one: return nodes

    def setAttr(self, key, val):
        """ Sets the attribute "key" to the value "val". """
        self.attrs[key]=val

    def setData(self, data):
        """ Sets the node's CDATA to the provided string. Resets all previous CDATA! """
        self.data=[ustr(data)]

    def setName(self,val):
        """ Changes the node name. """
        self.name = val

    def setNamespace(self, namespace):
        """ Changes the node namespace. """
        self.namespace=namespace

    def setParent(self, node):
        """ Sets the node's parent to "node". WARNING: does not check whether a parent is already
            present and does not remove the node from the children list of the previous parent. """
        self.parent = node

    def setPayload(self,payload,add=0):
        """ Sets the node payload according to the list specified. WARNING: completely replaces all
            node's previous content unless "add" is true. If you wish just to add a child or CDATA -
            use the addData or addChild methods.
            Payload items are handled the same way the constructor handles them: Node instances
            become children (with this node as parent), anything else becomes CDATA. None items
            (placeholders, f.e. taken from another node's getChildren()) are skipped. """
        if isinstance(payload, (type(''),type(u''))): payload=[payload]
        # Snapshot first: the payload may be this node's own children list.
        payload=list(payload)
        if not add:
            self.kids=[]
            self.data=[]
        for i in payload:
            if i is None: continue
            if isinstance(i, Node): self.addChild(node=i)
            else: self.addData(i)

    def setTag(self, name, attrs={}, namespace=None):
        """ Same as getTag but if a node with the specified namespace/attributes is not found,
            creates such a node and returns it. """
        node=self.getTags(name, attrs, namespace=namespace, one=1)
        if node: return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self,tag,attr,val):
        """ Creates a new child (if not already present) with the name "tag"
            and sets its attribute "attr" to the value "val". """
        node=self.getTag(tag)
        if node is None: self.addChild(tag,attrs={attr:val})
        else: node.attrs[attr]=val

    def setTagData(self,tag,val,attrs={}):
        """ Creates a new child (if not already present) with the name "tag" and (optionally)
            attributes "attrs" and sets its CDATA to the string "val". """
        node=self.getTag(tag,attrs)
        if node is None: self.addChild(tag,attrs,payload=[ustr(val)])
        else: node.setData(ustr(val))

    def has_attr(self,key):
        """ Checks if the node has the attribute "key". """
        return key in self.attrs

    def __getitem__(self,item):
        """ Returns the node's attribute "item" value. """
        return self.getAttr(item)

    def __setitem__(self,item,val):
        """ Sets the node's attribute "item" value. """
        return self.setAttr(item,val)

    def __delitem__(self,item):
        """ Deletes the node's attribute "item". """
        return self.delAttr(item)

    def __getattr__(self,attr):
        """ Reduce memory usage caused by T/NT classes - use memory only when needed.
            The helper objects are created on first access and then cached on the instance. """
        if attr=='T':
            self.T=T(self)
            return self.T
        if attr=='NT':
            self.NT=NT(self)
            return self.NT
        raise AttributeError(attr)
265
class T:
    """ Auxiliary class that gives quick attribute-style access to a node's child nodes. """

    def __init__(self,node):
        # Stored through __dict__ directly: a plain assignment would be routed
        # to the overridden __setattr__ below.
        self.__dict__['node']=node

    def __getattr__(self,attr):
        """ Reading an attribute looks up the child tag with that name. """
        return self.node.getTag(attr)

    def __setattr__(self,attr,val):
        """ Assigning a Node re-initialises the child tag "attr" (created if missing) from it;
            assigning anything else sets that child's CDATA. """
        if not isinstance(val,Node):
            return self.node.setTagData(attr,val)
        target=self.node.setTag(attr)
        Node.__init__(target,node=val)

    def __delattr__(self,attr):
        """ Deleting an attribute deletes the child tag with that name. """
        return self.node.delChild(attr)
274
class NT(T):
    """ Auxiliary class used to quickly create a node's child nodes. """

    def __getattr__(self,attr):
        """ Reading an attribute adds a new child tag with that name and returns it. """
        return self.node.addChild(attr)

    def __setattr__(self,attr,val):
        """ Assigning a Node adds it as a child; assigning anything else adds a new
            child tag "attr" carrying the value as its payload. """
        if not isinstance(val,Node):
            return self.node.addChild(attr,payload=[val])
        self.node.addChild(attr,node=val)
# Debug "level" tag that NodeBuilder passes to its DEBUG hook.
DBG_NODEBUILDER = 'nodebuilder'

class NodeBuilder:
    """ Builds a Node class minidom from data parsed to it. This class is used for two purposes:
        1. Creation of an XML Node from a textual representation. F.e. reading a config file.
           See the XML2Node function.
        2. Handling an incoming XML stream. This is done by changing the _dispatch_depth
           attribute and redefining the dispatch method.
        You do not need to use this class directly unless you are designing your own XML handler."""

    def __init__(self,data=None,initial_node=None):
        """ Takes two optional parameters: "data" and "initial_node".
            By default the builder starts without any node and creates one for the first element
            seen at the dispatch depth. If "initial_node" is provided it is used as the
            "starting point" instead (it gets re-initialised in place - a "node upgrade").
            "data" (if provided) is fed to the parser as a complete document immediately after
            the instance is initialised.
            """
        self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
        # With a namespace separator expat reports names as "namespace-uri name".
        self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
        self._parser.StartElementHandler = self.starttag
        self._parser.EndElementHandler = self.endtag
        self._parser.CharacterDataHandler = self.handle_data
        self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
        self.Parse = self._parser.Parse

        self.__depth = 0              # current element nesting level
        self._dispatch_depth = 1      # nesting level at which finished nodes are dispatched
        self._document_attrs = None   # attributes of the document (depth 1) element
        self._mini_dom=initial_node   # node being built at the dispatch depth
        self.last_is_data = 1         # true if the previous parser event was character data
        self._ptr=None                # node currently being filled
        self.namespaces={"http://www.w3.org/XML/1998/namespace":'xml:'}
        self.xmlns="http://www.w3.org/XML/1998/namespace"

        if data: self._parser.Parse(data,1)

    def destroy(self):
        """ Method used to allow the class instance to be garbage-collected
            (drops the parser's references to the bound handler methods). """
        self._parser.StartElementHandler = None
        self._parser.EndElementHandler = None
        self._parser.CharacterDataHandler = None
        self._parser.StartNamespaceDeclHandler = None

    def starttag(self, tag, attrs):
        """XML Parser callback. Used internally"""
        # FIXME: Crude hack. And it also slows down the whole library considerably.
        # Attribute names reported as "namespace-uri name" are renamed to "prefix:name".
        # A snapshot of the keys is iterated because the dictionary is modified in the loop.
        for attr in list(attrs.keys()):
            sp=attr.rfind(" ")
            if sp==-1: continue
            ns=attr[:sp]
            attrs[self.namespaces[ns]+attr[sp+1:]]=attrs[attr]
            del attrs[attr]
        self.__depth += 1
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, repr(attrs)), 'down')
        if self.__depth == self._dispatch_depth:
            if not self._mini_dom : self._mini_dom = Node(tag=tag, attrs=attrs)
            else: Node.__init__(self._mini_dom,tag=tag, attrs=attrs)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag,parent=self._ptr,attrs=attrs))
            self._ptr = self._ptr.kids[-1]
        if self.__depth == 1:
            self._document_attrs = attrs
            ns, name = (['']+tag.split())[-2:]
            self.stream_header_received(ns, name, attrs)
        # No text preceded this element: add an empty CDATA chunk to the parent so that
        # its "data" list stays aligned with its "kids" list.
        if not self.last_is_data and self._ptr.parent: self._ptr.parent.data.append('')
        self.last_is_data = 0

    def endtag(self, tag ):
        """XML Parser callback. Used internally"""
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
        if self.__depth == self._dispatch_depth:
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
        self.__depth -= 1
        self.last_is_data = 0
        if self.__depth == 0: self.stream_footer_received()

    def handle_data(self, data):
        """XML Parser callback. Used internally"""
        self.DEBUG(DBG_NODEBUILDER, data, 'data')
        if not self._ptr: return
        if self.last_is_data:
            # Consecutive character data events are merged into one CDATA chunk.
            self._ptr.data[-1] += data
        else:
            self._ptr.data.append(data)
            self.last_is_data = 1

    def handle_namespace_start(self, prefix, uri):
        """XML Parser callback. Used internally"""
        if prefix: self.namespaces[uri]=prefix+':'
        else: self.xmlns=uri

    def DEBUG(self, level, text, comment=None):
        """ Gets all NodeBuilder walking events. Can be used for debugging if redefined."""

    def getDom(self):
        """ Returns the just built Node (None if nothing has been built yet). """
        return self._mini_dom

    def dispatch(self,stanza):
        """ Gets called when the NodeBuilder reaches the dispatch depth on its way up, with the
            built node as argument. Can be redefined to convert incoming XML stanzas to program
            events. """

    def stream_header_received(self,ns,tag,attrs):
        """ Method called when the stream is just opened (the document element starts). """

    def stream_footer_received(self):
        """ Method called when the stream is just closed (the document element ends). """
386
def XML2Node(xml):
    """ Converts the supplied textual string into an XML node. Handy f.e. for reading a
        configuration file. Raises xml.parsers.expat.ExpatError if the provided string is not
        well-formed XML. """
    builder = NodeBuilder(xml)
    return builder.getDom()
391
def BadXML2Node(xml):
    """ Converts the supplied textual string into an XML node. Survives if the xml data is cut
        half way through, i.e. "<html>some text <br>some more text".
        Will still raise xml.parsers.expat.ExpatError on misplaced tags though.
        F.e. "<b>some text <br>some more text</b>" will not work.
        Returns None for an empty string. """
    builder = NodeBuilder()
    # Feed the text as a non-final chunk: the parser then does not complain about
    # elements that are still open when the input ends.
    if xml: builder.Parse(xml, 0)
    return builder.getDom()
397