1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
"""Simplexml module provides the xmpppy library with all the tools needed to handle XML nodes and XML streams.
I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
19
20 import xml.parsers.expat
21
def XMLescape(txt):
    """Returns provided string with symbols & < > " replaced by their respective XML entities."""
    # "&" has to be handled first, otherwise the ampersands introduced by the
    # other replacements would be escaped a second time.
    return txt.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
25
# Encoding used by ustr() to decode byte strings into unicode.
ENCODING = 'utf-8'


def ustr(what):
    """Converts object "what" to a unicode string using its own __str__ method if accessible,
    or str() otherwise. A byte-string result is decoded using ENCODING."""
    text_type = type(u'')
    # Exact type match (not isinstance): anything else goes through __str__.
    if type(what) == text_type:
        return what
    try:
        r = what.__str__()
    except AttributeError:
        r = str(what)
    if type(r) != text_type:
        # .decode() gives the same result as unicode(r, ENCODING) for byte
        # strings and does not depend on the Python-2-only "unicode" builtin.
        return r.decode(ENCODING)
    return r
34
class Node:
    """ Node class describes the syntax of a separate XML node. It has a constructor that permits node
        creation from a "namespace name" tag, attributes and a payload of text strings and other nodes.
        It does not natively support building a node from a text string and uses the NodeBuilder class
        for that purpose. After creation the node can be mangled in many ways so it can be completely changed.
        Also the node can be serialised into a string in one of two modes: default (where the textual
        representation of the node describes it exactly) and "fancy" - with whitespace added to make
        indentation and thus make the result more readable by a human.

        Node class has an attribute FORCE_NODE_RECREATION that defaults to False, thus enabling fast node
        replication from some other node. The drawback of the fast way is that the new node shares some
        info with the "original" node, so changing one node may influence the other. That is rarely a
        problem (in xmpppy the original node is usually never used after replication, and replication is
        used only to move upwards on the classes tree).
    """
    # NOTE(review): several "class"/"def" header lines were missing from the listing this
    # class was restored from. Method names that are not called anywhere in the visible code
    # (clearData, delChild, getAttrs, getChildren, getName, getNamespace, getParent,
    # getPayload, getTagAttr, getTagData, setName, setNamespace, setParent, setPayload,
    # setTagAttr, setTagData, has_attr) follow the get/set naming of the surviving
    # headers - confirm against the upstream file.
    # NOTE(review): getTags() is called below but its definition is not part of this
    # listing; it has to be provided for getTag()/setTag() to work.

    # When true, replicating from another Node goes through its string form
    # (and therefore through NodeBuilder) instead of copying its fields.
    FORCE_NODE_RECREATION = 0

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, node=None):
        """ Takes "tag" argument as the name of node (prepended by namespace, if needed and separated from it
            by a space), attrs dictionary as the set of arguments, payload list as the set of textual strings
            and child nodes that this node carries within itself and "parent" argument that is another node
            that this one will be the child of. Also the __init__ can be provided with "node" argument that is
            either a text string containing exactly one node or another Node instance to begin with. If both
            "node" and other arguments are provided then the node is initially created as a replica of "node"
            and then modified to be compliant with the other arguments.
            The "attrs" and "payload" defaults are only read, never modified. """
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                node = str(node)
            if not isinstance(node, Node):
                # Textual input: NodeBuilder receives this instance as its
                # "initial_node" and is expected to populate it.
                node = NodeBuilder(node, self)
            else:
                self.name, self.namespace, self.parent = node.name, node.namespace, node.parent
                # Shallow copies: the containers are new, the child Node objects are shared.
                self.attrs = dict(node.attrs)
                self.data = list(node.data)
                self.kids = list(node.kids)
        else:
            self.name, self.namespace, self.attrs, self.data, self.kids, self.parent = 'tag', '', {}, [], [], None

        if tag:
            # "namespace name" or just "name"; keep the current namespace if none is given.
            self.namespace, self.name = ([self.namespace] + tag.split())[-2:]
        if parent:
            self.parent = parent
        if self.parent and not self.namespace:
            self.namespace = self.parent.namespace
        for attr in attrs.keys():
            self.attrs[attr] = attrs[attr]
        if isinstance(payload, (type(''), type(u''))):
            payload = [payload]
        for i in payload:
            if isinstance(i, Node):
                self.addChild(node=i)
            else:
                self.addData(i)

    def __str__(self, fancy=0):
        """ Method used to dump node into textual representation.
            If "fancy" argument is set to True produces indented output for readability;
            "fancy" doubles as the nesting level (children are rendered with fancy+1). """
        s = (fancy - 1) * 2 * ' ' + "<" + self.name
        if self.namespace:
            # Emit xmlns only where it differs from the parent's namespace.
            if not self.parent or self.parent.namespace != self.namespace:
                s = s + ' xmlns="%s"' % self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            s = s + ' %s="%s"' % (key, XMLescape(val))
        s = s + ">"
        cnt = 0
        if self.kids:
            if fancy:
                s = s + "\n"
            for a in self.kids:
                # data[cnt] is the text that precedes kids[cnt].
                if not fancy and (len(self.data) - 1) >= cnt:
                    s = s + XMLescape(self.data[cnt])
                elif (len(self.data) - 1) >= cnt:
                    s = s + XMLescape(self.data[cnt].strip())
                if a:
                    s = s + a.__str__(fancy and fancy + 1)
                cnt = cnt + 1
        # Trailing text after the last child (or the only text of a childless node).
        if not fancy and (len(self.data) - 1) >= cnt:
            s = s + XMLescape(self.data[cnt])
        elif (len(self.data) - 1) >= cnt:
            s = s + XMLescape(self.data[cnt].strip())
        if not self.kids and s[-1:] == '>':
            # Nothing was added after the opening tag: collapse to an empty-element tag.
            s = s[:-1] + ' />'
            if fancy:
                s = s + "\n"
        else:
            if fancy and not self.data:
                s = s + (fancy - 1) * 2 * ' '
            s = s + "</" + self.name + ">"
            if fancy:
                s = s + "\n"
        return s

    def getCDATA(self):
        """ Serialise node, dropping all tags and leaving CDATA intact.
            That effectively kills all formatting, leaving only the text that was contained in XML.
        """
        s = ""
        cnt = 0
        if self.kids:
            for a in self.kids:
                # "data" may be shorter than "kids" (e.g. after clearData() or
                # setData()), so guard the index the same way __str__ does.
                if cnt < len(self.data):
                    s = s + self.data[cnt]
                if a:
                    s = s + a.getCDATA()
                cnt = cnt + 1
        if cnt < len(self.data):
            s = s + self.data[cnt]
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """ If "node" argument is provided, adds it as child node. Else creates new node from
            the other arguments' values and adds it as well. Returns the added node.
            Raises AttributeError if "attrs" contains an 'xmlns' key. """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        if namespace:
            name = namespace + ' ' + name
        if node:
            newnode = node
            node.parent = self
        else:
            newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
        self.kids.append(newnode)
        # Keep "data" and "kids" the same length: empty text slot for this child.
        self.data.append(u'')
        return newnode

    def addData(self, data):
        """ Adds some CDATA to node. """
        self.data.append(ustr(data))
        # Keep "data" and "kids" the same length: empty child slot for this text.
        self.kids.append(None)

    def clearData(self):
        """ Removes all CDATA from the node. """
        self.data = []

    def delAttr(self, key):
        """ Deletes an attribute "key". Raises KeyError if it is not present. """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """ Deletes the "node" from the node's childs list, if "node" is an instance.
            Else deletes the first node that has the specified name and (optionally) attributes.
            The child's slot is set to None rather than removed. Returns the deleted node. """
        if not isinstance(node, Node):
            node = self.getTag(node, attrs)
        self.kids[self.kids.index(node)] = None
        return node

    def getAttrs(self):
        """ Returns all node's attributes as dictionary (the node's own dict, not a copy). """
        return self.attrs

    def getAttr(self, key):
        """ Returns value of specified attribute, or None if it is not set. """
        try:
            return self.attrs[key]
        except (KeyError, TypeError):
            return None

    def getChildren(self):
        """ Returns all node's child nodes as list (may contain None placeholders). """
        return self.kids

    def getData(self):
        """ Returns all node CDATA as string (concatenated). """
        return ''.join(self.data)

    def getName(self):
        """ Returns the name of node """
        return self.name

    def getNamespace(self):
        """ Returns the namespace of node """
        return self.namespace

    def getParent(self):
        """ Returns the parent of node (if present). """
        return self.parent

    def getPayload(self):
        """ Return the payload of node i.e. list of child nodes and CDATA entries.
            F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list:
            ['text1', <nodea instance>, <nodeb instance>, ' text2']. """
        ret = []
        for i in range(max(len(self.data), len(self.kids))):
            # Empty strings and None placeholders are skipped.
            if i < len(self.data) and self.data[i]:
                ret.append(self.data[i])
            if i < len(self.kids) and self.kids[i]:
                ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """ Filters all child nodes using specified arguments as filter.
            Returns the first found or None if not found. """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """ Returns attribute value of the child with specified name (or None if no such attribute)."""
        try:
            return self.getTag(tag).attrs[attr]
        except Exception:
            # Best effort: missing child or missing attribute both yield None.
            return None

    def getTagData(self, tag):
        """ Returns concatenated CDATA of the child with specified name (or None if no such child)."""
        try:
            return self.getTag(tag).getData()
        except Exception:
            return None

    def setAttr(self, key, val):
        """ Sets attribute "key" with the value "val". """
        self.attrs[key] = val

    def setData(self, data):
        """ Sets node's CDATA to provided string. Resets all previous CDATA!"""
        self.data = [ustr(data)]

    def setName(self, val):
        """ Changes the node name. """
        self.name = val

    def setNamespace(self, namespace):
        """ Changes the node namespace. """
        self.namespace = namespace

    def setParent(self, node):
        """ Sets node's parent to "node". WARNING: does not check if a parent is already present
            and does not remove the node from the list of childs of the previous parent. """
        self.parent = node

    def setPayload(self, payload, add=0):
        """ Sets node payload according to the list specified. WARNING: completely replaces all node's
            previous content unless "add" is true. If you wish just to add child or CDATA - use
            addData or addChild methods. """
        if isinstance(payload, (type(''), type(u''))):
            payload = [payload]
        if not add:
            self.kids, self.data = [], []
        # Route every item through addChild/addData (as __init__ does) so that text
        # never ends up in "kids" and "kids"/"data" stay the same length.
        for item in payload:
            if item is None:
                continue
            if isinstance(item, Node):
                self.addChild(node=item)
            else:
                self.addData(item)

    def setTag(self, name, attrs={}, namespace=None):
        """ Same as getTag but if the node with specified namespace/attributes not found, creates such
            node and returns it. """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """ Creates new node (if not already present) with name "tag"
            and sets its attribute "attr" to value "val". """
        try:
            self.getTag(tag).attrs[attr] = val
        except Exception:
            # No such child yet: create it with the attribute already set.
            self.addChild(tag, attrs={attr: val})

    def setTagData(self, tag, val, attrs={}):
        """ Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs"
            and sets its CDATA to string "val". """
        try:
            self.getTag(tag, attrs).setData(ustr(val))
        except Exception:
            # No such child yet: create it with the text as its payload.
            self.addChild(tag, attrs, payload=[ustr(val)])

    def has_attr(self, key):
        """ Checks if node has attribute "key"."""
        return key in self.attrs

    def __getitem__(self, item):
        """ Returns node's attribute "item" value. """
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """ Sets node's attribute "item" value. """
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """ Deletes node's attribute "item". """
        return self.delAttr(item)

    def __getattr__(self, attr):
        """ Reduce memory usage caused by T/NT classes - use memory only when needed.
            The helper is created on first access and stored on the instance, so this
            hook is not invoked for it again. Raises AttributeError for any other name. """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)
265
class T:
    """ Auxiliary class used for quick access to a node's child nodes. """
    # NOTE(review): only __init__ is visible in this listing; any further
    # methods of this class have to be restored from the upstream file.
    def __init__(self, node):
        # Stored through __dict__ directly, bypassing normal attribute assignment.
        self.__dict__['node'] = node
274
class NT(T):
    """ Auxiliary class used to quickly create a node's child nodes. """
    # NOTE(review): the class and method header lines were missing from the listing.
    # The T base class is assumed because the body relies on self.node and the class
    # is instantiated with a single node argument; __setattr__ is assumed because the
    # body receives an attribute name and a value. Confirm against the upstream file.
    def __setattr__(self, attr, val):
        """ Adds a child named "attr" to the wrapped node: "val" itself if it is a Node,
            otherwise a new child carrying "val" as its payload. """
        if isinstance(val, Node):
            self.node.addChild(attr, node=val)
        else:
            return self.node.addChild(attr, payload=[val])
281
# Debug flag name that NodeBuilder passes to its DEBUG() calls.
DBG_NODEBUILDER = 'nodebuilder'
284 """ Builds a Node class minidom from data parsed to it. This class used for two purposes:
285 1. Creation an XML Node from a textual representation. F.e. reading a config file. See an XML2Node method.
286 2. Handling an incoming XML stream. This is done by mangling
287 the __dispatch_depth parameter and redefining the dispatch method.
288 You do not need to use this class directly if you do not designing your own XML handler."""
289 - def __init__(self,data=None,initial_node=None):
290 """ Takes two optional parameters: "data" and "initial_node".
291 By default class initialised with empty Node class instance.
292 Though, if "initial_node" is provided it used as "starting point".
293 You can think about it as of "node upgrade".
294 "data" (if provided) feeded to parser immidiatedly after instance init.
295 """
296 self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
297 self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
298 self._parser.StartElementHandler = self.