###############################################################################
#
# XMLwriter - A base class for XlsxWriter classes.
#
# Used in conjunction with XlsxWriter.
#
# SPDX-License-Identifier: BSD-2-Clause
# Copyright 2013-2024, John McNamara, jmcnamara@cpan.org
#

# Standard packages.
import re
from io import StringIO

# Compile performance critical regular expressions.

# Matches literal text that already looks like an "_xHHHH_" escape.
re_control_chars_1 = re.compile("(_x[0-9a-fA-F]{4}_)")

# Matches the control characters that are written as "_xHHHH_" escapes.
# Tab (\x09) and newline (\x0a) are deliberately not in the range.
re_control_chars_2 = re.compile(r"([\x00-\x08\x0b-\x1f])")

# Fast pre-check: only strings containing one of these need escaping.
xml_escapes = re.compile('["&<>\n]')


class XMLwriter(object):
    """
    Simple XML writer class.

    Every ``_xml_*`` method writes a fragment of XML to ``self.fh``. The
    ``attributes`` arguments are iterables of ``(key, value)`` pairs which
    are written in the order given.

    """

    def __init__(self):
        # The file-like object that the XML is written to.
        self.fh = None
        # True only when this object opened self.fh itself and is
        # therefore responsible for closing it.
        self.internal_fh = False

    def _set_filehandle(self, filehandle):
        # Set the writer filehandle directly. Mainly for testing.
        # The handle belongs to the caller so _xml_close() won't close it.
        self.fh = filehandle
        self.internal_fh = False

    def _set_xml_writer(self, filename):
        # Set the XML writer filehandle for the object. A StringIO object
        # is used as is and left open by _xml_close(). Anything else is
        # treated as a file name and opened for writing as UTF-8.
        if isinstance(filename, StringIO):
            self.internal_fh = False
            self.fh = filename
        else:
            self.internal_fh = True
            self.fh = open(filename, "w", encoding="utf-8")

    def _xml_close(self):
        # Close the XML filehandle if we created it.
        if self.internal_fh:
            self.fh.close()

    def _xml_declaration(self):
        # Write the XML declaration.
        self.fh.write(
            """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n"""
        )

    def _xml_start_tag(self, tag, attributes=()):
        # Write an XML start tag with optional attributes.
        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += ' %s="%s"' % (key, value)

        self.fh.write("<%s>" % tag)

    def _xml_start_tag_unencoded(self, tag, attributes=()):
        # Write an XML start tag with optional, unencoded, attributes.
        # This is a minor speed optimization for elements that don't
        # need encoding.
        for key, value in attributes:
            tag += ' %s="%s"' % (key, value)

        self.fh.write("<%s>" % tag)

    def _xml_end_tag(self, tag):
        # Write an XML end tag.
        self.fh.write("</%s>" % tag)

    def _xml_empty_tag(self, tag, attributes=()):
        # Write an empty XML tag with optional attributes.
        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += ' %s="%s"' % (key, value)

        self.fh.write("<%s/>" % tag)

    def _xml_empty_tag_unencoded(self, tag, attributes=()):
        # Write an empty XML tag with optional, unencoded, attributes.
        # This is a minor speed optimization for elements that don't
        # need encoding.
        for key, value in attributes:
            tag += ' %s="%s"' % (key, value)

        self.fh.write("<%s/>" % tag)

    def _xml_data_element(self, tag, data, attributes=()):
        # Write an XML element containing data with optional attributes.

        # Keep the bare name for the end tag since the attributes are
        # appended to "tag" below.
        end_tag = tag

        for key, value in attributes:
            value = self._escape_attributes(value)
            tag += ' %s="%s"' % (key, value)

        data = self._escape_data(data)
        data = self._escape_control_characters(data)

        self.fh.write("<%s>%s</%s>" % (tag, data, end_tag))

    def _xml_string_element(self, index, attributes=()):
        # Optimized tag writer for <c> cell string elements in the inner
        # loop. "index" is written with %d as the cell value.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += ' %s="%s"' % (key, value)

        self.fh.write("""<c%s t="s"><v>%d</v></c>""" % (attr, index))

    def _xml_si_element(self, string, attributes=()):
        # Optimized tag writer for shared strings <si> elements. The
        # attributes are applied to the inner <t> element.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += ' %s="%s"' % (key, value)

        string = self._escape_data(string)

        self.fh.write("""<si><t%s>%s</t></si>""" % (attr, string))

    def _xml_rich_si_element(self, string):
        # Optimized tag writer for shared strings <si> rich string elements.
        # The string is written as is, without any escaping.
        self.fh.write("""<si>%s</si>""" % string)

    def _xml_number_element(self, number, attributes=()):
        # Optimized tag writer for <c> cell number elements in the inner
        # loop. The number is formatted with %.16G.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += ' %s="%s"' % (key, value)

        self.fh.write("""<c%s><v>%.16G</v></c>""" % (attr, number))

    def _xml_formula_element(self, formula, result, attributes=()):
        # Optimized tag writer for <c> cell formula elements in the inner
        # loop. Both the formula and its result are data escaped.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += ' %s="%s"' % (key, value)

        self.fh.write(
            """<c%s><f>%s</f><v>%s</v></c>"""
            % (attr, self._escape_data(formula), self._escape_data(result))
        )

    def _xml_inline_string(self, string, preserve, attributes=()):
        # Optimized tag writer for inlineStr cell elements in the inner loop.
        attr = ""
        t_attr = ""

        # Set the <t> attribute to preserve whitespace.
        if preserve:
            t_attr = ' xml:space="preserve"'

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += ' %s="%s"' % (key, value)

        string = self._escape_data(string)

        self.fh.write(
            """<c%s t="inlineStr"><is><t%s>%s</t></is></c>"""
            % (attr, t_attr, string)
        )

    def _xml_rich_inline_string(self, string, attributes=()):
        # Optimized tag writer for rich inlineStr in the inner loop.
        # The string is written as is, without any escaping.
        attr = ""

        for key, value in attributes:
            value = self._escape_attributes(value)
            attr += ' %s="%s"' % (key, value)

        self.fh.write(
            """<c%s t="inlineStr"><is>%s</is></c>""" % (attr, string)
        )

    def _escape_attributes(self, attribute):
        # Escape XML characters in attributes. Returns the escaped string,
        # or the value unchanged if there is nothing to escape.
        try:
            if not xml_escapes.search(attribute):
                return attribute
        except TypeError:
            # Non-string values, such as numbers, can't be searched and
            # are returned as they are.
            return attribute

        # The ampersand must be replaced first so that the entities
        # added by the other replacements aren't escaped again.
        attribute = (
            attribute.replace("&", "&amp;")
            .replace('"', "&quot;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\n", "&#xA;")
        )

        return attribute

    def _escape_data(self, data):
        # Escape XML characters in data sections of tags. Note, this
        # is different from _escape_attributes() in that double quotes
        # are not escaped by Excel. Newlines are also left as they are.
        try:
            if not xml_escapes.search(data):
                return data
        except TypeError:
            # Non-string values, such as numbers, can't be searched and
            # are returned as they are.
            return data

        # The ampersand must be replaced first, see _escape_attributes().
        data = (
            data.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        )

        return data

    @staticmethod
    def _escape_control_characters(data):
        # Excel escapes control characters with _xHHHH_ and also escapes any
        # literal strings of that type by encoding the leading underscore.
        # So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
        # The following substitutions deal with those cases.
        try:
            # Escape the escape.
            data = re_control_chars_1.sub(r"_x005F\1", data)
        except TypeError:
            # Non-string values are returned as they are.
            return data

        # Convert control character to the _xHHHH_ escape.
        data = re_control_chars_2.sub(
            lambda match: "_x%04X_" % ord(match.group(1)), data
        )

        # Escapes non characters in strings.
        data = data.replace("\uFFFE", "_xFFFE_").replace("\uFFFF", "_xFFFF_")

        return data