You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							305 lines
						
					
					
						
							10 KiB
						
					
					
				
			
		
		
	
	
							305 lines
						
					
					
						
							10 KiB
						
					
					
				# -*- coding: utf-8 -*-
 | 
						|
"""
 | 
						|
    markupsafe
 | 
						|
    ~~~~~~~~~~
 | 
						|
 | 
						|
    Implements a Markup string.
 | 
						|
 | 
						|
    :copyright: (c) 2010 by Armin Ronacher.
 | 
						|
    :license: BSD, see LICENSE for more details.
 | 
						|
"""
 | 
						|
import re
import string

try:
    from collections.abc import Mapping
except ImportError:  # Python 2: the ABCs still live in ``collections``.
    from collections import Mapping

from markupsafe._compat import text_type, string_types, int_types, \
     unichr, iteritems, PY2
 | 
						|
 | 
						|
__version__ = "1.0"

# Public API.  ``soft_str`` is appended at the bottom of the module on
# Python 3.
__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
 | 
						|
 | 
						|
 | 
						|
# Matches one HTML comment or one tag.  DOTALL lets ``.`` cross newlines so a
# multi-line comment is consumed as a single unit; without it a comment such
# as "<!-- a\n> b -->" was only stripped up to the first ">".
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)
# Matches an entity / character reference like "&amp;" or "&#38;"; group 1 is
# the text between "&" and ";".
_entity_re = re.compile(r'&([^& ;]+);')
 | 
						|
 | 
						|
 | 
						|
class Markup(text_type):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed a unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...  def __html__(self):
    ...   return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want the object passed to always be treated as unsafe you can use
    the :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        # Objects that know their own HTML representation supply it; the
        # result is then trusted as-is.
        if hasattr(base, '__html__'):
            base = base.__html__()
        # ``encoding``/``errors`` are only forwarded when an encoding was
        # given explicitly.
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return ``self``: the string already is its HTML representation."""
        return self

    def __add__(self, other):
        """Concatenate with the escaped form of ``other``.

        Returns ``NotImplemented`` unless ``other`` is a string or provides
        ``__html__``.
        """
        if isinstance(other, string_types) or hasattr(other, '__html__'):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Reflected ``+``: escape ``other`` and append ``self`` to it."""
        if hasattr(other, '__html__') or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup ``num`` times, keeping the markup type."""
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        """``%``-format with every argument wrapped so it is escaped when
        it is rendered into the result.
        """
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            text_type.__repr__(self)
        )

    # The wrappers below reuse the docstrings of the text_type methods they
    # shadow; each one re-wraps its result(s) in the markup class.

    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))
    join.__doc__ = text_type.join.__doc__

    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(
            self, *args, **kwargs)))
    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into a text_type string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'
        """
        from markupsafe._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or a code point out of range.
                # OverflowError: the number is too large for unichr().
                pass
            # Don't modify unexpected input.
            return m.group()
        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        r"""Unescape markup into a text_type string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo;  <em>About</em>").striptags()
        u'Main \xbb About'
        """
        # Remove comments/tags first, then collapse every whitespace run to a
        # single space before resolving entities.
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_simple_escaping_wrapper(name):
        # Class-body helper (deleted below): build a method that escapes
        # string / ``__html__`` arguments, delegates to the text_type method
        # called ``name`` and wraps the result in the markup class.
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    # In a class body ``locals()`` is the namespace being built, so this
    # installs one wrapper per listed method name.
    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_simple_escaping_wrapper(method)

    # new in python 2.5
    if hasattr(text_type, 'partition'):
        def partition(self, sep):
            return tuple(map(self.__class__,
                             text_type.partition(self, self.escape(sep))))

        def rpartition(self, sep):
            return tuple(map(self.__class__,
                             text_type.rpartition(self, self.escape(sep))))

    # new in python 2.6
    if hasattr(text_type, 'format'):
        def format(*args, **kwargs):
            # ``self`` is pulled out of ``*args`` rather than being a named
            # parameter -- presumably so that a ``self=...`` keyword can
            # still reach the format string.
            self, args = args[0], args[1:]
            formatter = EscapeFormatter(self.escape)
            kwargs = _MagicFormatMapping(args, kwargs)
            return self.__class__(formatter.vformat(self, args, kwargs))

        def __html_format__(self, format_spec):
            """Return ``self`` for an empty format spec.

            :raises ValueError: if a non-empty ``format_spec`` is given.
            """
            if format_spec:
                raise ValueError('Unsupported format specification '
                                 'for Markup.')
            return self

    # not in python 3
    if hasattr(text_type, '__getslice__'):
        __getslice__ = make_simple_escaping_wrapper('__getslice__')

    # Keep the loop variable and the factory out of the class namespace.
    del method, make_simple_escaping_wrapper
 | 
						|
 | 
						|
 | 
						|
class _MagicFormatMapping(Mapping):
    """This class implements a dummy wrapper to fix a bug in the Python
    standard library for string formatting.

    See http://bugs.python.org/issue13598 for information about why
    this is necessary.

    The empty key ``''`` is resolved against the positional arguments in
    order: every such lookup consumes the next positional index.  Any other
    key is looked up in the keyword arguments.
    """

    def __init__(self, args, kwargs):
        self._args = args
        self._kwargs = kwargs
        # Index the next ``''`` lookup will resolve to.
        self._last_index = 0

    def __getitem__(self, key):
        if key == '':
            idx = self._last_index
            self._last_index += 1
            try:
                return self._args[idx]
            except LookupError:
                pass
            # No positional argument at that index: fall back to a keyword
            # argument named after the index.
            key = str(idx)
        return self._kwargs[key]

    def __iter__(self):
        # Iteration and length only reflect the keyword arguments.
        return iter(self._kwargs)

    def __len__(self):
        return len(self._kwargs)
 | 
						|
 | 
						|
 | 
						|
if hasattr(text_type, 'format'):
    class EscapeFormatter(string.Formatter):
        """A :class:`string.Formatter` that passes every formatted field
        through the ``escape`` callable given to the constructor.
        """

        def __init__(self, escape):
            self.escape = escape

        def format_field(self, value, format_spec):
            """Format a single field and return its escaped text.

            Values providing ``__html_format__`` are asked to format
            themselves; values providing only ``__html__`` are rendered with
            it; anything else goes through the regular formatter.

            :raises ValueError: if ``format_spec`` is non-empty for a value
                that only provides ``__html__``.
            """
            if hasattr(value, '__html_format__'):
                rv = value.__html_format__(format_spec)
            elif hasattr(value, '__html__'):
                if format_spec:
                    raise ValueError('No format specification allowed '
                                     'when formatting an object with '
                                     'its __html__ method.')
                rv = value.__html__()
            else:
                # We need to make sure the format spec is unicode here as
                # otherwise the wrong callback methods are invoked.  For
                # instance a byte string there would invoke __str__ and
                # not __unicode__.
                rv = string.Formatter.format_field(
                    self, value, text_type(format_spec))
            return text_type(self.escape(rv))
 | 
						|
 | 
						|
 | 
						|
def _escape_argspec(obj, iterable, escape):
    """Helper for various string-wrapped functions.

    For every ``(key, value)`` pair in ``iterable`` whose value is a string
    or provides ``__html__``, store ``escape(value)`` at ``obj[key]``.
    Other values are left alone.

    :param obj: mutable container (list or dict) updated in place.
    :param iterable: iterable of ``(key, value)`` pairs.
    :param escape: callable applied to the matching values.
    :return: ``obj`` itself.
    """
    for key, value in iterable:
        if hasattr(value, '__html__') or isinstance(value, string_types):
            obj[key] = escape(value)
    return obj
 | 
						|
 | 
						|
 | 
						|
class _MarkupEscapeHelper(object):
    """Helper for Markup.__mod__

    Wraps a ``%``-format argument so that it is escaped when rendered as a
    string or repr, while numeric conversions use the raw object.
    """

    def __init__(self, obj, escape):
        self.obj = obj
        self.escape = escape

    def __getitem__(self, item):
        # Item access returns another helper, so items looked up on the
        # wrapped object are escaped when rendered as well.
        return _MarkupEscapeHelper(self.obj[item], self.escape)

    def __str__(self):
        return text_type(self.escape(self.obj))

    __unicode__ = __str__

    def __repr__(self):
        return str(self.escape(repr(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)
 | 
						|
 | 
						|
 | 
						|
# We have to import these down here because the speedups and native modules
# import the Markup type, which is defined above.
try:
    from markupsafe._speedups import escape, escape_silent, soft_unicode
except ImportError:
    from markupsafe._native import escape, escape_silent, soft_unicode

# Outside Python 2, also expose ``soft_unicode`` under the name ``soft_str``.
if not PY2:
    soft_str = soft_unicode
    __all__.append('soft_str')
 | 
						|
 |