dbt-selly/dbt-env/lib/python3.8/site-packages/slugify/slugify.py

import re
import sys
import unicodedata
from html.entities import name2codepoint

try:
    import text_unidecode as unidecode
except ImportError:
    import unidecode

__all__ = ['slugify', 'smart_truncate']


# Named HTML character references such as "&amp;"; the alternation is built
# from every entity name listed in html.entities.name2codepoint.
CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
# Decimal numeric character references, e.g. "&#233;" (digits captured in group 1).
DECIMAL_PATTERN = re.compile(r'&#(\d+);')
# Hexadecimal numeric character references, e.g. "&#xE9;" (hex digits captured in group 1).
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
# One or more consecutive apostrophes.
QUOTE_PATTERN = re.compile(r'[\']+')
# Runs of anything other than ASCII letters, digits and the dash.
DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
# Runs of non-word characters or underscores; slugify() uses this one when
# allow_unicode is set.
DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')
# Two or more consecutive dashes.
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
# A comma sitting directly between two digits, such as the one in "1,000".
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
# Separator used internally while a slug is being built; also the default
# value of slugify()'s `separator` argument.
DEFAULT_SEPARATOR = '-'


def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', save_order=False):
    """
    Shorten a string to at most ``max_length`` characters.
    :param string (str): text to shorten; leading/trailing separator characters are stripped first
    :param max_length (int): maximum length of the result; a falsy value disables truncation
    :param word_boundary (bool): if True, only whole words (as split on separator) are kept
    :param save_order (bool): if True, stop at the first word that does not fit instead of
        skipping it and trying the following words
    :param separator (str): separator between words
    :return (str): the shortened string
    """

    string = string.strip(separator)

    # No limit requested, or the text is already strictly shorter than the limit.
    if not max_length or len(string) < max_length:
        return string

    # Plain cut: slice, then drop any separator left dangling at the edges.
    if not word_boundary:
        return string[:max_length].strip(separator)

    # A single word cannot be cut on a boundary, so fall back to a hard cut.
    if separator not in string:
        return string[:max_length]

    pieces = []  # alternating word / separator fragments of the result
    used = 0     # combined length of everything collected in `pieces`
    for word in string.split(separator):
        if not word:
            continue
        candidate_len = used + len(word)
        if candidate_len == max_length:
            # Exact fit: take the word and stop.
            pieces.append(word)
            break
        if candidate_len < max_length:
            pieces.append(word)
            pieces.append(separator)
            used = candidate_len + len(separator)
            continue
        # The word does not fit; either give up or try the next one.
        if save_order:
            break

    truncated = ''.join(pieces)
    if not truncated:  # pragma: no cover
        truncated = string[:max_length]
    return truncated.strip(separator)


def _decode_numeric_reference(match, base):
    """
    Decode one numeric HTML character reference.
    :param match (re.Match): match whose group(1) holds the digits of the reference
    :param base (int): numeric base of those digits (10 or 16)
    :return (str): the referenced character, or the reference text unchanged when
        the number is not a valid code point
    """
    try:
        return chr(int(match.group(1), base))
    except (ValueError, OverflowError):
        # Out-of-range values such as "&#9999999999;" are kept verbatim so that
        # one bad reference does not prevent the others from being decoded.
        return match.group(0)


def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
            separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
            replacements=(), allow_unicode=False):
    """
    Make a slug from the given text.
    :param text (str): initial text; bytes are decoded as UTF-8, ignoring invalid sequences
    :param entities (bool): converts html entities to unicode
    :param decimal (bool): converts html decimal to unicode
    :param hexadecimal (bool): converts html hexadecimal to unicode
    :param max_length (int): output string length
    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
    :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
    :param separator (str): separator between words
    :param stopwords (iterable): words to discount
    :param regex_pattern (str): regex pattern for disallowed characters
    :param lowercase (bool): activate case sensitivity by setting it to False
    :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']];
        applied once to the raw text and once more to the finished slug
    :param allow_unicode (bool): allow unicode characters
    :return (str):
    """

    # ensure text is unicode; this must happen before the replacements below,
    # because str replacement rules cannot be applied to a bytes object
    if not isinstance(text, str):
        text = str(text, 'utf-8', 'ignore')

    # the rules are iterated twice, so materialize them in case a one-shot
    # iterator (or None) was passed
    replacements = list(replacements or ())

    # user-specific replacements
    for old, new in replacements:
        text = text.replace(old, new)

    # replace quotes with dashes - pre-process
    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)

    # decode unicode
    if not allow_unicode:
        text = unidecode.unidecode(text)

    # character entity reference
    if entities:
        text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text)

    # decimal character reference
    if decimal:
        text = DECIMAL_PATTERN.sub(lambda m: _decode_numeric_reference(m, 10), text)

    # hexadecimal character reference
    if hexadecimal:
        text = HEX_PATTERN.sub(lambda m: _decode_numeric_reference(m, 16), text)

    # translate
    text = unicodedata.normalize('NFKC' if allow_unicode else 'NFKD', text)

    # make the text lowercase (optional)
    if lowercase:
        text = text.lower()

    # remove generated quotes -- post-process
    text = QUOTE_PATTERN.sub('', text)

    # cleanup numbers
    text = NUMBERS_PATTERN.sub('', text)

    # replace all other unwanted characters
    if allow_unicode:
        pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
    else:
        pattern = regex_pattern or DISALLOWED_CHARS_PATTERN

    text = re.sub(pattern, DEFAULT_SEPARATOR, text)

    # remove redundant
    text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)

    # remove stopwords
    if stopwords:
        # a set gives constant-time membership tests and reads the iterable
        # exactly once
        if lowercase:
            excluded = {s.lower() for s in stopwords}
        else:
            excluded = set(stopwords)
        words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in excluded]
        text = DEFAULT_SEPARATOR.join(words)

    # finalize user-specific replacements
    for old, new in replacements:
        text = text.replace(old, new)

    # smart truncate if requested
    if max_length > 0:
        text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)

    if separator != DEFAULT_SEPARATOR:
        text = text.replace(DEFAULT_SEPARATOR, separator)

    return text
fix order deliveries 2022-03-22 15:13:27 +00:00			`import re`
			`import sys`
			`import unicodedata`
			`from html.entities import name2codepoint`

			`try:`
			`import text_unidecode as unidecode`
			`except ImportError:`
			`import unidecode`

			`__all__ = ['slugify', 'smart_truncate']`


			`CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '\|'.join(name2codepoint))`
			`DECIMAL_PATTERN = re.compile(r'&#(\d+);')`
			`HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')`
			`QUOTE_PATTERN = re.compile(r'[\']+')`
			`DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')`
			`DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')`
			`DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')`
			`NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')`
			`DEFAULT_SEPARATOR = '-'`


			`def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', save_order=False):`
			`"""`
			`Truncate a string.`
			`:param string (str): string for modification`
			`:param max_length (int): output string length`
			`:param word_boundary (bool):`
			`:param save_order (bool): if True then word order of output string is like input string`
			`:param separator (str): separator between words`
			`:return:`
			`"""`

			`string = string.strip(separator)`

			`if not max_length:`
			`return string`

			`if len(string) < max_length:`
			`return string`

			`if not word_boundary:`
			`return string[:max_length].strip(separator)`

			`if separator not in string:`
			`return string[:max_length]`

			`truncated = ''`
			`for word in string.split(separator):`
			`if word:`
			`next_len = len(truncated) + len(word)`
			`if next_len < max_length:`
			`truncated += '{}{}'.format(word, separator)`
			`elif next_len == max_length:`
			`truncated += '{}'.format(word)`
			`break`
			`else:`
			`if save_order:`
			`break`
			`if not truncated: # pragma: no cover`
			`truncated = string[:max_length]`
			`return truncated.strip(separator)`


			`def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,`
			`separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,`
			`replacements=(), allow_unicode=False):`
			`"""`
			`Make a slug from the given text.`
			`:param text (str): initial text`
			`:param entities (bool): converts html entities to unicode`
			`:param decimal (bool): converts html decimal to unicode`
			`:param hexadecimal (bool): converts html hexadecimal to unicode`
			`:param max_length (int): output string length`
			`:param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length`
			`:param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order`
			`:param separator (str): separator between words`
			`:param stopwords (iterable): words to discount`
			`:param regex_pattern (str): regex pattern for disallowed characters`
			`:param lowercase (bool): activate case sensitivity by setting it to False`
			`:param replacements (iterable): list of replacement rules e.g. [['\|', 'or'], ['%', 'percent']]`
			`:param allow_unicode (bool): allow unicode characters`
			`:return (str):`
			`"""`

			`# user-specific replacements`
			`if replacements:`
			`for old, new in replacements:`
			`text = text.replace(old, new)`

			`# ensure text is unicode`
			`if not isinstance(text, str):`
			`text = str(text, 'utf-8', 'ignore')`

			`# replace quotes with dashes - pre-process`
			`text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)`

			`# decode unicode`
			`if not allow_unicode:`
			`text = unidecode.unidecode(text)`

			`# ensure text is still in unicode`
			`if not isinstance(text, str):`
			`text = str(text, 'utf-8', 'ignore')`

			`# character entity reference`
			`if entities:`
			`text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text)`

			`# decimal character reference`
			`if decimal:`
			`try:`
			`text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)`
			`except Exception:`
			`pass`

			`# hexadecimal character reference`
			`if hexadecimal:`
			`try:`
			`text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)`
			`except Exception:`
			`pass`

			`# translate`
			`if allow_unicode:`
			`text = unicodedata.normalize('NFKC', text)`
			`else:`
			`text = unicodedata.normalize('NFKD', text)`

			`if sys.version_info < (3,):`
			`text = text.encode('ascii', 'ignore')`

			`# make the text lowercase (optional)`
			`if lowercase:`
			`text = text.lower()`

			`# remove generated quotes -- post-process`
			`text = QUOTE_PATTERN.sub('', text)`

			`# cleanup numbers`
			`text = NUMBERS_PATTERN.sub('', text)`

			`# replace all other unwanted characters`
			`if allow_unicode:`
			`pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN`
			`else:`
			`pattern = regex_pattern or DISALLOWED_CHARS_PATTERN`

			`text = re.sub(pattern, DEFAULT_SEPARATOR, text)`

			`# remove redundant`
			`text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)`

			`# remove stopwords`
			`if stopwords:`
			`if lowercase:`
			`stopwords_lower = [s.lower() for s in stopwords]`
			`words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]`
			`else:`
			`words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]`
			`text = DEFAULT_SEPARATOR.join(words)`

			`# finalize user-specific replacements`
			`if replacements:`
			`for old, new in replacements:`
			`text = text.replace(old, new)`

			`# smart truncate if requested`
			`if max_length > 0:`
			`text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)`

			`if separator != DEFAULT_SEPARATOR:`
			`text = text.replace(DEFAULT_SEPARATOR, separator)`

			`return text`