67 lines
1.8 KiB
Python
67 lines
1.8 KiB
Python
import re
|
|
import six
|
|
import unicodedata
|
|
from unidecode import unidecode
|
|
|
|
|
|
def smart_text(s, encoding='utf-8', errors='strict'):
    """
    Coerce ``s`` to a text (unicode) string.

    Text input is returned unchanged. Byte strings are decoded using
    ``encoding`` and ``errors``. Any other object is converted with the text
    type's constructor; on Python 2, an object without ``__unicode__`` is
    first rendered to bytes and then decoded.

    :param s: Value to convert.
    :param encoding: Codec used whenever bytes have to be decoded.
    :param errors: Error handling scheme passed to the decoder.
    :type encoding: String
    :type errors: String
    :return: ``s`` as a ``six.text_type`` instance.
    """
    if isinstance(s, six.text_type):
        return s

    if isinstance(s, bytes):
        # Covers Python 3 ``bytes`` and Python 2 ``str`` (the same type
        # there). Decode explicitly so ``encoding``/``errors`` are honoured:
        # a bare text_type(s) on Python 2 would use the interpreter's default
        # codec (ASCII) and fail on any non-ASCII byte.
        return six.text_type(s, encoding, errors)

    if six.PY3 or hasattr(s, '__unicode__'):
        return six.text_type(s)

    # Python 2 object without __unicode__: go through its byte representation.
    return six.text_type(bytes(s), encoding, errors)
|
|
|
|
|
|
# Extra characters, outside of alphanumerics, that we'll allow in a slug.
# Used as the default value of slugify()'s ``ok`` parameter.
SLUG_OK = '-_~'
|
|
|
|
|
|
def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False):
    """
    Creates a unicode slug for given string with several options.

    The input is NFKC-normalized, then filtered by Unicode general category:
    letters (L) and numbers (N) are kept, separators (Z) become a plain
    space, characters listed in ``ok`` are kept, everything else is dropped.
    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

    :param s: Your unicode string.
    :param ok: Extra characters outside of alphanumerics to be allowed.
    :param lower: Lower the output string.
    :param spaces: True allows spaces, False replaces each run of
        whitespace and dashes with a single dash (-).
    :param only_ascii: True to replace non-ASCII unicode characters with
        their ASCII representations.
    :type s: String
    :type ok: String
    :type lower: Bool
    :type spaces: Bool
    :type only_ascii: Bool
    :return: Slugified unicode string

    """
    kept = []
    for c in unicodedata.normalize('NFKC', smart_text(s)):
        cat = unicodedata.category(c)[0]
        if cat in 'LN' or c in ok:
            kept.append(c)
        elif cat == 'Z':
            # ``elif`` so a separator that is explicitly allowed through
            # ``ok`` is not appended a second time as a plain space.
            kept.append(' ')
    new = ''.join(kept).strip()

    if only_ascii:
        # Transliterate before collapsing whitespace and lowercasing: the
        # ASCII replacement may itself contain spaces and capital letters,
        # which still have to go through the normalization steps below.
        new = unidecode(new).strip()

    if not spaces:
        new = re.sub(r'[-\s]+', '-', new)

    if lower:
        new = new.lower()

    return new
|