# Source code for toolkit.ulib

# coding=utf8

"""
:mod:`ulib` is a library for processing unicode characters and strings in a more Pythonic way.

.. warning:: This module is not finished; do not use it in a production environment.

Support:

    * Chinese
"""

from . import (
    ToolkitException,
    text_type,
    logger,
    PY2
)

# Python 3 has no ``unichr`` builtin; when ``PY2`` is false, alias the name to
# ``chr`` so the rest of the module can call ``unichr`` unconditionally.
if not PY2:
    unichr = chr


class ULibException(ToolkitException):
    """Base class for the exceptions defined in :mod:`ulib`."""


class NotUnicodeException(ULibException):
    """Raised when an argument is not the unicode character or string expected."""


def _is(u, start, end):
    """
    Test if the code point of a single character lies within ``[start, end]``.

    :param u: Unicode character (a string of length 1).
    :param start: Lowest code point of the range, inclusive.
    :param end: Highest code point of the range, inclusive.
    :return: Bool Value
    :raises NotUnicodeException: If ``ord`` rejects ``u`` with a ``TypeError``,
        e.g. because it is not a string or its length is not 1.
    """
    try:
        ordinal = ord(u)
    except TypeError:
        # ``ord`` also rejects objects that have no length at all (``None``,
        # numbers, ...). Guard the ``len`` call so those are still reported as
        # NotUnicodeException instead of leaking a second TypeError.
        try:
            length = len(u)
        except TypeError:
            length = 'unknown'
        err_msg = 'ulib excepted an unicode character, but {} of length {} found'
        raise NotUnicodeException(err_msg.format(type(u), length))
    return start <= ordinal <= end


def is_cn(u):
    """
    Test if the unicode character is a chinese character.

    A character counts as chinese when its code point lies in the range
    U+4E00..U+9FA5 (both ends inclusive).

    :param u: Unicode character.
    :return: Bool Value
    """
    return _is(u, 0x4e00, 0x9fa5)


def has_cn(us):
    """
    Test if the unicode string contains at least one chinese character.

    Characters are checked in order with :func:`is_cn`; checking stops at the
    first match. An empty string yields ``False``.

    :param us: Unicode string.
    :return: Bool Value
    """
    return any(is_cn(u) for u in us)


def is_digital(u):
    """
    Test if the unicode character is a digit.

    Only the ASCII digits ``0``-``9`` (U+0030..U+0039) are accepted.

    :param u: Unicode character.
    :return: Bool Value
    """
    return _is(u, 0x0030, 0x0039)


def cnlen(us):
    """
    Calculate the length of a unicode string, counting every chinese
    character (as decided by :func:`is_cn`) as 2 and any other character as 1.

    :param us: Unicode string.
    :return: Integer length
    """
    # ``filter`` returns a lazy iterator on Python 3, which has no ``len``;
    # counting the matches with ``sum`` works on both Python 2 and 3.
    return len(us) + sum(1 for u in us if is_cn(u))


def _f2h(u):
    """
    Convert one full-width character to its half-width counterpart.

    U+3000, U+3001 and U+3002 are mapped to space, comma and full stop
    respectively. Any other character is shifted down by 0xFEE0 and replaced
    only when the result lands in U+0020..U+007E; otherwise the character is
    returned unchanged.

    :param u: Unicode character.
    :return: The converted character, or ``u`` itself if no conversion applies.
    """
    code = ord(u)

    # Characters whose half-width form is not at the fixed 0xFEE0 offset.
    special = {
        0x3000: 0x0020,
        0x3001: 0x002c,
        0x3002: 0x002e,
    }
    if code in special:
        return unichr(special[code])

    shifted = code - 0xfee0
    if 0x0020 <= shifted <= 0x7e:
        return unichr(shifted)
    return u


def f2h(u):
    """
    Convert the full-width characters of a unicode character or string to
    their half-width counterparts.

    Each character is converted on its own; characters that have no
    conversion are kept as they are.

    :param u: Unicode character or string.
    :return: The converted text.
    :raises NotUnicodeException: If ``u`` is not an instance of ``text_type``.
    """
    if not isinstance(u, text_type):
        raise NotUnicodeException("f2h() excepted an unicode character or string.")
    # Joining the per-character results also covers the single-character
    # case, so no separate branch for ``len(u) == 1`` is needed.
    return ''.join(_f2h(ch) for ch in u)