Source code for negmas.helpers.strings

#!/usr/bin/env python
"""
A set of utilities to handle strings
"""

from __future__ import annotations

import datetime
import itertools
import math
import random
import re
import socket
import string
import traceback
from collections import defaultdict

# Public API of this module: the names exported by a star-import.
# The private helper `_pretty_string` is intentionally not listed.
__all__ = [
    "shortest_unique_names",
    "snake_case",
    "camel_case",
    "unique_name",
    "pretty_string",
    "exception2str",
    "humanize_time",
    "shorten",
]

# Substrings that `shorten` strips out of a name (through the default value
# of its `common_parts` argument) before abbreviating what is left.
COMMON_NAME_PARTS = (
    "Mechanism",
    "Negotiator",
    "Agent",
    "Controller",
    "Acceptance",
    "Component",
    "Model",
    "Strategy",
    "Offering",
    "Entity",
)
"""Default parts of names removed by `shorten` """



[docs]
def shorten(name: str, length: int = 4, common_parts=COMMON_NAME_PARTS) -> str:
    """
    Abbreviates `name` to at most `length` characters.

    Args:
        name: The name to abbreviate.
        length: Maximum number of characters in the result.
        common_parts: Substrings removed from the name before abbreviating.
            A part is only removed while the name is strictly longer than
            that part.

    Returns:
        The abbreviated name.

    Remarks:
        - Common parts (Negotiator, Agent, etc) are stripped first.
        - If what remains fits within `length` it is returned as it is.
        - If there are at least `length` capital letters, the first `length`
          of them are returned.
        - Otherwise all capital letters are kept and the earliest non-capital
          characters are added (in their original positions) to fill up to
          `length`.
    """
    for part in common_parts:
        # Never strip a part that is as long as (or longer than) the name.
        if len(name) > len(part):
            name = name.replace(part, "")

    if len(name) <= length:
        return name

    capitals = [ch for ch in name if ch.isupper()]
    if len(capitals) >= length:
        return "".join(capitals[:length])

    # Number of non-capital characters we are still allowed to keep.
    budget = length - len(capitals)
    kept = []
    for ch in name:
        if len(kept) >= length:
            break
        if not ch.isupper():
            if budget < 1:
                continue
            budget -= 1
        kept.append(ch)
    return "".join(kept[:length])




[docs]
def unique_name(base, add_time=True, add_host=False, rand_digits=8, sep="/") -> str:
    """Return a unique name.

    Can be used to return a unique directory name on the given base.

    Args:
        base: (any): base path/string (it is converted to string whatever it is)
        add_time (bool, optional): Defaults to True. Add the current time
            formatted as ``%Y%m%dH%H%M%S%f``
        add_host (bool, optional): Defaults to False. Add the host name as
            returned by `socket.gethostname`
        rand_digits (int, optional): Defaults to 8. The number of random
            characters (digits and ASCII letters) to add to the name
        sep (str, optional): Defaults to "/". Placed between the base and the
            generated suffix when both are non-empty

    Examples:

        >>> a = unique_name("")
        >>> len(a) == 8 + 1 + 6 + 8 + 6
        True

    Returns:
        str: The unique name. The suffix is time + host + random characters;
        if the suffix is empty the base is returned alone and if the base is
        empty the suffix is returned alone.

    """
    base = str(base)
    host = socket.gethostname() if add_host else ""
    noise = ""
    if rand_digits > 0:
        alphabet = string.digits + string.ascii_letters
        noise = "".join(random.choices(alphabet, k=rand_digits))
    stamp = ""
    if add_time:
        stamp = datetime.datetime.now().strftime("%Y%m%dH%H%M%S%f")

    suffix = stamp + host + noise
    if not suffix:
        return base
    if not base:
        return suffix
    return f"{base}{sep}{suffix}"




[docs]
def shortest_unique_names(
    strs: list[str], sep=".", max_compression=False, guarantee_unique=False
):
    """
    Finds the shortest unique strings starting from the end of each input
    string based on the separator.

    The final strings will only be unique if the inputs are unique.

    Args:
        strs: A list of strings
        sep: The separator used to separate fields in each string
        max_compression: If True, each string will be further compressed
                         by taking the shortest prefix that keeps the
                         strings unique (if they were originally unique)
        guarantee_unique: If given, random characters will be postfixed on
                         strings to guarantee uniqueness

    Returns:
        A list of shortened names in the same order as the inputs. If fewer
        than two strings are given, the input list itself is returned.

    Example:
        given ["a.b.cat", "d.e.f", "a.d.cat"] it will generate ["b.cat", "f", "d.cat"]
        if max_compression was false and will generate ["b", "f", "d"] if it was
        True
    """
    # Nothing to disambiguate with zero or one string.
    if len(strs) < 2:
        return strs
    # Work on a copy so the caller's list is not modified below.
    strs_unique = [_ for _ in strs]
    if guarantee_unique and len(set(strs)) != len(strs):
        chars = string.digits + string.ascii_letters
        # The last element is never altered: once every earlier element
        # differs from all the others, it is unique as well.
        for i in range(len(strs_unique) - 1):
            others = set(strs_unique[:i] + strs_unique[i + 1 :])
            while strs_unique[i] in others:
                # Prefer a deterministic single-character postfix ...
                for a in chars:
                    if strs_unique[i] + a not in others:
                        strs_unique[i] = strs_unique[i] + a
                        break
                else:
                    # ... and fall back to one random character (then retry)
                    # when no single character makes the string unique.
                    strs_unique[i] = strs_unique[i] + unique_name(
                        "", False, False, 1, ""
                    )

    # Split every string into fields; the last field is the candidate name.
    lsts = [_.split(sep) for _ in strs_unique]
    names = [_[-1] for _ in lsts]
    if len(names) != len(set(names)):
        # Some last fields collide: group the indices by last field.
        locs = defaultdict(list)
        for i, s in enumerate(names):
            locs[s].append(i)
        # Maps each full (possibly postfixed) string to its shortened form.
        mapping = {"": ""}
        for s, L_ in locs.items():
            # Empty last fields are skipped, which also stops the recursion
            # once all fields of colliding strings have been consumed.
            # NOTE(review): a non-empty string whose last field is empty
            # (e.g. "a." with sep ".") that collides with another such string
            # never gets a `mapping` entry, so the lookup after this loop
            # appears to raise KeyError -- confirm.
            if len(s) < 1:
                continue
            if len(L_) == 1:
                # This last field is unique on its own.
                mapping[strs_unique[L_[0]]] = s
                continue
            # Disambiguate the colliding strings by recursing on everything
            # before their (shared) last field.
            strs_unique_new = [sep.join(lsts[_][:-1]) for _ in L_]
            prefixes = shortest_unique_names(
                strs_unique_new, sep, max_compression, guarantee_unique
            )
            for loc, prefix in zip(L_, prefixes):
                x = sep.join([prefix, s])
                # An empty prefix leaves a leading separator; drop it.
                if x.startswith(sep):
                    x = x[len(sep) :]
                mapping[strs_unique[loc]] = x
        strs_unique = [mapping[_] for _ in strs_unique]
    else:
        strs_unique = names
    if not max_compression:
        return strs_unique
    # Replace each name by its shortest proper prefix that differs from the
    # same-length prefix of every other entry. Entries are updated in place,
    # so later comparisons see the already compressed earlier entries.
    for i, s in enumerate(strs_unique):
        for j in range(1, len(s)):
            for k in itertools.chain(range(i), range(i + 1, len(strs_unique))):
                if strs_unique[k][:j] == s[:j]:
                    break
            else:
                # No other entry shares this prefix: keep it and stop.
                strs_unique[i] = s[:j]
                break
    return strs_unique




[docs]
def snake_case(s: str) -> str:
    """Converts a string from CamelCase to snake_case

    An underscore is inserted before every capital letter that either follows
    a lowercase letter or is followed by a character that is not a capital
    letter. The result is then lower-cased and leading/trailing underscores
    are stripped.

    Example:

        >>> print(snake_case("ThisIsATest"))
        this_is_a_test

    Args:
        s: input string

    Returns:
        str: converted string
    """
    word_start = "(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))"
    underscored = re.sub(word_start, "_\\1", s)
    lowered = underscored.lower()
    return lowered.strip("_")




[docs]
def camel_case(
    s: str, capitalize_first: bool = False, lower_first: bool = False
) -> str:
    """Converts a string from snake_case to CamelCase

    The string is split on underscores; every part after the first is passed
    through `str.capitalize` and the parts are joined back together.

    Example:

        >>> print(camel_case("this_is_a_test"))
        thisIsATest
        >>> print(camel_case("this_is_a_test", capitalize_first=True))
        ThisIsATest
        >>> print(camel_case("This_is_a_test", lower_first=True))
        thisIsATest
        >>> print(camel_case("This_is_a_test"))
        ThisIsATest

    Args:
        s: input string
        capitalize_first: if true, the first part is capitalized like all the
            others (takes precedence over `lower_first`)
        lower_first: If true, the first part is lower-cased

    Returns:
        str: converted string (an empty input is returned unchanged)
    """
    if not s:
        return s
    head, *tail = s.split("_")
    if capitalize_first:
        head = head.capitalize()
    elif lower_first:
        head = head.lower()
    return head + "".join(word.capitalize() for word in tail)



def _pretty_string(src, dpth=0, current_key="", tab_size=2) -> str:
    """Recursively print nested elements.

    Args:
        dpth (int): Current depth
        current_key (str): Current key being printed
        tab_size: Tab size in spaces

    Returns:
        str: The pretty version of the input
    """

    def tabs(n):
        return " " * n * tab_size  # or 2 or 8 or...

    output = ""
    if isinstance(src, dict):
        output += tabs(dpth) + "{\n"
        for key, value in src.items():
            output += _pretty_string(value, dpth + 1, key) + "\n"
        output += tabs(dpth) + "}"
    elif isinstance(src, list) or isinstance(src, tuple):
        output += tabs(dpth) + "[\n"
        for litem in src:
            output += _pretty_string(litem, dpth + 1) + "\n"
        output += tabs(dpth) + "]"
    else:
        if len(current_key) > 0:
            output += tabs(dpth) + f'"{current_key}":{src}'
        else:
            output += tabs(dpth) + "%s" % src
    return output



[docs]
def pretty_string(src, tab_size=2, compact=False) -> str:
    """Recursively print nested elements.

    Args:
        src (Any): The source to be converted to a printable string
        tab_size (int): Tab size in spaces
        compact (bool): If true, all newlines are removed so the output is a
            single line (quotes around keys are kept in this mode)

    Returns:
        str: The pretty version of the input

    Remarks:
        - When not compact, every occurrence of `` "`` is replaced by a space
          and every occurrence of ``":`` by ``:`` to drop the quotes around
          keys. This function therefore assumes that these two patterns do
          not appear anywhere in the input; if they do, they are rewritten
          too.
    """
    rendered = _pretty_string(src, dpth=0, current_key="", tab_size=tab_size)
    if not compact:
        without_opening_quotes = rendered.replace(' "', " ")
        return without_opening_quotes.replace('":', ":")
    return rendered.replace("\n", "")




[docs]
def exception2str(limit=None, chain=True) -> str:
    """Returns the traceback of the exception being handled as a string.

    Thin wrapper around `traceback.format_exc`.

    Args:
        limit: Passed to `traceback.format_exc` as `limit`
        chain: Passed to `traceback.format_exc` as `chain`

    Returns:
        str: The formatted traceback
    """
    formatted = traceback.format_exc(limit=limit, chain=chain)
    return formatted




[docs]
def humanize_time(
    secs: int | float | None,
    align=False,
    always_show_all_units=False,
    show_us=False,
    show_ms=False,
    always_show_from="",
) -> str | None:
    """
    Prints time that is given as seconds in human readable form. Useful only for times >=1sec.

    Args:
        secs: Number of seconds. If None, None is returned.
        align: Whether to align outputs so that they all take the same size
            (not implemented: every field is formatted with width 2 either way)
        always_show_all_units: Whether to always show all units even when
            they are zeros. Default False
        show_us: If given, microseconds and milliseconds will be shown
            (takes precedence over `show_ms`)
        show_ms: If given, milliseconds will be shown
        always_show_from: One of d,h,m,s,ms,u (day, hour, minute, second,
            milli-sec, micro-sec) to always show as well as everything
            shorter than it (i.e passing 'm' shows minutes, seconds, ... etc)

    Returns:
        The formatted string with the humanized form, e.g. `` 1h 1m 1s``.
        The smallest unit is always shown.
    """
    if secs is None:
        return None

    # Work in the smallest displayed unit: `scale` converts seconds to it.
    if show_us:
        scale, sub_second = 1_000_000, [("ms", 1000), ("u", 1)]
    elif show_ms:
        scale, sub_second = 1_000, [("ms", 1)]
    else:
        scale, sub_second = 1, []
    if scale != 1:
        secs *= scale
    coarse = (("d", 86400), ("h", 3600), ("m", 60), ("s", 1))
    units = [(label, size * scale) for label, size in coarse] + sub_second

    pieces = []
    for label, size in units:
        # From this unit on, everything is shown even if it is zero.
        if label == always_show_from:
            always_show_all_units = True
        if not (secs / size >= 1 or size == 1 or always_show_all_units):
            continue
        if size > 1:
            count = int(math.floor(secs / size))
            secs -= count * size
        elif secs != int(secs):
            # Smallest unit keeps a fractional remainder; %d truncates it.
            count = secs
        else:
            count = int(secs)
        pieces.append("%2d%s%s" % (count, label, ""))
    return "".join(pieces)