#!/usr/bin/env python
"""
A set of utilities to handle strings
"""
from __future__ import annotations
import datetime
import itertools
import math
import random
import re
import socket
import string
import traceback
from collections import defaultdict
# Public API of this module: the names exported by a star-import.
# Every entry is a function defined further down in this file.
__all__ = [
"shortest_unique_names",
"snake_case",
"camel_case",
"unique_name",
"pretty_string",
"exception2str",
"humanize_time",
"shorten",
]
# Substrings that `shorten` strips out of a name before abbreviating it.
# This tuple is the default value of that function's `common_parts` parameter.
COMMON_NAME_PARTS = (
"Mechanism",
"Negotiator",
"Agent",
"Controller",
"Acceptance",
"Component",
"Model",
"Strategy",
"Offering",
"Entity",
)
"""Default parts of names removed by `shorten` """
[docs]
def shorten(name: str, length: int = 4, common_parts=COMMON_NAME_PARTS) -> str:
    """
    Returns a short version of the name.

    Args:
        name: The name to abbreviate.
        length: The maximum number of characters in the returned string.
        common_parts: Substrings removed from `name` before abbreviating
                      (e.g. Negotiator, Agent, etc). A part is only removed
                      while the name is strictly longer than that part, so a
                      name that *is* a common part is never emptied by it.

    Returns:
        The abbreviated name.

    Remarks:
        - Common parts are removed first; if what remains has at most
          `length` characters it is returned as it is.
        - If the remainder has at least `length` capital letters, the first
          `length` capitals are returned.
        - Otherwise all capitals are kept and the earliest non-capital
          characters are added (in their original order) to fill the result
          up to `length` characters.
    """
    for part in common_parts:
        if len(name) > len(part):
            name = name.replace(part, "")

    if len(name) <= length:
        return name

    capitals = [ch for ch in name if ch.isupper()]
    if len(capitals) >= length:
        return "".join(capitals[:length])

    # Number of non-capital characters we are still allowed to keep.
    budget = length - len(capitals)
    picked = []
    for ch in name:
        if len(picked) >= length:
            break
        if ch.isupper():
            picked.append(ch)
        elif budget >= 1:
            budget -= 1
            picked.append(ch)
    return "".join(picked[:length])
[docs]
def unique_name(base, add_time=True, add_host=False, rand_digits=8, sep="/") -> str:
    """Return a unique name.

    Can be used to return a unique directory name on the given base.

    Args:
        base: (any): base path/string (it is converted to string whatever it is)
        add_time (bool, optional): Defaults to True. Add the current local
            time formatted as date, a literal ``H``, time and microseconds.
        add_host (bool, optional): Defaults to False. Add the host name
            (as returned by `socket.gethostname`) after the time.
        rand_digits (int, optional): Defaults to 8. The number of random
            characters (digits and ASCII letters) to add to the name
        sep (str, optional): Defaults to "/". Separator placed between the
            base and the generated part when both are non-empty.

    Examples:

        >>> a = unique_name("")
        >>> len(a) == 8 + 1 + 6 + 8 + 6
        True

    Returns:
        str: The unique name. If nothing is generated the base is returned;
        if the base is empty only the generated part is returned.
    """
    prefix = str(base)

    # Collect the generated pieces in their final order: time, host, random.
    pieces = []
    if add_time:
        pieces.append(datetime.datetime.now().strftime("%Y%m%dH%H%M%S%f"))
    if add_host:
        pieces.append(socket.gethostname())
    if rand_digits > 0:
        alphabet = string.digits + string.ascii_letters
        pieces.append("".join(random.choices(alphabet, k=rand_digits)))

    suffix = "".join(pieces)
    if not suffix:
        return prefix
    if not prefix:
        return suffix
    return f"{prefix}{sep}{suffix}"
[docs]
def shortest_unique_names(
    strs: list[str], sep=".", max_compression=False, guarantee_unique=False
):
    """
    Finds the shortest unique strings starting from the end of each input
    string based on the separator.

    The final strings will only be unique if the inputs are unique (or were
    made unique through `guarantee_unique`).

    Args:
        strs: A list of strings. A list with fewer than two items is returned
              unchanged (the same list object).
        sep: The separator used to separate fields in each string
        max_compression: If True, each string will be further compressed
                         by taking the shortest prefix that keeps the
                         strings unique (if they were originally unique)
        guarantee_unique: If given, random characters will be postfixed on
                          strings to guarantee uniqueness

    Returns:
        A new list with one shortened name per input, in the same order.

    Remarks:
        - Strings whose last field is empty (trailing or doubled separators
          such as "a." or "x..cat") are handled like any other string; only
          strings that are entirely empty are left as they are.

    Example:
        given ["a.b.cat", "d.e.f", "a.d.cat"] it will generate ["b.cat", "f", "d.cat"]
        if max_compression was false and will generate ["b", "f", "d"] if it was
        True
    """
    if len(strs) < 2:
        return strs
    strs_unique = list(strs)
    if guarantee_unique and len(set(strs)) != len(strs):
        chars = string.digits + string.ascii_letters
        # The last item never needs changing: all earlier items are made
        # different from it (and from each other).
        for i in range(len(strs_unique) - 1):
            others = set(strs_unique[:i] + strs_unique[i + 1 :])
            while strs_unique[i] in others:
                for a in chars:
                    if strs_unique[i] + a not in others:
                        strs_unique[i] = strs_unique[i] + a
                        break
                else:
                    # Every single-character postfix is taken: grow the
                    # string by one random character and try again.
                    strs_unique[i] = strs_unique[i] + unique_name(
                        "", False, False, 1, ""
                    )

    fields = [_.split(sep) for _ in strs_unique]
    names = [_[-1] for _ in fields]
    if len(names) != len(set(names)):
        # Group the positions that share the same last field.
        locs = defaultdict(list)
        for i, s in enumerate(names):
            locs[s].append(i)
        mapping = {"": ""}
        for s, indices in locs.items():
            if len(indices) == 1:
                mapping[strs_unique[indices[0]]] = s
                continue
            if not any(strs_unique[_] for _ in indices):
                # All strings in this group are completely empty: there is
                # nothing left to split (recursing would never terminate)
                # and `mapping` already sends "" to "".
                continue
            # Disambiguate the group by recursing on everything that comes
            # before the shared last field.
            parents = [sep.join(fields[_][:-1]) for _ in indices]
            prefixes = shortest_unique_names(
                parents, sep, max_compression, guarantee_unique
            )
            for loc, prefix in zip(indices, prefixes):
                x = sep.join([prefix, s])
                if x.startswith(sep):
                    x = x[len(sep) :]
                mapping[strs_unique[loc]] = x
        strs_unique = [mapping[_] for _ in strs_unique]
    else:
        strs_unique = names
    if not max_compression:
        return strs_unique
    # Replace each name by its shortest proper prefix that no other
    # (possibly already compressed) name shares.
    for i, s in enumerate(strs_unique):
        for j in range(1, len(s)):
            for k in itertools.chain(range(i), range(i + 1, len(strs_unique))):
                if strs_unique[k][:j] == s[:j]:
                    break
            else:
                strs_unique[i] = s[:j]
                break
    return strs_unique
[docs]
def snake_case(s: str) -> str:
    """Converts a string from CamelCase to snake_case

    An underscore is inserted before every capital letter that follows a
    lowercase letter, and before every capital letter that is followed by a
    character other than a capital letter. The result is then lowercased and
    any leading/trailing underscores are stripped.

    Example:

        >>> print(snake_case("ThisIsATest"))
        this_is_a_test

    Args:
        s: input string

    Returns:
        str: converted string
    """
    word_start = "(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))"
    separated = re.sub(word_start, "_\\1", s)
    lowered = separated.lower()
    return lowered.strip("_")
[docs]
def camel_case(
    s: str, capitalize_first: bool = False, lower_first: bool = False
) -> str:
    """Converts a string from snake_case to CamelCase

    The string is split on underscores. Every part after the first is passed
    through `str.capitalize` (first character upper-cased, the rest
    lower-cased) and the parts are concatenated.

    Example:

        >>> print(camel_case("this_is_a_test"))
        thisIsATest
        >>> print(camel_case("this_is_a_test", capitalize_first=True))
        ThisIsATest
        >>> print(camel_case("This_is_a_test", lower_first=True))
        thisIsATest
        >>> print(camel_case("This_is_a_test"))
        ThisIsATest

    Args:
        s: input string
        capitalize_first: if true, the first part is capitalized like all the
                          others. Takes precedence over `lower_first`.
        lower_first: If true, the whole first part is lower-cased

    Returns:
        str: converted string (an empty input is returned as it is)
    """
    if not s:
        return s
    head, *tail = s.split("_")
    if capitalize_first:
        head = head.capitalize()
    elif lower_first:
        head = head.lower()
    return head + "".join(word.capitalize() for word in tail)
def _pretty_string(src, dpth=0, current_key="", tab_size=2) -> str:
"""Recursively print nested elements.
Args:
dpth (int): Current depth
current_key (str): Current key being printed
tab_size: Tab size in spaces
Returns:
str: The pretty version of the input
"""
def tabs(n):
return " " * n * tab_size # or 2 or 8 or...
output = ""
if isinstance(src, dict):
output += tabs(dpth) + "{\n"
for key, value in src.items():
output += _pretty_string(value, dpth + 1, key) + "\n"
output += tabs(dpth) + "}"
elif isinstance(src, list) or isinstance(src, tuple):
output += tabs(dpth) + "[\n"
for litem in src:
output += _pretty_string(litem, dpth + 1) + "\n"
output += tabs(dpth) + "]"
else:
if len(current_key) > 0:
output += tabs(dpth) + f'"{current_key}":{src}'
else:
output += tabs(dpth) + "%s" % src
return output
[docs]
def pretty_string(src, tab_size=2, compact=False) -> str:
    """Recursively print nested elements.

    Args:
        src (Any): The source to be converted to a printable string
        tab_size (int): Tab size in spaces
        compact (bool): If true the output is converted into a single line
            by removing all newlines (nothing else is changed)

    Returns:
        str: The pretty version of the input

    Remarks:
        - When `compact` is false, the quotes that surround dict keys are
          removed by replacing every space followed by a double quote with a
          single space and every double quote followed by a colon with a
          colon. This assumes that these two patterns do not appear anywhere
          else in the input. If they do, the double quote is removed there
          too.
    """
    rendered = _pretty_string(src, dpth=0, current_key="", tab_size=tab_size)
    if not compact:
        # Strip the opening quote first, then the closing one.
        without_opening_quotes = rendered.replace(' "', " ")
        return without_opening_quotes.replace('":', ":")
    return rendered.replace("\n", "")
[docs]
def exception2str(limit=None, chain=True) -> str:
    """Returns the traceback of the exception being handled as a string.

    This is a thin wrapper around `traceback.format_exc`.

    Args:
        limit: Passed as it is to `traceback.format_exc` (limits the number
               of traceback entries that are included)
        chain: Passed as it is to `traceback.format_exc` (controls whether
               chained exceptions are included)

    Returns:
        str: The formatted traceback
    """
    formatted = traceback.format_exc(limit=limit, chain=chain)
    return formatted
[docs]
def humanize_time(
secs: int | float | None,
align=False,
always_show_all_units=False,
show_us=False,
show_ms=False,
always_show_from="",
) -> str | None:
"""
Prints time that is given as seconds in human readable form. Useful only for times >=1sec.
:param secs: float: number of seconds
:param align: bool, optional: whether to align outputs so that they all take the same size (not implemented)
:param always_show_all_units: bool, optional: Whether to always show days, hours, and minutes even when they
are zeros. default False
:param always_show_from: One of d,h,m,s,ms,u (day, hour, minute, second, milli-sec, micro-sec) to always show
as well as everything shorter than it (i.e passing 'm' shows minutes, seconds, ... etc)
:param show_us: bool, if given microseconds and milliseconds will be shown
:param show_ms: bool, if given milliseconds will be shown
:return: str: formated string with the humanized form
"""
if secs is None:
return None
if show_us:
secs *= 1_000_000
units = [
("d", 86400_000_000),
("h", 3600_000_000),
("m", 60_000_000),
("s", 1_000_000),
("ms", 1000),
("u", 1),
]
elif show_ms:
secs *= 1_000
units = [
("d", 86400_000),
("h", 3600_000),
("m", 60_000),
("s", 1_000),
("ms", 1),
]
else:
units = [("d", 86400), ("h", 3600), ("m", 60), ("s", 1)]
parts = []
for unit, mul in units:
if unit == always_show_from:
always_show_all_units = True
if secs / mul >= 1 or mul == 1 or always_show_all_units:
if mul > 1:
n = int(math.floor(secs / mul))
secs -= n * mul
else:
n = secs if secs != int(secs) else int(secs)
if align:
parts.append("%2d%s%s" % (n, unit, ""))
else:
parts.append("%2d%s%s" % (n, unit, ""))
return str("".join(parts))