# Source code for negmas.serialization

"""
Implements serialization to and from strings and secondary storage.

"""

from __future__ import annotations
import importlib
import types
from pathlib import Path
from typing import Any, Callable, Iterable
import sys

import cloudpickle
import numpy as np
from pandas import json_normalize

from negmas import warnings

from .helpers import (
    TYPE_START,
    get_class,
    get_full_type_name,
    is_jsonable,
    is_lambda_or_partial_function,
    is_not_lambda_nor_partial_function,
)
from .helpers.inout import dump, load

# Public names exported by this module. `dump` and `load` are re-exported
# from `.helpers.inout`.
__all__ = [
    "serialize",
    "deserialize",
    "dump",
    "load",
    "to_flat_dict",
    "PYTHON_CLASS_IDENTIFIER",
]

# Default dictionary key under which `serialize` stores the type name of the
# encoded object and from which `deserialize` reads it back.
PYTHON_CLASS_IDENTIFIER = "__python_class__"
# Prefix `serialize` puts in front of `str(path)` when encoding a `Path`.
PATH_START = "__PATH__:"
# Byte prefixes `serialize` puts in front of `cloudpickle.dumps(...)` output
# for lambda/partial callables, for other functions, and for any remaining
# object that is only representable through cloudpickle, respectively.
LAMBDA_START = b"__LAMBDAOBJ__:"
FUNCTION_START = b"__FUNCTION_START__:"
# JSON_START = b"__JSON_START__:"
CLOUDPICKLE_START = b"__CLOUDPICKLE_START__:"
# Field names given special treatment by `serialize` (in its `adjust_dict`
# helper). The two tuples are paired positionally: SPECIAL_FIELDS[i]
# corresponds to SPECIAL_FIELDS_SHORT_NAMES[i].
SPECIAL_FIELDS = ("_NamedObject__uuid", "_NamedObject__name")
SPECIAL_FIELDS_SHORT_NAMES = ("id", "name")



# [docs]
def serialize(
    value,
    deep=True,
    add_type_field=True,
    keep_private=False,
    ignore_methods=True,
    ignore_lambda=False,
    shorten_type_field=False,
    objmem=None,
    python_class_identifier=PYTHON_CLASS_IDENTIFIER,
):
    """
    Encodes the given value as nothing more complex than simple dicts,
    lists/tuples, strings, bytes and builtin numeric values.

    Args:

        value: Any object
        deep: Whether we should go deep in the encoding or do a shallow encoding
        add_type_field: Whether to add a type field. If True, a field named
            `python_class_identifier` will be added giving the type of `value`
        keep_private: Keeps fields starting with "_"
        ignore_methods: If True, fields whose value is a function/method
            (and not a lambda or partial) are dropped
        ignore_lambda: If True, fields whose value is a lambda or a partial
            function are dropped
        shorten_type_field: If given, the type will be shortened to class name
            only if its full name starts with "negmas."
        objmem: Set of `id()` values of the containers/objects already being
            encoded. Fields whose value is in this set are skipped, which
            breaks reference cycles. Callers normally leave it as None.
        python_class_identifier: Name of the key used for the type field.

    Returns:

        The encoded value. `None` is returned for `None`. A value that cannot
        be encoded in any way is returned unchanged after issuing a
        `NegmasSarializationWarning`.

    Remarks:

        - Lists and tuples are encoded element by element when `deep` is true
          (keeping their type when possible, falling back to a list).
        - If the `value` object has a `to_dict`, `asdict` or `dict` bound
          method, it will be called to do the conversion, otherwise its
          `__dict__` or `__slots__` member will be used.
        - Callables and objects that can only be pickled are encoded as
          `bytes` (a marker prefix followed by the cloudpickle payload).
        - `keep_private`, `ignore_methods`, `ignore_lambda` and
          `shorten_type_field` are applied at this level only; recursive
          calls for nested values use their defaults.

    See Also:
          `deserialize`, `PYTHON_CLASS_IDENTIFIER`

    """

    def add_to_mem(x, objmem):
        # Remember `x` by identity so that back-references to it are skipped.
        if not objmem:
            objmem = {id(x)}
        else:
            objmem.add(id(x))
        return objmem

    def good_field(k: str, v, objmem):
        # Non-string keys are always kept (they cannot be "private").
        if not isinstance(k, str):
            return True
        if objmem and id(v) in objmem:
            return False
        if ignore_lambda and is_lambda_or_partial_function(v):
            return False
        if ignore_methods and is_not_lambda_nor_partial_function(v):
            return False
        return keep_private or not (k != python_class_identifier and k.startswith("_"))

    def adjust_dict(d):
        # Rename the name-mangled special fields to their short public names.
        if not isinstance(d, dict):
            return d
        for a, b in zip(SPECIAL_FIELDS, SPECIAL_FIELDS_SHORT_NAMES):
            if a not in d:
                continue
            if b in d and d[b] != d[a]:
                warnings.warn(
                    f"Field {a} and {b} already exist and are not equal.",
                    warnings.NegmasSarializationWarning,
                )
            # Move the value to the short name and drop the mangled key.
            d[b] = d.pop(a)
        return d

    def get_type_field(value):
        cls = value.__class__
        full_name = cls.__module__ + "." + cls.__name__
        if shorten_type_field and full_name.startswith("negmas."):
            return cls.__name__
        return full_name

    def encode(v, mem):
        # Recursive call shared by all container/object branches below.
        return serialize(
            v,
            deep=deep,
            add_type_field=add_type_field,
            objmem=mem,
            python_class_identifier=python_class_identifier,
        )

    if value is None:
        return None

    if isinstance(value, dict):
        if not deep:
            return adjust_dict(dict(value))
        return adjust_dict(
            {k: encode(v, objmem) for k, v in value.items() if good_field(k, v, objmem)}
        )
    if isinstance(value, Iterable) and not deep:
        return value
    if isinstance(value, type):
        return TYPE_START + get_full_type_name(value)
    if isinstance(value, Path):
        return PATH_START + str(value)
    if isinstance(value, (list, tuple)):
        objmem = add_to_mem(value, objmem)
        try:
            # Keep the container type when it can be built from an iterable.
            return type(value)(encode(_, objmem) for _ in value)
        except Exception:
            # e.g. namedtuples, whose constructor needs positional fields.
            return [encode(_, objmem) for _ in value]

    def convertwith(value, method, pass_identifier=False):
        # Returns None when `value` has no bound method called `method`.
        converter = getattr(value, method, None)
        if not isinstance(converter, types.MethodType):
            return None
        if pass_identifier:
            converted = converter(python_class_identifier=python_class_identifier)  # type: ignore
        else:
            converted = converter()  # type: ignore
        if isinstance(converted, dict):
            # Work on a copy so the dict handed back by the method is not modified.
            converted = dict(converted)
            if add_type_field and python_class_identifier not in converted:
                converted[python_class_identifier] = get_type_field(value)
        return adjust_dict(converted)

    for method in ("to_dict", "asdict", "dict"):
        try:
            converted = convertwith(value, method, pass_identifier=True)
        except Exception:
            # The method may not accept `python_class_identifier`; retry without it.
            converted = convertwith(value, method, pass_identifier=False)
        if converted is not None:
            return converted
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        if value.startswith((FUNCTION_START, LAMBDA_START, CLOUDPICKLE_START)):
            warnings.warn(
                f"{value} starts with a reserved part!! Will just keep it as"
                f" it is. May be you are serializing an already serialized object",
                warnings.NegmasSarializationWarning,
            )
        return value

    if is_lambda_or_partial_function(value):
        return LAMBDA_START + cloudpickle.dumps(value)

    if is_not_lambda_nor_partial_function(value):
        return FUNCTION_START + cloudpickle.dumps(value)

    if hasattr(value, "__dict__"):
        if deep:
            objmem = add_to_mem(value, objmem)
            d = {
                k: encode(v, objmem)
                for k, v in value.__dict__.items()
                if good_field(k, v, objmem)
            }
        else:
            d = {k: v for k, v in value.__dict__.items() if good_field(k, v, objmem)}
        if add_type_field:
            d[python_class_identifier] = get_type_field(value)
        return adjust_dict(d)

    if hasattr(value, "__slots__"):
        if deep:
            objmem = add_to_mem(value, objmem)
            d = {k: encode(getattr(value, k), objmem) for k in value.__slots__}  # type: ignore
        else:
            d = {k: getattr(value, k) for k in value.__slots__}  # type: ignore
        if add_type_field:
            d[python_class_identifier] = get_type_field(value)
        return adjust_dict(d)
    if isinstance(value, np.int64):  # type: ignore
        return int(value)
    # a builtin
    if is_jsonable(value):
        return value
    try:
        # Last resort: an opaque pickle payload tagged with its marker.
        return CLOUDPICKLE_START + cloudpickle.dumps(value)
    except Exception:
        pass
    warnings.warn(
        f"{value} of type {type(value)} is not serializable",
        warnings.NegmasSarializationWarning,
    )
    return value




# [docs]
def to_flat_dict(
    value,
    deep=True,
    add_type_field=False,
    shorten_type_field=False,
    python_class_identifier=PYTHON_CLASS_IDENTIFIER,
) -> dict[str, Any]:
    """
    Encodes the given value as a flat dictionary

    The value is first passed to `serialize`; nested dictionaries in the
    result are then flattened by joining their keys with "_". Top-level list
    and tuple values are replaced by their string representation.

    Args:
        value: The value to be converted to a flat dictionary
        deep: Converting all sub-objects
        add_type_field: If true, a special field for the object type will be added
        shorten_type_field: If true, the type field will be shortened to just class name if it is defined in NegMAS
        python_class_identifier: Name of the key used for the type field

    Returns:
        The flattened dictionary. An empty dictionary is returned if `value`
        serializes to `None` or to an empty dictionary.

    Raises:
        ValueError: If `value` does not serialize to a dictionary.

    """
    d = serialize(
        value,
        add_type_field=add_type_field,
        shorten_type_field=shorten_type_field,
        deep=deep,
        python_class_identifier=python_class_identifier,
    )
    if d is None:
        return {}
    if not isinstance(d, dict):
        raise ValueError(
            f"value is of type {type(value)} cannot be converted to a flat dict"
        )
    if not d:
        # Nothing to flatten; normalizing an empty mapping yields no records.
        return {}
    # Sequences are stringified so that each one ends up as a single value.
    flat_input = {
        k: str(v) if isinstance(v, (list, tuple)) else v for k, v in d.items()
    }
    records = json_normalize(flat_input, errors="ignore", sep="_").to_dict(
        orient="records"
    )
    return records[0] if records else {}




# [docs]
def deserialize(
    d: Any,
    deep=True,
    remove_type_field=True,
    keep_private=False,
    fallback_class_name: str | None = None,
    base_module: str = "",
    deep_ignore: bool = True,
    ignored_keys: tuple[str, ...] = tuple(),
    python_class_identifier=PYTHON_CLASS_IDENTIFIER,
    type_marker: str = TYPE_START,
    path_marker: str = PATH_START,
    lambda_marker: bytes = LAMBDA_START,
    function_marker: bytes = FUNCTION_START,
    cloudpickle_marker: bytes = CLOUDPICKLE_START,
    extra_paths: tuple[str | Path, ...] = tuple(),
    extra_modules: tuple[str, ...] = tuple(),
    type_name_adapter: Callable[[str], str] | None = None,
    path_adapter: Callable[[str], str] | None = None,
):
    """Decodes a dict/object coming from `serialize`

    Args:

        d: The value to be decoded.
        deep: If true, decode recursively
        remove_type_field: If true the field called `python_class_identifier` will be removed if found.
        keep_private: If given, private fields (starting with _) will be kept
        fallback_class_name: If given, it is used as the fall-back type if `python_class_identifier` is not in the dict.
            It is also forwarded to the recursive calls made for nested values.
        base_module: If given and the type name cannot be resolved as it is, `base_module.type_name` is tried.
        deep_ignore: if given, ignored keys are ignored in all components recursively when deep is specified
        ignored_keys: Keys to ignore
        python_class_identifier: Name of the key holding the type name.
        type_marker: Prefix identifying strings that encode a type.
        path_marker: Prefix identifying strings that encode a `Path`.
        lambda_marker: Prefix identifying bytes that encode a lambda/partial.
        function_marker: Prefix identifying bytes that encode a function.
        cloudpickle_marker: Prefix identifying bytes that encode a pickled object.
        extra_paths: Paths appended to `sys.path` (if not already there) before decoding.
        extra_modules: Modules imported before decoding.
        type_name_adapter: Applied to every type name before resolving it.
        path_adapter: Applied to every encoded path before creating the `Path`.

    Remarks:

        - The input `d` is never modified; when the type field has to be
          removed a shallow copy without it is used.
        - If the object is a dict with no `python_class_identifier` field, no
          `fallback_class_name` is given and `deep` is false, the input `d` is
          returned as it is. It will not even be copied.
        - With `deep` false, non-dict values are returned as they are.
        - Bytes starting with one of the lambda/function/cloudpickle markers
          are passed to `cloudpickle.loads`, which can execute arbitrary code.
          Only deserialize data coming from a trusted source.

    See Also:
        `serialize`, `PYTHON_CLASS_IDENTIFIER`

    """

    def do_nothing(x):
        return x

    # Arguments forwarded unchanged to every recursive call.
    params_ = dict(
        deep=deep,
        remove_type_field=remove_type_field,
        keep_private=keep_private,
        fallback_class_name=fallback_class_name,
        base_module=base_module,
        deep_ignore=deep_ignore if deep_ignore else False,
        ignored_keys=ignored_keys if deep_ignore else tuple(),
        python_class_identifier=python_class_identifier,
        type_marker=type_marker,
        path_marker=path_marker,
        lambda_marker=lambda_marker,
        function_marker=function_marker,
        cloudpickle_marker=cloudpickle_marker,
        extra_paths=extra_paths,
        extra_modules=extra_modules,
        type_name_adapter=type_name_adapter,
        path_adapter=path_adapter,
    )

    if type_name_adapter is None:
        type_name_adapter = do_nothing
    if path_adapter is None:
        path_adapter = do_nothing
    for p in extra_paths:
        # This function recurses with the same extra_paths, so guard against
        # adding the same entry to sys.path over and over.
        p = str(p)
        if p not in sys.path:
            sys.path.append(p)
    for module in extra_modules:
        importlib.import_module(module)

    def good_field(k: str):
        if k in ignored_keys:
            return False
        if not isinstance(k, str):
            return True
        return keep_private or not (k != python_class_identifier and k.startswith("_"))

    # Strings are NOT returned here: they may carry a type/path marker that
    # is decoded further down.
    if d is None or isinstance(d, (int, float)):
        return d
    if isinstance(d, dict):
        python_class_name = d.get(python_class_identifier, fallback_class_name)
        if remove_type_field and python_class_identifier in d:
            # Drop the type field from a copy so the caller's dict stays intact
            # (and can be deserialized again).
            d = {k: v for k, v in d.items() if k != python_class_identifier}
        if python_class_name is not None and python_class_name != "functools.partial":
            try:
                python_class = get_class(type_name_adapter(python_class_name))
            except Exception:
                if not base_module:
                    raise
                python_class = get_class(
                    type_name_adapter(f"{base_module}.{python_class_name}")
                )
            # we resolve sub-objects first from the dict if deep is specified before calling deserialize on the class
            if deep:
                d = {
                    k: deserialize(v, **params_)  # type: ignore
                    for k, v in d.items()
                    if good_field(k)
                }
            # from_dict needs to do a shallow conversion only as deep conversion is taken care of already.
            if hasattr(python_class, "from_dict"):
                try:
                    return python_class.from_dict(
                        {k: v for k, v in d.items() if k not in ignored_keys},
                        python_class_identifier=python_class_identifier,
                    )  # type: ignore
                except Exception:
                    # from_dict may not accept `python_class_identifier`; retry without it.
                    return python_class.from_dict(
                        {k: v for k, v in d.items() if k not in ignored_keys}
                    )
            if not deep:
                # In the deep case the fields were already filtered above.
                d = {k: v for k, v in d.items() if good_field(k)}
            return python_class(**d)
        if not deep:
            return d
        return {k: deserialize(v, **params_) for k, v in d.items() if good_field(k)}  # type: ignore
    if not deep:
        return d
    if isinstance(d, str):
        if d.startswith(type_marker):
            return get_class(type_name_adapter(d[len(type_marker) :]))
        if d.startswith(path_marker):
            return Path(path_adapter(d[len(path_marker) :]))
        return d
    if isinstance(d, bytes):
        # NOTE: unpickling executes code embedded in the payload; see Remarks.
        for marker in (lambda_marker, function_marker, cloudpickle_marker):
            if d.startswith(marker):
                return cloudpickle.loads(d[len(marker) :])
        return d
    if isinstance(d, (tuple, list)):
        return type(d)(
            deserialize(_, **params_)  # type: ignore
            for _ in d
        )
    return d