turbo_broccoli.context

A context object holds information about the (de)serialization process, such as the current position in the document, output paths, etc.

  1"""
  2A context object holds information about the (de)serialization process, such as
  3the current position in the document, output paths, etc.
  4"""
  5
  6import tempfile
  7from os import environ as ENV
  8from pathlib import Path
  9from typing import Literal
 10from uuid import uuid4
 11
 12from turbo_broccoli.exceptions import TypeIsNodecode
 13
 14
 15def _list_of_types_to_dict(lot: list[type]) -> dict[str, type]:
 16    """
 17    Converts a list of types `[T1, T2, ...]` to a dict that looks like `{"T1":
 18    T1, "T2": T2, ...}`.
 19    """
 20    return {t.__name__: t for t in lot}
 21
 22
 23# pylint: disable=too-many-instance-attributes
 24class Context:
 25    """
 26    (De)Serialization context, which is an object that contains various
 27    information and parameters about the ongoing operation. If you want your
 28    (de)serialization to behave a certain way, create a context object and pass
 29    it to
 30    [`turbo_broccoli.to_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#to_json)
 31    or
 32    [`turbo_broccoli.from_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#from_json).
 33    For convenience,
 34    [`turbo_broccoli.save_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#save_json)
 35    and
 36    [`turbo_broccoli.load_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#load_json)
 37    take the context parameter's as kwargs.
 38    """
 39
 40    artifact_path: Path
 41    dataclass_types: dict[str, type]
 42    file_path: Path | None
 43    json_path: str
 44    keras_format: str
 45    min_artifact_size: int = 8000
 46    nacl_shared_key: bytes | None
 47    nodecode_types: list[str]
 48    pandas_format: str
 49    pandas_kwargs: dict
 50    pytorch_module_types: dict[str, type]
 51    compress: bool
 52
 53    # pylint: disable=too-many-arguments
 54    def __init__(
 55        self,
 56        file_path: str | Path | None = None,
 57        artifact_path: str | Path | None = None,
 58        min_artifact_size: int | None = None,
 59        nodecode_types: list[str] | None = None,
 60        keras_format: Literal["keras", "tf", "h5"] | None = None,
 61        pandas_format: (
 62            Literal[
 63                "csv",
 64                "excel",
 65                "feather",
 66                "html",
 67                "json",
 68                "latex",
 69                "orc",
 70                "parquet",
 71                "pickle",
 72                "sql",
 73                "stata",
 74                "xml",
 75            ]
 76            | None
 77        ) = None,
 78        pandas_kwargs: dict | None = None,
 79        nacl_shared_key: bytes | None = None,
 80        dataclass_types: dict[str, type] | list[type] | None = None,
 81        pytorch_module_types: dict[str, type] | list[type] | None = None,
 82        json_path: str = "$",
 83        compress: bool = False,
 84    ) -> None:
 85        """
 86        Args:
 87            file_path (str | Path | None, optional): Output JSON file path.
 88            artifact_path (str | Path | None, optional): Artifact path.
 89                Defaults to the parent directory of `file_path`, or a new
 90                temporary directory if `file_path` is `None`.
 91            min_artifact_size (int, optional): Byte strings (and everything
 92                that serialize to byte strings such as numpy arrays) larget
 93                than this will be stored in artifact rather than be embedded in
 94                the output JSON string/file.
 95            nodecode_types (list[str], optional): List of type names which
 96                shall be deserialized to `None` rather than their true value.
 97                See
 98                [`TB_NODECODE`](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#environment-variables)
 99            keras_format ("keras", "tf", "h5", optional): Format for Keras
100                artifacts
101            pandas_format ("csv", "excel", "feather", "html", "json", "latex",
102                "orc", "parquet", "pickle", "sql", "stata", "xml", optional):
103                Format for pandas artifacts
104            pandas_kwargs (dict, optional): kwargs to forward to the pandas
105                `to_*` and `read_*` function. For example, if
106                `pandas_format="parquet"`, then the content of `pandas.kwargs`
107                will be forwarded to
108                [`pandas.DataFrame.to_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html)
109                and
110                [`pandas.read_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html)
111            nacl_shared_key (bytes, optional): PyNaCl shared key. See also
112                [PyNaCl's
113                documentation](https://pynacl.readthedocs.io/en/latest/secret/#key)
114            dataclass_types (dict[str, type] | list[type], optional): List of
115                dataclass types for deserialization. See the
116                [README](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#supported-types).
117            pytorch_module_types (dict[str, type] | list[type], optional): List
118                of pytorch module types for deserialization. See the
119                [README](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#supported-types).
120            json_path (str, optional): Current JSONpath. Don't use.
121            compress (bool, optional): Wether to compress the output JSON file/
122                string. Defaults to `False`. If `file_path` is provided and
123                ends in `.json.gz`, then this parameter is overrode to `True`.
124        """
125        self.json_path = json_path
126        self.file_path = (
127            Path(file_path) if isinstance(file_path, str) else file_path
128        )
129        if artifact_path is None:
130            if p := ENV.get("TB_ARTIFACT_PATH"):
131                self.artifact_path = Path(p)
132            else:
133                self.artifact_path = (
134                    self.file_path.parent
135                    if self.file_path is not None
136                    else Path(tempfile.mkdtemp())
137                )
138        else:
139            self.artifact_path = Path(artifact_path)
140        self.min_artifact_size = (
141            min_artifact_size
142            if min_artifact_size is not None
143            else int(ENV.get("TB_MAX_NBYTES", 8000))
144        )
145        self.nodecode_types = nodecode_types or ENV.get(
146            "TB_NODECODE", ""
147        ).split(",")
148        self.keras_format = keras_format or str(
149            ENV.get("TB_KERAS_FORMAT", "tf")
150        )
151        self.pandas_format = pandas_format or str(
152            ENV.get("TB_PANDAS_FORMAT", "csv")
153        )
154        self.pandas_kwargs = pandas_kwargs or {}
155        if isinstance(nacl_shared_key, bytes):
156            self.nacl_shared_key = nacl_shared_key
157        elif "TB_SHARED_KEY" in ENV:
158            self.nacl_shared_key = str(ENV["TB_SHARED_KEY"]).encode("utf-8")
159        else:
160            self.nacl_shared_key = None
161        self.dataclass_types = (
162            _list_of_types_to_dict(dataclass_types)
163            if isinstance(dataclass_types, list)
164            else (dataclass_types or {})
165        )
166        self.pytorch_module_types = (
167            _list_of_types_to_dict(pytorch_module_types)
168            if isinstance(pytorch_module_types, list)
169            else (pytorch_module_types or {})
170        )
171        self.compress = (
172            True
173            if (
174                self.file_path is not None
175                and self.file_path.name.endswith(".json.gz")
176            )
177            else compress
178        )
179
180    def __repr__(self) -> str:
181        fp, ap = str(self.file_path), str(self.artifact_path)
182        return (
183            f"Context(file_path={fp}, artifact_path={ap}, "
184            f"json_path={self.json_path})"
185        )
186
187    def __truediv__(self, x: str | int) -> "Context":
188        """
189        Returns a copy of the current context but where the `json_path`
190        attribute is `self.json_path + "." + str(x)`. Use this when you're
191        going down the document.
192        """
193        kwargs = self.__dict__.copy()
194        kwargs["json_path"] = self.json_path + "." + str(x)
195        return Context(**kwargs)
196
197    def id_to_artifact_path(self, art_id: str, extension: str = "tb") -> Path:
198        """
199        Takes an artifact id (which is an UUID4 string) and returns the
200        absolute path to the corresponding artifact file.
201        """
202        art_fn = art_id + "." + extension
203        if self.file_path is not None:
204            art_fn = self.file_path.stem + "." + art_fn
205        return self.artifact_path / art_fn
206
207    def new_artifact_path(self, extension: str = "tb") -> tuple[Path, str]:
208        """Returns the path to a new artifact alongside the artifact's ID"""
209        art_id = str(uuid4())
210        return self.id_to_artifact_path(art_id, extension), art_id
211
212    def raise_if_nodecode(self, type_name: str) -> None:
213        """
214        Raises a `turbo_broccoli.exceptions.TypeIsNodecode` exception if
215        `type_name` or any prefix is set to not be decoded in this context (see
216        `nodecode_types` constructor argument).
217
218        For example, if `type_name` is `a.b.c`, then this method raises
219        `turbo_broccoli.exceptions.TypeIsNodecode` if either `a`, `a.b`, or
220        `a.b.c` is set as a nodecode type.
221        """
222        parts = type_name.split(".")
223        for i in range(1, len(parts) + 1):
224            t = ".".join(parts[:i])
225            if t in self.nodecode_types:
226                raise TypeIsNodecode(t)
class Context:
 25class Context:
 26    """
 27    (De)Serialization context, which is an object that contains various
 28    information and parameters about the ongoing operation. If you want your
 29    (de)serialization to behave a certain way, create a context object and pass
 30    it to
 31    [`turbo_broccoli.to_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#to_json)
 32    or
 33    [`turbo_broccoli.from_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#from_json).
 34    For convenience,
 35    [`turbo_broccoli.save_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#save_json)
 36    and
 37    [`turbo_broccoli.load_json`](https://altaris.github.io/turbo-broccoli/turbo_broccoli/turbo_broccoli.html#load_json)
 38    take the context parameter's as kwargs.
 39    """
 40
 41    artifact_path: Path
 42    dataclass_types: dict[str, type]
 43    file_path: Path | None
 44    json_path: str
 45    keras_format: str
 46    min_artifact_size: int = 8000
 47    nacl_shared_key: bytes | None
 48    nodecode_types: list[str]
 49    pandas_format: str
 50    pandas_kwargs: dict
 51    pytorch_module_types: dict[str, type]
 52    compress: bool
 53
 54    # pylint: disable=too-many-arguments
 55    def __init__(
 56        self,
 57        file_path: str | Path | None = None,
 58        artifact_path: str | Path | None = None,
 59        min_artifact_size: int | None = None,
 60        nodecode_types: list[str] | None = None,
 61        keras_format: Literal["keras", "tf", "h5"] | None = None,
 62        pandas_format: (
 63            Literal[
 64                "csv",
 65                "excel",
 66                "feather",
 67                "html",
 68                "json",
 69                "latex",
 70                "orc",
 71                "parquet",
 72                "pickle",
 73                "sql",
 74                "stata",
 75                "xml",
 76            ]
 77            | None
 78        ) = None,
 79        pandas_kwargs: dict | None = None,
 80        nacl_shared_key: bytes | None = None,
 81        dataclass_types: dict[str, type] | list[type] | None = None,
 82        pytorch_module_types: dict[str, type] | list[type] | None = None,
 83        json_path: str = "$",
 84        compress: bool = False,
 85    ) -> None:
 86        """
 87        Args:
 88            file_path (str | Path | None, optional): Output JSON file path.
 89            artifact_path (str | Path | None, optional): Artifact path.
 90                Defaults to the parent directory of `file_path`, or a new
 91                temporary directory if `file_path` is `None`.
 92            min_artifact_size (int, optional): Byte strings (and everything
 93                that serialize to byte strings such as numpy arrays) larget
 94                than this will be stored in artifact rather than be embedded in
 95                the output JSON string/file.
 96            nodecode_types (list[str], optional): List of type names which
 97                shall be deserialized to `None` rather than their true value.
 98                See
 99                [`TB_NODECODE`](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#environment-variables)
100            keras_format ("keras", "tf", "h5", optional): Format for Keras
101                artifacts
102            pandas_format ("csv", "excel", "feather", "html", "json", "latex",
103                "orc", "parquet", "pickle", "sql", "stata", "xml", optional):
104                Format for pandas artifacts
105            pandas_kwargs (dict, optional): kwargs to forward to the pandas
106                `to_*` and `read_*` function. For example, if
107                `pandas_format="parquet"`, then the content of `pandas.kwargs`
108                will be forwarded to
109                [`pandas.DataFrame.to_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html)
110                and
111                [`pandas.read_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html)
112            nacl_shared_key (bytes, optional): PyNaCl shared key. See also
113                [PyNaCl's
114                documentation](https://pynacl.readthedocs.io/en/latest/secret/#key)
115            dataclass_types (dict[str, type] | list[type], optional): List of
116                dataclass types for deserialization. See the
117                [README](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#supported-types).
118            pytorch_module_types (dict[str, type] | list[type], optional): List
119                of pytorch module types for deserialization. See the
120                [README](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#supported-types).
121            json_path (str, optional): Current JSONpath. Don't use.
122            compress (bool, optional): Wether to compress the output JSON file/
123                string. Defaults to `False`. If `file_path` is provided and
124                ends in `.json.gz`, then this parameter is overrode to `True`.
125        """
126        self.json_path = json_path
127        self.file_path = (
128            Path(file_path) if isinstance(file_path, str) else file_path
129        )
130        if artifact_path is None:
131            if p := ENV.get("TB_ARTIFACT_PATH"):
132                self.artifact_path = Path(p)
133            else:
134                self.artifact_path = (
135                    self.file_path.parent
136                    if self.file_path is not None
137                    else Path(tempfile.mkdtemp())
138                )
139        else:
140            self.artifact_path = Path(artifact_path)
141        self.min_artifact_size = (
142            min_artifact_size
143            if min_artifact_size is not None
144            else int(ENV.get("TB_MAX_NBYTES", 8000))
145        )
146        self.nodecode_types = nodecode_types or ENV.get(
147            "TB_NODECODE", ""
148        ).split(",")
149        self.keras_format = keras_format or str(
150            ENV.get("TB_KERAS_FORMAT", "tf")
151        )
152        self.pandas_format = pandas_format or str(
153            ENV.get("TB_PANDAS_FORMAT", "csv")
154        )
155        self.pandas_kwargs = pandas_kwargs or {}
156        if isinstance(nacl_shared_key, bytes):
157            self.nacl_shared_key = nacl_shared_key
158        elif "TB_SHARED_KEY" in ENV:
159            self.nacl_shared_key = str(ENV["TB_SHARED_KEY"]).encode("utf-8")
160        else:
161            self.nacl_shared_key = None
162        self.dataclass_types = (
163            _list_of_types_to_dict(dataclass_types)
164            if isinstance(dataclass_types, list)
165            else (dataclass_types or {})
166        )
167        self.pytorch_module_types = (
168            _list_of_types_to_dict(pytorch_module_types)
169            if isinstance(pytorch_module_types, list)
170            else (pytorch_module_types or {})
171        )
172        self.compress = (
173            True
174            if (
175                self.file_path is not None
176                and self.file_path.name.endswith(".json.gz")
177            )
178            else compress
179        )
180
181    def __repr__(self) -> str:
182        fp, ap = str(self.file_path), str(self.artifact_path)
183        return (
184            f"Context(file_path={fp}, artifact_path={ap}, "
185            f"json_path={self.json_path})"
186        )
187
188    def __truediv__(self, x: str | int) -> "Context":
189        """
190        Returns a copy of the current context but where the `json_path`
191        attribute is `self.json_path + "." + str(x)`. Use this when you're
192        going down the document.
193        """
194        kwargs = self.__dict__.copy()
195        kwargs["json_path"] = self.json_path + "." + str(x)
196        return Context(**kwargs)
197
198    def id_to_artifact_path(self, art_id: str, extension: str = "tb") -> Path:
199        """
200        Takes an artifact id (which is an UUID4 string) and returns the
201        absolute path to the corresponding artifact file.
202        """
203        art_fn = art_id + "." + extension
204        if self.file_path is not None:
205            art_fn = self.file_path.stem + "." + art_fn
206        return self.artifact_path / art_fn
207
208    def new_artifact_path(self, extension: str = "tb") -> tuple[Path, str]:
209        """Returns the path to a new artifact alongside the artifact's ID"""
210        art_id = str(uuid4())
211        return self.id_to_artifact_path(art_id, extension), art_id
212
213    def raise_if_nodecode(self, type_name: str) -> None:
214        """
215        Raises a `turbo_broccoli.exceptions.TypeIsNodecode` exception if
216        `type_name` or any prefix is set to not be decoded in this context (see
217        `nodecode_types` constructor argument).
218
219        For example, if `type_name` is `a.b.c`, then this method raises
220        `turbo_broccoli.exceptions.TypeIsNodecode` if either `a`, `a.b`, or
221        `a.b.c` is set as a nodecode type.
222        """
223        parts = type_name.split(".")
224        for i in range(1, len(parts) + 1):
225            t = ".".join(parts[:i])
226            if t in self.nodecode_types:
227                raise TypeIsNodecode(t)

(De)Serialization context, which is an object that contains various information and parameters about the ongoing operation. If you want your (de)serialization to behave a certain way, create a context object and pass it to turbo_broccoli.to_json or turbo_broccoli.from_json. For convenience, turbo_broccoli.save_json and turbo_broccoli.load_json take the context parameters as kwargs.

Context( file_path: str | pathlib.Path | None = None, artifact_path: str | pathlib.Path | None = None, min_artifact_size: int | None = None, nodecode_types: list[str] | None = None, keras_format: Optional[Literal['keras', 'tf', 'h5']] = None, pandas_format: Optional[Literal['csv', 'excel', 'feather', 'html', 'json', 'latex', 'orc', 'parquet', 'pickle', 'sql', 'stata', 'xml']] = None, pandas_kwargs: dict | None = None, nacl_shared_key: bytes | None = None, dataclass_types: dict[str, type] | list[type] | None = None, pytorch_module_types: dict[str, type] | list[type] | None = None, json_path: str = '$', compress: bool = False)
 55    def __init__(
 56        self,
 57        file_path: str | Path | None = None,
 58        artifact_path: str | Path | None = None,
 59        min_artifact_size: int | None = None,
 60        nodecode_types: list[str] | None = None,
 61        keras_format: Literal["keras", "tf", "h5"] | None = None,
 62        pandas_format: (
 63            Literal[
 64                "csv",
 65                "excel",
 66                "feather",
 67                "html",
 68                "json",
 69                "latex",
 70                "orc",
 71                "parquet",
 72                "pickle",
 73                "sql",
 74                "stata",
 75                "xml",
 76            ]
 77            | None
 78        ) = None,
 79        pandas_kwargs: dict | None = None,
 80        nacl_shared_key: bytes | None = None,
 81        dataclass_types: dict[str, type] | list[type] | None = None,
 82        pytorch_module_types: dict[str, type] | list[type] | None = None,
 83        json_path: str = "$",
 84        compress: bool = False,
 85    ) -> None:
 86        """
 87        Args:
 88            file_path (str | Path | None, optional): Output JSON file path.
 89            artifact_path (str | Path | None, optional): Artifact path.
 90                Defaults to the parent directory of `file_path`, or a new
 91                temporary directory if `file_path` is `None`.
 92            min_artifact_size (int, optional): Byte strings (and everything
 93                that serialize to byte strings such as numpy arrays) larget
 94                than this will be stored in artifact rather than be embedded in
 95                the output JSON string/file.
 96            nodecode_types (list[str], optional): List of type names which
 97                shall be deserialized to `None` rather than their true value.
 98                See
 99                [`TB_NODECODE`](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#environment-variables)
100            keras_format ("keras", "tf", "h5", optional): Format for Keras
101                artifacts
102            pandas_format ("csv", "excel", "feather", "html", "json", "latex",
103                "orc", "parquet", "pickle", "sql", "stata", "xml", optional):
104                Format for pandas artifacts
105            pandas_kwargs (dict, optional): kwargs to forward to the pandas
106                `to_*` and `read_*` function. For example, if
107                `pandas_format="parquet"`, then the content of `pandas.kwargs`
108                will be forwarded to
109                [`pandas.DataFrame.to_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html)
110                and
111                [`pandas.read_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html)
112            nacl_shared_key (bytes, optional): PyNaCl shared key. See also
113                [PyNaCl's
114                documentation](https://pynacl.readthedocs.io/en/latest/secret/#key)
115            dataclass_types (dict[str, type] | list[type], optional): List of
116                dataclass types for deserialization. See the
117                [README](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#supported-types).
118            pytorch_module_types (dict[str, type] | list[type], optional): List
119                of pytorch module types for deserialization. See the
120                [README](https://altaris.github.io/turbo-broccoli/turbo_broccoli.html#supported-types).
121            json_path (str, optional): Current JSONpath. Don't use.
122            compress (bool, optional): Wether to compress the output JSON file/
123                string. Defaults to `False`. If `file_path` is provided and
124                ends in `.json.gz`, then this parameter is overrode to `True`.
125        """
126        self.json_path = json_path
127        self.file_path = (
128            Path(file_path) if isinstance(file_path, str) else file_path
129        )
130        if artifact_path is None:
131            if p := ENV.get("TB_ARTIFACT_PATH"):
132                self.artifact_path = Path(p)
133            else:
134                self.artifact_path = (
135                    self.file_path.parent
136                    if self.file_path is not None
137                    else Path(tempfile.mkdtemp())
138                )
139        else:
140            self.artifact_path = Path(artifact_path)
141        self.min_artifact_size = (
142            min_artifact_size
143            if min_artifact_size is not None
144            else int(ENV.get("TB_MAX_NBYTES", 8000))
145        )
146        self.nodecode_types = nodecode_types or ENV.get(
147            "TB_NODECODE", ""
148        ).split(",")
149        self.keras_format = keras_format or str(
150            ENV.get("TB_KERAS_FORMAT", "tf")
151        )
152        self.pandas_format = pandas_format or str(
153            ENV.get("TB_PANDAS_FORMAT", "csv")
154        )
155        self.pandas_kwargs = pandas_kwargs or {}
156        if isinstance(nacl_shared_key, bytes):
157            self.nacl_shared_key = nacl_shared_key
158        elif "TB_SHARED_KEY" in ENV:
159            self.nacl_shared_key = str(ENV["TB_SHARED_KEY"]).encode("utf-8")
160        else:
161            self.nacl_shared_key = None
162        self.dataclass_types = (
163            _list_of_types_to_dict(dataclass_types)
164            if isinstance(dataclass_types, list)
165            else (dataclass_types or {})
166        )
167        self.pytorch_module_types = (
168            _list_of_types_to_dict(pytorch_module_types)
169            if isinstance(pytorch_module_types, list)
170            else (pytorch_module_types or {})
171        )
172        self.compress = (
173            True
174            if (
175                self.file_path is not None
176                and self.file_path.name.endswith(".json.gz")
177            )
178            else compress
179        )

Args: file_path (str | Path | None, optional): Output JSON file path. artifact_path (str | Path | None, optional): Artifact path. Defaults to the parent directory of file_path, or a new temporary directory if file_path is None. min_artifact_size (int, optional): Byte strings (and everything that serializes to byte strings such as numpy arrays) larger than this will be stored in an artifact rather than be embedded in the output JSON string/file. nodecode_types (list[str], optional): List of type names which shall be deserialized to None rather than their true value. See TB_NODECODE keras_format ("keras", "tf", "h5", optional): Format for Keras artifacts pandas_format ("csv", "excel", "feather", "html", "json", "latex", "orc", "parquet", "pickle", "sql", "stata", "xml", optional): Format for pandas artifacts pandas_kwargs (dict, optional): kwargs to forward to the pandas to_* and read_* function. For example, if pandas_format="parquet", then the content of pandas_kwargs will be forwarded to pandas.DataFrame.to_parquet and pandas.read_parquet nacl_shared_key (bytes, optional): PyNaCl shared key. See also PyNaCl's documentation dataclass_types (dict[str, type] | list[type], optional): List of dataclass types for deserialization. See the README. pytorch_module_types (dict[str, type] | list[type], optional): List of pytorch module types for deserialization. See the README. json_path (str, optional): Current JSONpath. Don't use. compress (bool, optional): Whether to compress the output JSON file/ string. Defaults to False. If file_path is provided and ends in .json.gz, then this parameter is overridden to True.

artifact_path: pathlib.Path
dataclass_types: dict[str, type]
file_path: pathlib.Path | None
json_path: str
keras_format: str
min_artifact_size: int = 8000
nacl_shared_key: bytes | None
nodecode_types: list[str]
pandas_format: str
pandas_kwargs: dict
pytorch_module_types: dict[str, type]
compress: bool
def id_to_artifact_path(self, art_id: str, extension: str = 'tb') -> pathlib.Path:
198    def id_to_artifact_path(self, art_id: str, extension: str = "tb") -> Path:
199        """
200        Takes an artifact id (which is an UUID4 string) and returns the
201        absolute path to the corresponding artifact file.
202        """
203        art_fn = art_id + "." + extension
204        if self.file_path is not None:
205            art_fn = self.file_path.stem + "." + art_fn
206        return self.artifact_path / art_fn

Takes an artifact id (which is an UUID4 string) and returns the absolute path to the corresponding artifact file.

def new_artifact_path(self, extension: str = 'tb') -> tuple[pathlib.Path, str]:
208    def new_artifact_path(self, extension: str = "tb") -> tuple[Path, str]:
209        """Returns the path to a new artifact alongside the artifact's ID"""
210        art_id = str(uuid4())
211        return self.id_to_artifact_path(art_id, extension), art_id

Returns the path to a new artifact alongside the artifact's ID

def raise_if_nodecode(self, type_name: str) -> None:
213    def raise_if_nodecode(self, type_name: str) -> None:
214        """
215        Raises a `turbo_broccoli.exceptions.TypeIsNodecode` exception if
216        `type_name` or any prefix is set to not be decoded in this context (see
217        `nodecode_types` constructor argument).
218
219        For example, if `type_name` is `a.b.c`, then this method raises
220        `turbo_broccoli.exceptions.TypeIsNodecode` if either `a`, `a.b`, or
221        `a.b.c` is set as a nodecode type.
222        """
223        parts = type_name.split(".")
224        for i in range(1, len(parts) + 1):
225            t = ".".join(parts[:i])
226            if t in self.nodecode_types:
227                raise TypeIsNodecode(t)

Raises a turbo_broccoli.exceptions.TypeIsNodecode exception if type_name or any prefix is set to not be decoded in this context (see nodecode_types constructor argument).

For example, if type_name is a.b.c, then this method raises turbo_broccoli.exceptions.TypeIsNodecode if either a, a.b, or a.b.c is set as a nodecode type.