Utilities

Utilities.

`Table` `dataclass`

Spreadsheet table.

Source code in named_xlsx/utils.py

@dataclass
class Table:
    """Spreadsheet table."""

    name: str
    sheet: str
    _table: openpyxl.worksheet.table.Table = field(repr=False)

    @property
    def cells(self):
        return self._table.ref

    def mapper_columns(self, **kwargs) -> dict[str, tuple[str, str]]:
        table_colnames = self._table.column_names
        cr = CellRange(self.cells)
        cr.shrink(**kwargs)
        table_colsindx = [col for row, col in cr.top]
        table_rowsindx = [row for row, col in cr.left]
        table_row_top, table_row_bottom = table_rowsindx[0], table_rowsindx[-1]

        data = {}
        for col_name, col_indx in zip(table_colnames, table_colsindx):
            col_letter = get_column_letter(col_indx)
            addr = f"{col_letter}{table_row_top}:{col_letter}{table_row_bottom}"
            data[col_name] = (self.sheet, addr)
        return data

`XLSXAddress` `dataclass`

Excel address, with or without sheet name, according to project conventions.

Examples:

>>> a = XLSXAddress("My Sheet!A10:A15")
>>> a
XLSXAddress(value='My Sheet!A10:A15')
>>> a.sheet
'My Sheet'
>>> a.coord
'A10:A15'
>>> a = XLSXAddress("A10:A15")
>>> a
XLSXAddress(value='A10:A15')
>>> a.sheet
>>> a.coord
'A10:A15'

Source code in named_xlsx/utils.py

@dataclass(frozen=True)
class XLSXAddress:
    """
    Excel address, with or without sheet name, according to project conventions.

    Examples
    --------
    >>> a = XLSXAddress("My Sheet!A10:A15")
    >>> a
    XLSXAddress(value='My Sheet!A10:A15')
    >>> a.sheet
    'My Sheet'
    >>> a.coord
    'A10:A15'
    >>> a = XLSXAddress("A10:A15")
    >>> a
    XLSXAddress(value='A10:A15')
    >>> a.sheet
    >>> a.coord
    'A10:A15'

    """

    value: str
    sheet: str = field(init=False, repr=False)
    coord: str = field(init=False, repr=False)
    _parsed: CellRange = field(init=False, repr=False)

    def __post_init__(self):

        value = self.value
        if " " in self.value and "!" in self.value:
            sheet, coord = self.value.rsplit("!")
            value = f"'{sheet}'!{coord}"
        try:
            _parsed = CellRange(value)
        except ValueError as e:
            raise ValueError(f"Cannot parse: {self.value=}") from e
        object.__setattr__(self, "_parsed", _parsed)
        object.__setattr__(self, "sheet", self._parsed.title)
        object.__setattr__(self, "coord", self._parsed.coord)

    @classmethod
    def from_parts(cls, sheet: str, coords: str):
        """
        Build instance from parts.

        Parameters
        ----------
        sheet
            Sheet name. Can be `None`.
        coords
            Coordinates. Must always be given.

        Examples
        --------
        >>> XLSXAddress.from_parts("My Sheet", "A10")
        XLSXAddress(value='My Sheet!A10')
        >>> XLSXAddress.from_parts(None, "A10")
        XLSXAddress(value='A10')

        """
        return cls(coords if sheet is None else f"{sheet}!{coords}")

    def as_array(self, order: str = "row", squeeze: bool = True) -> np.ndarray:
        """
        Generate coordinate array (row major: left-right then top-down).

        Parameters
        ----------
        order
            Specify the order, one of ['row', 'col'].
        squeeze
            Drop single-dimensional entries from the shape of the resulting array.

        Examples
        --------
        >>> XLSXAddress("A10:B11").as_array()
        array([['A10', 'B10'],
               ['A11', 'B11']], dtype='<U3')
        >>> XLSXAddress("Hello!A10:B11").as_array()
        array([['A10', 'B10'],
               ['A11', 'B11']], dtype='<U3')

        """
        d = dict(row=rows_from_range, col=cols_from_range)
        out = np.array(list(d[order](self.coord)))
        if squeeze:
            out = np.squeeze(out)
        return out

    @property
    def is_range(self) -> bool:
        """

        Examples
        --------
        >>> XLSXAddress("A10").is_range
        False
        >>> XLSXAddress("A10:B11").is_range
        True

        """
        if self.size == 1:
            return False
        if self.size > 1:
            return True
        raise ValueError(f"Unexpected {self.size=} for {self.value=}")

    @property
    def size(self) -> int:
        """
        Size of the address (number of elements).

        Examples
        --------
        >>> XLSXAddress("A10").size
        np.int64(1)
        >>> XLSXAddress("A10:A10").size
        np.int64(1)
        >>> XLSXAddress("A10:C11").size
        np.int64(6)
        >>> XLSXAddress("A10:A12").size
        np.int64(3)

        """
        return np.prod(list(self._parsed.size.values()))

    @property
    def shape(self) -> tuple[int, int]:
        """
        Shape of the address as `(row, column)`.

        Examples
        --------
        >>> XLSXAddress("A10").shape
        (1, 1)
        >>> XLSXAddress("A10:A10").shape
        (1, 1)
        >>> XLSXAddress("A10:C11").shape
        (2, 3)
        >>> XLSXAddress("A10:A12").shape
        (3, 1)

        """
        d = self._parsed.size
        return d["rows"], d["columns"]

    def format(self):
        return f"{self.sheet}!{self.coord}" if self.sheet is not None else self.coord

`is_range` `property`

Examples:

>>> XLSXAddress("A10").is_range
False
>>> XLSXAddress("A10:B11").is_range
True

`shape` `property`

Shape of the address as (row, column).

Examples:

>>> XLSXAddress("A10").shape
(1, 1)
>>> XLSXAddress("A10:A10").shape
(1, 1)
>>> XLSXAddress("A10:C11").shape
(2, 3)
>>> XLSXAddress("A10:A12").shape
(3, 1)

`size` `property`

Size of the address (number of elements).

Examples:

>>> XLSXAddress("A10").size
np.int64(1)
>>> XLSXAddress("A10:A10").size
np.int64(1)
>>> XLSXAddress("A10:C11").size
np.int64(6)
>>> XLSXAddress("A10:A12").size
np.int64(3)

`as_array(order='row', squeeze=True)`

Generate coordinate array (row major: left-right then top-down).

Parameters:

Name	Type	Description	Default
`order`	`str`	Specify the order, one of ['row', 'col'].	`'row'`
`squeeze`	`bool`	Drop single-dimensional entries from the shape of the resulting array.	`True`

Examples:

>>> XLSXAddress("A10:B11").as_array()
array([['A10', 'B10'],
       ['A11', 'B11']], dtype='<U3')
>>> XLSXAddress("Hello!A10:B11").as_array()
array([['A10', 'B10'],
       ['A11', 'B11']], dtype='<U3')

Source code in named_xlsx/utils.py

def as_array(self, order: str = "row", squeeze: bool = True) -> np.ndarray:
    """
    Generate coordinate array (row major: left-right then top-down).

    Parameters
    ----------
    order
        Specify the order, one of ['row', 'col'].
    squeeze
        Drop single-dimensional entries from the shape of the resulting array.

    Examples
    --------
    >>> XLSXAddress("A10:B11").as_array()
    array([['A10', 'B10'],
           ['A11', 'B11']], dtype='<U3')
    >>> XLSXAddress("Hello!A10:B11").as_array()
    array([['A10', 'B10'],
           ['A11', 'B11']], dtype='<U3')

    """
    d = dict(row=rows_from_range, col=cols_from_range)
    out = np.array(list(d[order](self.coord)))
    if squeeze:
        out = np.squeeze(out)
    return out

`from_parts(sheet, coords)` `classmethod`

Build instance from parts.

Parameters:

Name	Type	Description	Default
`sheet`	`str`	Sheet name. Can be `None`.	required
`coords`	`str`	Coordinates. Must always be given.	required

Examples:

>>> XLSXAddress.from_parts("My Sheet", "A10")
XLSXAddress(value='My Sheet!A10')
>>> XLSXAddress.from_parts(None, "A10")
XLSXAddress(value='A10')

Source code in named_xlsx/utils.py

@classmethod
def from_parts(cls, sheet: str, coords: str):
    """
    Build instance from parts.

    Parameters
    ----------
    sheet
        Sheet name. Can be `None`.
    coords
        Coordinates. Must always be given.

    Examples
    --------
    >>> XLSXAddress.from_parts("My Sheet", "A10")
    XLSXAddress(value='My Sheet!A10')
    >>> XLSXAddress.from_parts(None, "A10")
    XLSXAddress(value='A10')

    """
    return cls(coords if sheet is None else f"{sheet}!{coords}")

`get_destinations(defined_name, tables)`

Get plain spreadsheet address from defined names.

This is a middle-man function because openpyxl does not seem to deal with defined names that point to table columns. As per project conventions, tables are labelled t.<name>, so this function deals with that case separately with a bunch of hard-set conditions.

Source code in named_xlsx/utils.py

@cached
def get_destinations(
    defined_name: DefinedName, tables: dict[str, Table]
) -> list[tuple[str, str]]:
    """
    Get plain spreadsheet address from defined names.

    This is a middle-man function because openpyxl does not seem to deal with
    defined names that point to table columns.
    As per project conventions, tables are labelled `t.<name>`,
    so this function deals with that case separately with a bunch of
    hard-set conditions.

    """
    if defined_name.attr_text.startswith("t."):
        # Has a table as per project convention: with header and total rows
        # Get destination, shrinking range to data cells (without header and total rows).
        dest = defined_name.attr_text
        return [table_destination(dest, tables=tables, top=1, bottom=1)]
    return list(defined_name.destinations)

`nanaverage(arr, weights=None)`

Compute average ignoring nan values in arr and weights.

Examples:

>>> nanaverage(np.array([1, 2, 3]))
2.0
>>> nanaverage(np.array([np.nan, 2, 3]))
2.5
>>> nanaverage(np.array([np.nan, 2, 3]), np.array([1, 2, np.nan]))
2.0

Source code in named_xlsx/utils.py

def nanaverage(arr: np.ndarray, weights: np.ndarray | None = None) -> float:
    """
    Compute average ignoring `nan` values in `arr` and `weights`.

    Examples
    --------
    >>> nanaverage(np.array([1, 2, 3]))
    2.0
    >>> nanaverage(np.array([np.nan, 2, 3]))
    2.5
    >>> nanaverage(np.array([np.nan, 2, 3]), np.array([1, 2, np.nan]))
    2.0

    """
    msg = "Argument `{}` must be a numpy array (got `{}`)."
    if not isinstance(arr, np.ndarray):
        raise ValueError(msg.format("arr", type(arr)))
    if weights is not None and not isinstance(weights, np.ndarray):
        raise ValueError(msg.format("weights", type(weights)))
    if weights is None:
        weights = np.full_like(arr, fill_value=1)
    indices = ~np.isnan(arr) & ~np.isnan(weights)
    return np.average(arr[indices], weights=weights[indices]).item()

`table_destination(ref, /, tables, **kwargs)`

Return the address of a table.

Parameters:

Name	Type	Default
`ref`	`str`	required
`tables`	`dict[str, Table]`	required
`kwargs`		`{}`

Source code in named_xlsx/utils.py

@cached
def table_destination(
    ref: str, /, tables: dict[str, Table], **kwargs
) -> tuple[str, str]:
    """
    Return the address of a table.

    Parameters
    ----------
    ref
    tables
    kwargs

    Returns
    -------

    """
    r = parse("{table_name}[{table_colname}]", ref)
    table_name = r["table_name"]
    table_colname = r["table_colname"]
    return tables[table_name].mapper_columns(**kwargs)[table_colname]

Utilities

Table dataclass

XLSXAddress dataclass

is_range property

shape property

size property

as_array(order='row', squeeze=True)

from_parts(sheet, coords) classmethod

get_destinations(defined_name, tables)

nanaverage(arr, weights=None)

table_destination(ref, /, tables, **kwargs)

`Table` `dataclass`

`XLSXAddress` `dataclass`

`is_range` `property`

`shape` `property`

`size` `property`

`as_array(order='row', squeeze=True)`

`from_parts(sheet, coords)` `classmethod`

`get_destinations(defined_name, tables)`

`nanaverage(arr, weights=None)`

`table_destination(ref, /, tables, **kwargs)`