Skip to content

Utilities

Utilities.

Table dataclass

Spreadsheet table.

Source code in named_xlsx/utils.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
@dataclass
class Table:
    """Spreadsheet table."""

    name: str
    sheet: str
    _table: openpyxl.worksheet.table.Table = field(repr=False)

    @property
    def cells(self):
        return self._table.ref

    def mapper_columns(self, **kwargs) -> dict[str, tuple[str, str]]:
        table_colnames = self._table.column_names
        cr = CellRange(self.cells)
        cr.shrink(**kwargs)
        table_colsindx = [col for row, col in cr.top]
        table_rowsindx = [row for row, col in cr.left]
        table_row_top, table_row_bottom = table_rowsindx[0], table_rowsindx[-1]

        data = {}
        for col_name, col_indx in zip(table_colnames, table_colsindx):
            col_letter = get_column_letter(col_indx)
            addr = f"{col_letter}{table_row_top}:{col_letter}{table_row_bottom}"
            data[col_name] = (self.sheet, addr)
        return data

XLSXAddress dataclass

Excel address, with or without sheet name, according to project conventions.

Examples:

>>> a = XLSXAddress("My Sheet!A10:A15")
>>> a
XLSXAddress(value='My Sheet!A10:A15')
>>> a.sheet
'My Sheet'
>>> a.coord
'A10:A15'
>>> a = XLSXAddress("A10:A15")
>>> a
XLSXAddress(value='A10:A15')
>>> a.sheet
>>> a.coord
'A10:A15'
Source code in named_xlsx/utils.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
@dataclass(frozen=True)
class XLSXAddress:
    """
    Excel address, with or without sheet name, according to project conventions.

    Examples
    --------
    >>> a = XLSXAddress("My Sheet!A10:A15")
    >>> a
    XLSXAddress(value='My Sheet!A10:A15')
    >>> a.sheet
    'My Sheet'
    >>> a.coord
    'A10:A15'
    >>> a = XLSXAddress("A10:A15")
    >>> a
    XLSXAddress(value='A10:A15')
    >>> a.sheet
    >>> a.coord
    'A10:A15'

    """

    value: str
    sheet: str = field(init=False, repr=False)
    coord: str = field(init=False, repr=False)
    _parsed: CellRange = field(init=False, repr=False)

    def __post_init__(self):

        value = self.value
        if " " in self.value and "!" in self.value:
            sheet, coord = self.value.rsplit("!")
            value = f"'{sheet}'!{coord}"
        try:
            _parsed = CellRange(value)
        except ValueError as e:
            raise ValueError(f"Cannot parse: {self.value=}") from e
        object.__setattr__(self, "_parsed", _parsed)
        object.__setattr__(self, "sheet", self._parsed.title)
        object.__setattr__(self, "coord", self._parsed.coord)

    @classmethod
    def from_parts(cls, sheet: str, coords: str):
        """
        Build instance from parts.

        Parameters
        ----------
        sheet
            Sheet name. Can be `None`.
        coords
            Coordinates. Must always be given.

        Examples
        --------
        >>> XLSXAddress.from_parts("My Sheet", "A10")
        XLSXAddress(value='My Sheet!A10')
        >>> XLSXAddress.from_parts(None, "A10")
        XLSXAddress(value='A10')

        """
        return cls(coords if sheet is None else f"{sheet}!{coords}")

    def as_array(self, order: str = "row", squeeze: bool = True) -> np.ndarray:
        """
        Generate coordinate array (row major: left-right then top-down).

        Parameters
        ----------
        order
            Specify the order, one of ['row', 'col'].
        squeeze
            Drop single-dimensional entries from the shape of the resulting array.

        Examples
        --------
        >>> XLSXAddress("A10:B11").as_array()
        array([['A10', 'B10'],
               ['A11', 'B11']], dtype='<U3')
        >>> XLSXAddress("Hello!A10:B11").as_array()
        array([['A10', 'B10'],
               ['A11', 'B11']], dtype='<U3')

        """
        d = dict(row=rows_from_range, col=cols_from_range)
        out = np.array(list(d[order](self.coord)))
        if squeeze:
            out = np.squeeze(out)
        return out

    @property
    def is_range(self) -> bool:
        """

        Examples
        --------
        >>> XLSXAddress("A10").is_range
        False
        >>> XLSXAddress("A10:B11").is_range
        True

        """
        if self.size == 1:
            return False
        if self.size > 1:
            return True
        raise ValueError(f"Unexpected {self.size=} for {self.value=}")

    @property
    def size(self) -> int:
        """
        Size of the address (number of elements).

        Examples
        --------
        >>> XLSXAddress("A10").size
        np.int64(1)
        >>> XLSXAddress("A10:A10").size
        np.int64(1)
        >>> XLSXAddress("A10:C11").size
        np.int64(6)
        >>> XLSXAddress("A10:A12").size
        np.int64(3)

        """
        return np.prod(list(self._parsed.size.values()))

    @property
    def shape(self) -> tuple[int, int]:
        """
        Shape of the address as `(row, column)`.

        Examples
        --------
        >>> XLSXAddress("A10").shape
        (1, 1)
        >>> XLSXAddress("A10:A10").shape
        (1, 1)
        >>> XLSXAddress("A10:C11").shape
        (2, 3)
        >>> XLSXAddress("A10:A12").shape
        (3, 1)

        """
        d = self._parsed.size
        return d["rows"], d["columns"]

    def format(self):
        return f"{self.sheet}!{self.coord}" if self.sheet is not None else self.coord

is_range property

Examples:

>>> XLSXAddress("A10").is_range
False
>>> XLSXAddress("A10:B11").is_range
True

shape property

Shape of the address as (row, column).

Examples:

>>> XLSXAddress("A10").shape
(1, 1)
>>> XLSXAddress("A10:A10").shape
(1, 1)
>>> XLSXAddress("A10:C11").shape
(2, 3)
>>> XLSXAddress("A10:A12").shape
(3, 1)

size property

Size of the address (number of elements).

Examples:

>>> XLSXAddress("A10").size
np.int64(1)
>>> XLSXAddress("A10:A10").size
np.int64(1)
>>> XLSXAddress("A10:C11").size
np.int64(6)
>>> XLSXAddress("A10:A12").size
np.int64(3)

as_array(order='row', squeeze=True)

Generate coordinate array (row major: left-right then top-down).

Parameters:

Name Type Description Default
order str

Specify the order, one of ['row', 'col'].

'row'
squeeze bool

Drop single-dimensional entries from the shape of the resulting array.

True

Examples:

>>> XLSXAddress("A10:B11").as_array()
array([['A10', 'B10'],
       ['A11', 'B11']], dtype='<U3')
>>> XLSXAddress("Hello!A10:B11").as_array()
array([['A10', 'B10'],
       ['A11', 'B11']], dtype='<U3')
Source code in named_xlsx/utils.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def as_array(self, order: str = "row", squeeze: bool = True) -> np.ndarray:
    """
    Generate coordinate array (row major: left-right then top-down).

    Parameters
    ----------
    order
        Specify the order, one of ['row', 'col'].
    squeeze
        Drop single-dimensional entries from the shape of the resulting array.

    Examples
    --------
    >>> XLSXAddress("A10:B11").as_array()
    array([['A10', 'B10'],
           ['A11', 'B11']], dtype='<U3')
    >>> XLSXAddress("Hello!A10:B11").as_array()
    array([['A10', 'B10'],
           ['A11', 'B11']], dtype='<U3')

    """
    d = dict(row=rows_from_range, col=cols_from_range)
    out = np.array(list(d[order](self.coord)))
    if squeeze:
        out = np.squeeze(out)
    return out

from_parts(sheet, coords) classmethod

Build instance from parts.

Parameters:

Name Type Description Default
sheet str

Sheet name. Can be None.

required
coords str

Coordinates. Must always be given.

required

Examples:

>>> XLSXAddress.from_parts("My Sheet", "A10")
XLSXAddress(value='My Sheet!A10')
>>> XLSXAddress.from_parts(None, "A10")
XLSXAddress(value='A10')
Source code in named_xlsx/utils.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
@classmethod
def from_parts(cls, sheet: str, coords: str):
    """
    Build instance from parts.

    Parameters
    ----------
    sheet
        Sheet name. Can be `None`.
    coords
        Coordinates. Must always be given.

    Examples
    --------
    >>> XLSXAddress.from_parts("My Sheet", "A10")
    XLSXAddress(value='My Sheet!A10')
    >>> XLSXAddress.from_parts(None, "A10")
    XLSXAddress(value='A10')

    """
    return cls(coords if sheet is None else f"{sheet}!{coords}")

get_destinations(defined_name, tables)

Get plain spreadsheet address from defined names.

This is a middle-man function because openpyxl does not seem to deal with defined names that point to table columns. As per project conventions, tables are labelled t.<name>, so this function deals with that case separately with a bunch of hard-set conditions.

Source code in named_xlsx/utils.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@cached
def get_destinations(
    defined_name: DefinedName, tables: dict[str, Table]
) -> list[tuple[str, str]]:
    """
    Get plain spreadsheet address from defined names.

    This is a middle-man function because openpyxl does not seem to deal with
    defined names that point to table columns.
    As per project conventions, tables are labelled `t.<name>`,
    so this function deals with that case separately with a bunch of
    hard-set conditions.

    """
    if defined_name.attr_text.startswith("t."):
        # Has a table as per project convention: with header and total rows
        # Get destination, shrinking range to data cells (without header and total rows).
        dest = defined_name.attr_text
        return [table_destination(dest, tables=tables, top=1, bottom=1)]
    return list(defined_name.destinations)

nanaverage(arr, weights=None)

Compute average ignoring nan values in arr and weights.

Examples:

>>> nanaverage(np.array([1, 2, 3]))
2.0
>>> nanaverage(np.array([np.nan, 2, 3]))
2.5
>>> nanaverage(np.array([np.nan, 2, 3]), np.array([1, 2, np.nan]))
2.0
Source code in named_xlsx/utils.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
def nanaverage(arr: np.ndarray, weights: np.ndarray | None = None) -> float:
    """
    Compute average ignoring `nan` values in `arr` and `weights`.

    Examples
    --------
    >>> nanaverage(np.array([1, 2, 3]))
    2.0
    >>> nanaverage(np.array([np.nan, 2, 3]))
    2.5
    >>> nanaverage(np.array([np.nan, 2, 3]), np.array([1, 2, np.nan]))
    2.0

    """
    msg = "Argument `{}` must be a numpy array (got `{}`)."
    if not isinstance(arr, np.ndarray):
        raise ValueError(msg.format("arr", type(arr)))
    if weights is not None and not isinstance(weights, np.ndarray):
        raise ValueError(msg.format("weights", type(weights)))
    if weights is None:
        weights = np.full_like(arr, fill_value=1)
    indices = ~np.isnan(arr) & ~np.isnan(weights)
    return np.average(arr[indices], weights=weights[indices]).item()

table_destination(ref, /, tables, **kwargs)

Return the address of a table.

Parameters:

Name Type Description Default
ref str
required
tables dict[str, Table]
required
kwargs
{}
Source code in named_xlsx/utils.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
@cached
def table_destination(
    ref: str, /, tables: dict[str, Table], **kwargs
) -> tuple[str, str]:
    """
    Return the address of a table.

    Parameters
    ----------
    ref
    tables
    kwargs

    Returns
    -------

    """
    r = parse("{table_name}[{table_colname}]", ref)
    table_name = r["table_name"]
    table_colname = r["table_colname"]
    return tables[table_name].mapper_columns(**kwargs)[table_colname]