# encoding: utf-8
"""Cube-partition objects.
A cube-partition allows cubes of various dimensionality to be processed in a uniform
way. For example, a 2D cube becomes a `_Slice` object, but a 3D cube is "sliced" into
a sequence of `_Slice` objects; a `_Slice` object corresponds to a crosstab, and can be
operated on consistently whether it is "alone" or one of a sequence that came from a 3D
cube.
Cube-partition objects are typically used for display of secondary analysis, often in an
Excel spreadsheet but also other formats.
The three types of cube partition are the *slice*, *strand*, and *nub*, which are 2D,
1D, and 0D respectively.
"""
import math
import numpy as np
from tabulate import tabulate
from cr.cube.collator import PayloadOrderCollator
from cr.cube.enums import (
CUBE_MEASURE as CM,
DIMENSION_TYPE as DT,
ORDER_FORMAT,
MARGINAL_ORIENTATION as MO,
)
from cr.cube.matrix.assembler import _BaseOrderHelper
from cr.cube.matrix.measure import SecondOrderMeasures
from cr.cube.matrix.subtotals import SumSubtotals
from cr.cube.min_base_size_mask import MinBaseSizeMask
from cr.cube.measures.pairwise_significance import PairwiseSignificance
from cr.cube.scalar import MeansScalar
from cr.cube.stripe.assembler import _BaseOrderHelper as stripe_BaseOrderHelper
from cr.cube.stripe.measure import StripeMeasures
from cr.cube.util import lazyproperty
# ---Z_975 is the quantile of the standard-normal Cumulative Distribution Function
# ---(CDF) at probability 97.5% (p=.975). It is the multiplier for a two-sided
# ---confidence interval that leaves 2.5% (.025) in each tail of the distribution.
Z_975 = 1.959964
class CubePartition:
    """Base class for a slice, strand, or nub partitioned out of a cube-response.

    A partition spans 2, 1, or 0 dimensions of its cube, respectively.
    """

    def __init__(self, cube, transforms=None):
        self._transforms_arg = transforms
        self._cube = cube

    @classmethod
    def factory(
        cls,
        cube,
        slice_idx=0,
        transforms=None,
        population=None,
        ca_as_0th=None,
        mask_size=0,
    ):
        """Return the `_Nub`, `_Strand`, or `_Slice` that suits the arguments.

        A 0D cube produces a `_Nub`. A 1D cube, or any other cube when `ca_as_0th`
        is truthy, produces a `_Strand`. Everything else produces a `_Slice`.
        """
        if cube.ndim == 0:
            return _Nub(cube)
        if cube.ndim != 1 and not ca_as_0th:
            return _Slice(cube, slice_idx, transforms, population, mask_size)
        return _Strand(cube, transforms, population, ca_as_0th, slice_idx, mask_size)

    @lazyproperty
    def cube_index(self):
        """Offset of this partition's cube within its CubeSet.

        Helps tell apart certain partitions, such as a filtered rows-summary strand.
        """
        cube = self._cube
        return cube.cube_index

    @lazyproperty
    def dimension_types(self):
        """Tuple of `cr.cube.enum.DIMENSION_TYPE` member, one per dimension.

        Ordered rows-dimension first, then columns-dimension.
        """
        types = [dimension.dimension_type for dimension in self._dimensions]
        return tuple(types)

    @lazyproperty
    def ndim(self):
        """int number of dimensions in this partition."""
        dimensions = self._dimensions
        return len(dimensions)

    @lazyproperty
    def population_fraction(self):
        """Population fraction reported by the underlying cube."""
        cube = self._cube
        return cube.population_fraction

    @lazyproperty
    def selected_category_labels(self):
        """Tuple of str name of each underlying 'Selected' category.

        Categories with a missing or falsy "name" item are left out.
        """
        labels = []
        for dimension in self._dimensions:
            for category in dimension.selected_categories:
                if category.get("name"):
                    labels.append(category["name"])
        return tuple(labels)

    @lazyproperty
    def shape(self):
        """Tuple of int vector counts for this partition.

        This is the count of rows and columns (in that order) of the partition, not
        the `numpy.ndarray.shape` of any array used in its implementation. Any count
        in the shape can be zero.

        A _Slice has a shape like (2, 3), meaning (row-count, col-count). A _Strand
        has a shape like (5,), its row-count. A _Nub always has shape () (the empty
        tuple).
        """
        raise NotImplementedError(
            "must be implemented by each subclass"
        )  # pragma: no cover

    @lazyproperty
    def variable_name(self):
        """str name of the superheading variable."""
        # --- first dimension below 2D, second dimension otherwise ---
        dimension_idx = 1 if self.ndim >= 2 else 0
        return self._dimensions[dimension_idx].name

    @lazyproperty
    def _alpha(self):
        """float confidence-interval threshold for pairwise-t (sig) tests."""
        thresholds = self._alpha_values
        return thresholds[0]

    @lazyproperty
    def _alpha_alt(self):
        """Optional secondary float confidence-interval threshold.

        Enables two-level significance testing. None when the user specified no
        alternate alpha.
        """
        thresholds = self._alpha_values
        return thresholds[1]

    @lazyproperty
    def _alpha_values(self):
        """Pair (tuple) of confidence-interval thresholds used for t-tests.

        The second item is None when no secondary alpha value was defined. Raises
        `TypeError` when the configured value is neither a float nor a list/tuple,
        and `ValueError` when a float lies outside the open interval (0.0, 1.0).
        """
        value = self._transforms_dict.get("pairwise_indices", {}).get("alpha")

        # --- omitted, None, [], (), {}, "", 0, and 0.0 all mean "use the default" ---
        if not value:
            return (0.05, None)

        # --- legacy case, a float "by-itself" ---
        if isinstance(value, float):
            if not 0.0 < value < 1.0:
                raise ValueError(
                    "alpha value, when provided, must be between 0.0 and 1.0 "
                    f"exclusive. Got {repr(value)}"
                )
            return (value, None)

        # --- anything that is not a sequence at this point is an invalid type ---
        if not isinstance(value, (list, tuple)):
            raise TypeError(
                f"transforms.pairwise_indices.alpha, when defined, must be a list of 1 "
                f"or 2 float values between 0.0 and 1.0 exclusive. Got {repr(value)}"
            )

        # --- sequence case, only the first two items are considered ---
        leading = value[:2]
        if not all(isinstance(x, float) and 0.0 < x < 1.0 for x in leading):
            raise ValueError(
                f"transforms.pairwise_indices.alpha must be a list of 1 or 2 float "
                f"values between 0.0 and 1.0 exclusive. Got {repr(value)}"
            )

        if len(value) == 1:
            return (value[0], None)
        return tuple(sorted(leading))

    @lazyproperty
    def _available_measures(self):
        """list of CUBE_MEASURE members in the cube response, sorted by name."""
        return sorted(self._cube.available_measures, key=lambda measure: measure.name)

    @lazyproperty
    def _default_contents(self):
        """1D/2D np.float64 ndarray of counts, means or sums, if available.

        The measure shown is the first of the available measures.
        """
        first_measure = self._available_measures[0]
        property_names = {CM.COUNT: "counts", CM.MEAN: "means", CM.SUM: "sums"}
        return getattr(self, property_names[first_measure])

    @lazyproperty
    def _dimensions(self):
        """tuple of Dimension object for each dimension in cube-partition."""
        raise NotImplementedError(
            "must be implemented by each subclass"
        )  # pragma: no cover

    @lazyproperty
    def _only_larger(self):
        """True if only the larger of reciprocal pairwise-t values should appear.

        Pairwise-t tests are generally reciprocal: when A is significant with
        respect to B, B is significant with respect to A. A letter in both columns
        can look cluttered, so when this flag is set only the cell of the reciprocal
        pair having the largest value gets a letter.

        True unless the user explicitly set it to False.
        """
        pairwise_transforms = self._transforms_dict.get("pairwise_indices", {})
        return pairwise_transforms.get("only_larger", True) is not False

    @lazyproperty
    def _transforms_dict(self):
        """dict of transforms for this partition, as passed in the `transforms` arg.

        An empty dict (`{}`) when no transforms were given on construction.
        """
        if self._transforms_arg is None:
            return {}
        return self._transforms_arg
[docs]class _Slice(CubePartition):
"""2D cube partition.
A slice represents the cross-tabulation of two dimensions, often, but not
necessarily contributed by two different variables. A single CA variable has two
dimensions which can be crosstabbed in a slice.
"""
def __init__(self, cube, slice_idx, transforms, population, mask_size):
    """Keep the construction arguments; cube and transforms go to the base class."""
    super(_Slice, self).__init__(cube, transforms)
    self._mask_size = mask_size
    self._population = population
    self._slice_idx = slice_idx
def __repr__(self):
    """Return a text rendering of the slice suited to console work.

    Any exception while building it (for example in unit tests where the object
    lacks some instance values) falls back to the default repr.
    """
    try:
        dimensionality = " x ".join(dt.name for dt in self.dimension_types)
        title = (
            f"{type(self).__name__}(name='{self.name}', "
            f"dimension_types='{dimensionality}')"
        )
        table_rows = []
        for row_label, row in zip(self.row_labels, self._default_contents):
            table_rows.append([row_label] + row.tolist())
        showing = f"Showing: {self._available_measures[0].name}"
        table = f"{tabulate(table_rows, [''] + self.column_labels.tolist())}"
        available = f"Available measures: {str(self._available_measures)}"
        return "\n".join((title, showing, table, available))
    except Exception:
        return super(_Slice, self).__repr__()  # noqa
# ---interface ---------------------------------------------------
@lazyproperty
def column_aliases(self):
    """1D str ndarray of column alias, in column order, for use as headings.

    Element aliases are followed by subtotal aliases before reordering.
    """
    columns_dimension = self._dimensions[1]
    aliases = columns_dimension.element_aliases + columns_dimension.subtotal_aliases
    return np.array(aliases)[self._column_order]
@lazyproperty
def column_codes(self):
    """1D int ndarray of column code, in column order, for use as headings.

    Element ids are followed by insertion ids before reordering.
    """
    columns_dimension = self._dimensions[1]
    codes = columns_dimension.element_ids + columns_dimension.insertion_ids
    return np.array(codes)[self._column_order]
@lazyproperty
def column_index(self):
    """2D np.float64 ndarray of column-index "percentage" for each cell.

    An index value expresses the difference of a percentage to its baseline
    value. The baselines are the univariate percentages of the rows variable.
    """
    blocks = self._measures.column_index.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def column_labels(self):
    """1D str ndarray of column name, in column order, for use as headings.

    Element labels are followed by subtotal labels before reordering.
    """
    columns_dimension = self._dimensions[1]
    labels = columns_dimension.element_labels + columns_dimension.subtotal_labels
    return np.array(labels)[self._column_order]
@lazyproperty
def column_percentages(self):
    """`.column_proportions` scaled by 100."""
    proportions = self.column_proportions
    return proportions * 100
@lazyproperty
def column_proportions(self):
    """2D np.float64 ndarray of column-proportion for each matrix cell.

    A cell value is the share of its column's weighted-N (aka. weighted base)
    that the cell's *weighted-count* represents, generally between 0.0 and 1.0.
    Within an inserted subtotal vector involving differences, values can range
    between -1.0 and 1.0.
    """
    blocks = self._measures.column_proportions.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def column_proportions_moe(self):
    """1D/2D np.float64 ndarray of margin-of-error (MoE) for column proportions.

    Values are fractions, like those of `.column_proportions`, so 3.5% appears as
    0.035. A value can be np.nan when the corresponding percentage is np.nan,
    which happens when the respective columns margin is 0.
    """
    std_err = self.column_std_err
    return Z_975 * std_err
@lazyproperty
def column_share_sum(self):
    """2D optional np.float64 ndarray of column share-of-sum for each table cell.

    Column share of sum is the sum of each subvar item divided by the TOTAL
    number of column items.

    Raises `ValueError` if the cube-result does not include a sum cube-measure.
    """
    try:
        blocks = self._measures.column_share_sum.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        raise ValueError(
            "`.column_share_sum` is undefined for a cube-result without a sum "
            "measure"
        )
@lazyproperty
def column_proportion_variances(self):
    """2D np.float64 ndarray of column-proportion variance for each matrix cell."""
    blocks = self._measures.column_proportion_variances.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def column_std_dev(self):
    """Standard deviation for column percentages.

    Computed as the square root of `.column_proportion_variances`:
    `std_deviation = sqrt(variance)`
    """
    variances = self.column_proportion_variances
    return np.sqrt(variances)
@lazyproperty
def column_std_err(self):
    """Standard error for column percentages.

    `std_error = sqrt(variance/N)`
    """
    blocks = self._measures.column_std_err.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def column_unweighted_bases(self):
    """2D np.float64 ndarray of unweighted column-proportion denominator per cell."""
    blocks = self._measures.column_unweighted_bases.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def column_weighted_bases(self):
    """2D np.float64 ndarray of weighted column-proportion denominator per cell."""
    blocks = self._measures.column_weighted_bases.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def columns_base(self):
    """1D/2D np.float64 ndarray of unweighted-N for each column/cell of slice.

    The array is 2D (a distinct base for each cell) when the rows dimension is
    MR, because each MR-subvariable has its own unweighted N; not every possible
    response is necessarily offered to every respondent.

    In all other cases the array is 1D, with one value per column.
    """
    # --- TODO: Should column_base only be defined when it's 1D? This would
    # --- require changes to exporter to use the bases to give a
    # --- "column_base_range"
    unweighted_base = self._measures.columns_unweighted_base
    if unweighted_base.is_defined:
        # --- the usual case, columns-base is a vector ---
        return self._assemble_marginal(unweighted_base)
    # --- an MR_X slice has no base marginal; each cell has its own N, so this
    # --- is just another way to get `.column_unweighted_bases`
    return self.column_unweighted_bases
@lazyproperty
def columns_dimension_description(self):
    """str description of the columns-dimension."""
    columns_dimension = self._dimensions[1]
    return columns_dimension.description
@lazyproperty
def columns_dimension_name(self):
    """str name of the columns-dimension.

    Reflects the resolved dimension-name transform cascade.
    """
    columns_dimension = self._dimensions[1]
    return columns_dimension.name
@lazyproperty
def columns_dimension_type(self):
    """Member of `cr.cube.enum.DIMENSION_TYPE` for the columns dimension."""
    columns_dimension = self._dimensions[1]
    return columns_dimension.dimension_type
@lazyproperty
def columns_margin(self):
    """1D or 2D np.float64 ndarray of weighted-N for each column of slice.

    The array is 2D (a distinct margin value for each cell) when the rows
    dimension is MR, because each MR-subvariable has its own weighted N; not
    every possible response is necessarily offered to every respondent.

    In all other cases the array is 1D, with one value per column.
    """
    # --- TODO: Should column_margin only be defined when it's 1D? This would
    # --- require changes to exporter to use the bases to give a
    # --- "column_margin_range"
    weighted_base = self._measures.columns_weighted_base
    if weighted_base.is_defined:
        # --- the usual case, columns-margin is a vector ---
        return self._assemble_marginal(weighted_base)
    # --- an MR_X slice has no margin marginal; each cell has its own N, so this
    # --- is just another way to get `.column_weighted_bases`
    return self.column_weighted_bases
@lazyproperty
def columns_margin_proportion(self):
    """1D or 2D np.float64 ndarray of weighted-proportion for each column of slice.

    The array is 2D (a distinct margin value for each cell) when the rows
    dimension is MR, because each MR-subvariable has its own weighted N; not
    every possible response is necessarily offered to every respondent.

    In all other cases the array is 1D, with one value per column.
    """
    # --- TODO: Should columns_margin_proportion only be defined when it's 1D?
    # --- This requires changes to exporter to use the bases to give a
    # --- "columns_margin_range"
    table_proportion = self._measures.columns_table_proportion
    if table_proportion.is_defined:
        # --- the usual case, columns-margin-proportion is a marginal ---
        return self._assemble_marginal(table_proportion)
    # --- an MR_X slice produces a 2D result (each cell has its own N) ---
    cell_proportions = self.columns_margin / self.table_weighted_bases
    blocks = SumSubtotals.blocks(cell_proportions, self._dimensions)
    return self._assemble_matrix(blocks)
@lazyproperty
def columns_scale_mean(self):
    """Optional 1D np.float64 ndarray of scale mean for each column.

    The vector is to be read as a summary *row*. The underlying scale values come
    from the numeric values of the opposing *rows-dimension* elements.

    This value is `None` if no row element has an assigned numeric value.
    """
    marginal = self._measures.columns_scale_mean
    return self._assemble_marginal(marginal)
@lazyproperty
def columns_scale_mean_margin(self):
    """Optional float overall mean of column-scale values.

    This is the "margin" of the `.columns_scale_mean` vector and would typically
    appear in the cell immediately to the right of that summary-row. It resembles
    a "table-total" in that it is a scalar that might sit in the lower right-hand
    corner of a table, but it does *not* represent the overall table:
    `.rows_scale_mean_margin` will not have the same value (except by chance).
    This value derives from the numeric values of the row elements, whereas
    `.rows_scale_mean_margin` derives from those of the column elements.

    This value is `None` if no row has an assigned numeric-value.
    """
    if not self._rows_have_numeric_value:
        return None

    # TODO: This is a hack for X_Array slices, where rows-margin is undefined.
    # It probably shouldn't be defined across arrays, but to minimize test damage
    # the first column of the weighted bases is used, which is equal to the
    # rows_margin for CAT_X_CAT and always exists for others.
    rows_margin = self.row_weighted_bases[:, 0]
    numeric_values = self._rows_dimension_numeric_values

    weighted_total = np.nansum(numeric_values * rows_margin)
    # --- only rows that have a numeric value contribute to the denominator ---
    has_numeric_value = ~np.isnan(numeric_values)
    weighted_count = np.sum(rows_margin[has_numeric_value])
    return weighted_total / weighted_count
@lazyproperty
def columns_scale_mean_pairwise_indices(self):
    """Sequence of column-idx tuples giving the pairwise-t result of scale-means.

    There is one tuple per column. Each index in a column's tuple identifies
    *another* column whose scale-mean is pairwise-significant to that of the
    tuple's column. Significance is computed with the more restrictive
    (lesser-value) threshold specified in the analysis.
    """
    alpha, only_larger = self._alpha, self._only_larger
    return PairwiseSignificance.scale_mean_pairwise_indices(
        self, alpha, only_larger
    )
@lazyproperty
def columns_scale_mean_pairwise_indices_alt(self):
    """Optional sequence of column-idx tuples giving pairwise-t of scale-means.

    `None` if no secondary threshold value (alpha) was specified in the analysis.
    Otherwise the same calculation as `.columns_scale_mean_pairwise_indices`,
    computed with the less restrictive (greater-valued) threshold.
    """
    alpha_alt = self._alpha_alt
    if alpha_alt is not None:
        return PairwiseSignificance.scale_mean_pairwise_indices(
            self, alpha_alt, self._only_larger
        )
    return None
@lazyproperty
def columns_scale_mean_stddev(self):
    """Optional 1D np.float64 ndarray of scale-mean std-deviation for each column.

    The vector (1D array) is to be read as a summary *row*. The underlying scale
    values come from the numeric values of the opposing *rows-dimension*
    elements.

    This value is `None` if no row element has been assigned a numeric value.
    """
    marginal = self._measures.columns_scale_mean_stddev
    return self._assemble_marginal(marginal)
@lazyproperty
def columns_scale_mean_stderr(self):
    """Optional 1D np.float64 ndarray of scale-mean standard-error for each column.

    The vector is to be read as a summary *row*. The underlying scale values come
    from the numeric values of the opposing *rows-dimension* elements.

    This value is `None` if no row element has a numeric value assigned or if the
    columns-weighted-base is `None` (eg an array variable in the row dim).
    """
    marginal = self._measures.columns_scale_mean_stderr
    return self._assemble_marginal(marginal)
@lazyproperty
def counts(self):
    """2D np.float64 ndarray of weighted cube counts."""
    blocks = self._measures.weighted_counts.blocks
    return self._assemble_matrix(blocks)

# --- `.weighted_counts` is an alias for `.counts` ---
weighted_counts = counts
@lazyproperty
def description(self):
    """str description of this slice, taken from its rows-dimension."""
    rows_dimension = self._rows_dimension
    return rows_dimension.description
@lazyproperty
def has_scale_means(self):
    """True if `.columns_scale_mean` is available (not None) for this slice."""
    return self.columns_scale_mean is not None
@lazyproperty
def inserted_column_idxs(self):
    """tuple of int position of each subtotal column in slice."""
    positions = []
    for position, col_idx in enumerate(self._column_order):
        # --- an insertion is marked by a negative idx in the order sequence ---
        if col_idx < 0:
            positions.append(position)
    return tuple(positions)
@lazyproperty
def inserted_row_idxs(self):
    """tuple of int position of each subtotal row in slice."""
    positions = []
    for position, row_idx in enumerate(self._row_order_signed_indexes):
        # --- an insertion is marked by a negative idx in the order sequence ---
        if row_idx < 0:
            positions.append(position)
    return tuple(positions)
@lazyproperty
def derived_column_idxs(self):
    """tuple of int index of each derived column-element in slice.

    An element is derived when it is a subvariable of a multiple response
    dimension that was produced by the zz9 and inserted into the response data.
    Regular MR and CA subvariables and categories of CAT dimensions are not
    derived. Subtotals are not derived in this sense either, since they are not
    part of the data (elements) at all.
    """
    columns_dimension = self._dimensions[1]
    return self._derived_element_idxs(columns_dimension, self._column_order)
@lazyproperty
def derived_row_idxs(self):
    """tuple of int index of each derived row-element in slice.

    An element is derived when it is a subvariable of a multiple response
    dimension that was produced by the zz9 and inserted into the response data.
    Regular MR and CA subvariables and categories of CAT dimensions are not
    derived. Subtotals are not derived in this sense either, since they are not
    part of the data (elements) at all.
    """
    rows_dimension = self._rows_dimension
    row_order = self._row_order_signed_indexes
    return self._derived_element_idxs(rows_dimension, row_order)
@lazyproperty
def diff_column_idxs(self):
    """tuple of int index of each difference column-element in slice."""
    columns_dimension = self._dimensions[1]
    return self._diff_element_idxs(columns_dimension, self._column_order)
@lazyproperty
def diff_row_idxs(self):
    """tuple of int index of each difference row-element in slice."""
    rows_dimension = self._rows_dimension
    row_order = self._row_order_signed_indexes
    return self._diff_element_idxs(rows_dimension, row_order)
@lazyproperty
def is_empty(self):
    """True if any count in `.shape` is zero."""
    for vector_count in self.shape:
        if vector_count == 0:
            return True
    return False
@lazyproperty
def means(self):
    """2D optional np.float64 ndarray of mean value for each table cell.

    A cell belonging to an inserted subtotal is `np.nan`, because the means of
    the addend cells cannot simply be added to get the mean of the subtotal.

    Raises `ValueError` if the cube-result does not include a means cube-measure.
    """
    try:
        blocks = self._measures.means.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        raise ValueError(
            "`.means` is undefined for a cube-result without a mean measure"
        )
@lazyproperty
def min_base_size_mask(self):
    """`MinBaseSizeMask` object built from this slice and its `mask_size`."""
    mask_size = self._mask_size
    return MinBaseSizeMask(self, mask_size)
@lazyproperty
def name(self):
    """str name of this slice, which is the name of its rows-dimension."""
    rows_dimension_name = self.rows_dimension_name
    return rows_dimension_name
@staticmethod
def _pairwise_indices(p_vals, t_stats, alpha, only_larger):
"""1D ndarray containing tuples of int pairwise indices of each column."""
with np.errstate(divide="ignore", invalid="ignore"):
significance = p_vals < alpha
if only_larger:
significance = np.logical_and(t_stats < 0, significance)
col_significance = np.empty((len(significance),), dtype=object)
col_significance[:] = [
tuple(np.where(sig_row)[0]) for sig_row in significance
]
return col_significance
def _pairwise_means_indices(self, alpha, only_larger):
    """2D optional ndarray of tuple of int column-idxs meeting the means threshold.

    Raises `ValueError` if the cube-result does not include `means` cube-measures.
    """
    per_column = []
    for col in range(len(self._column_order)):
        p_vals = self._pairwise_significance_means_p_vals(col)
        t_stats = self._pairwise_significance_means_t_stats(col)
        per_column.append(
            self._pairwise_indices(p_vals, t_stats, alpha, only_larger)
        )
    # --- one entry per column was collected, transpose the resulting array ---
    return np.array(per_column).T
def _pairwise_significance_p_vals(self, column_idx):
    """2D optional np.float64 ndarray of pairwise p-vals for the selected column.

    When the last dimension is categorical, column idxs represent specific
    categories. When it is a multiple response, the result is the 2D p-vals
    matrix for the selected subvariable index (the selected column).

    Raises `ValueError` if the cube-result does not include `overlaps` and
    `valid_overlaps` cube-measures.
    """
    # --- translate the displayed column position to its base (payload) idx ---
    base_column_idx = self._column_order[column_idx]
    if self._cube_has_overlaps:
        # --- when overlaps are defined, significance is based on them ---
        measure = self._measures.pairwise_p_vals_for_subvar(base_column_idx)
    else:
        measure = self._measures.pairwise_p_vals(base_column_idx)
    return self._assemble_matrix(measure.blocks)
def _pairwise_significance_t_stats(self, column_idx):
    """2D optional np.float64 ndarray of pairwise t-stats for the selected column.

    When the last dimension is categorical, column idxs represent specific
    categories. When it is a multiple response, the result is the 2D t-stats
    matrix for the selected subvariable index (the selected column).

    Raises `ValueError` if the cube-result does not include `overlaps` and
    `valid_overlaps` cube-measures.
    """
    # --- translate the displayed column position to its base (payload) idx ---
    base_column_idx = self._column_order[column_idx]
    if self._cube_has_overlaps:
        # --- when overlaps are defined, significance is based on them ---
        measure = self._measures.pairwise_t_stats_for_subvar(base_column_idx)
    else:
        measure = self._measures.pairwise_t_stats(base_column_idx)
    return self._assemble_matrix(measure.blocks)
def _pairwise_significance_means_p_vals(self, column_idx):
    """2D optional np.float64 ndarray of mean-difference p-vals for column idx.

    Raises `ValueError` if the cube-result does not include `mean` cube-measures.
    """
    base_column_idx = self._column_order[column_idx]
    measure = self._measures.pairwise_significance_means_p_vals(base_column_idx)
    return self._assemble_matrix(measure.blocks)
def _pairwise_significance_means_t_stats(self, column_idx):
    """2D optional np.float64 ndarray of mean-difference t-stats for column idx.

    Raises `ValueError` if the cube-result does not include `mean` cube-measures.
    """
    base_column_idx = self._column_order[column_idx]
    measure = self._measures.pairwise_significance_means_t_stats(base_column_idx)
    return self._assemble_matrix(measure.blocks)
@lazyproperty
def pairwise_indices(self):
    """2D ndarray of tuple of int column-idxs meeting the pairwise-t threshold.

    Like::

        [
            [(1, 3, 4), (), (0,), (), ()],
            [(2,), (1, 2), (), (), (0, 3)],
            [(), (), (), (), ()],
        ]

    Has the same shape as `.counts`. Each int is the offset of another column in
    the same row with a confidence interval meeting the threshold defined for
    this analysis.
    """
    per_column = []
    for col in range(len(self._column_order)):
        p_vals = self._pairwise_significance_p_vals(col)
        t_stats = self._pairwise_significance_t_stats(col)
        per_column.append(
            self._pairwise_indices(p_vals, t_stats, self._alpha, self._only_larger)
        )
    # --- one entry per column was collected, transpose the resulting array ---
    return np.array(per_column).T
@lazyproperty
def pairwise_indices_alt(self):
    """2D ndarray of tuple of int column-idxs meeting the alternate threshold.

    This value is None if no alternate threshold has been defined.
    """
    if self._alpha_alt is None:
        return None

    per_column = []
    for col in range(len(self._column_order)):
        p_vals = self._pairwise_significance_p_vals(col)
        t_stats = self._pairwise_significance_t_stats(col)
        per_column.append(
            self._pairwise_indices(
                p_vals, t_stats, self._alpha_alt, self._only_larger
            )
        )
    # --- one entry per column was collected, transpose the resulting array ---
    return np.array(per_column).T
@lazyproperty
def pairwise_means_indices(self):
    """Optional 2D ndarray of tuple of column-idxs significant for the mean.

    Like::

        [
            [(1, 3, 4), (), (0,), (), ()],
            [(2,), (1, 2), (), (), (0, 3)],
            [(), (), (), (), ()],
        ]

    Has the same shape as `.means`. Each int is the offset of another column in
    the same row with a confidence interval meeting the threshold defined for
    this analysis.

    Raises `ValueError` when the underlying computation raises one, with a
    message about the missing mean measure.
    """
    try:
        indices = self._pairwise_means_indices(self._alpha, self._only_larger)
    except ValueError:
        raise ValueError(
            "`.pairwise_means_indices` is undefined for a cube-result "
            "without a mean measure"
        )
    return indices
@lazyproperty
def pairwise_means_indices_alt(self):
    """2D ndarray of tuple of column-idxs meeting the alternate mean threshold.

    This value is None if no alternate threshold has been defined.

    Raises `ValueError` when the underlying computation raises one, with a
    message about the missing mean measure.
    """
    if self._alpha_alt is None:
        return None

    try:
        indices = self._pairwise_means_indices(self._alpha_alt, self._only_larger)
    except ValueError:
        raise ValueError(
            "`.pairwise_means_indices_alt` is undefined for a cube-result "
            "without a mean measure"
        )
    return indices
def pairwise_significance_p_vals(self, column_idx):
    """Return 2D ndarray of pairwise-significance p-vals for the selected column.

    Public counterpart of `._pairwise_significance_p_vals()`.
    """
    p_vals = self._pairwise_significance_p_vals(column_idx)
    return p_vals
def pairwise_significance_t_stats(self, column_idx):
    """Return 2D ndarray of pairwise-significance t-stats for the selected column.

    Public counterpart of `._pairwise_significance_t_stats()`.
    """
    t_stats = self._pairwise_significance_t_stats(column_idx)
    return t_stats
def pairwise_significance_means_p_vals(self, column_idx):
    """Optional 2D ndarray of means-significance p-vals for the selected column.

    Significance of a means difference is only available when the cube contains
    means; a `ValueError` from the computation is re-raised with a message
    saying so.
    """
    try:
        p_vals = self._pairwise_significance_means_p_vals(column_idx)
    except ValueError:
        raise ValueError(
            "`.pairwise_significance_means_p_vals` is undefined for a cube-result "
            "without a mean measure"
        )
    return p_vals
def pairwise_significance_means_t_stats(self, column_idx):
    """Optional 2D ndarray of means-significance t-stats for the selected column.

    Significance of a means difference is only available when the cube contains
    means; a `ValueError` from the computation is re-raised with a message
    saying so.
    """
    try:
        t_stats = self._pairwise_significance_means_t_stats(column_idx)
    except ValueError:
        raise ValueError(
            "`.pairwise_significance_means_t_stats` is undefined for a cube-result "
            "without a mean measure"
        )
    return t_stats
@lazyproperty
def pairwise_significance_tests(self):
    """tuple of _ColumnPairwiseSignificance tests, one per column of the slice.

    Each significance test contains `p_vals` and `t_stats` (ndarrays that
    represent probability values and statistical scores).
    """
    column_count = len(self.column_labels)
    # --- a slice without columns has no tests, nothing needs constructing ---
    if column_count == 0:
        return ()
    # --- the tests object does not depend on the column, so build it once
    # --- rather than once per column
    # --- NOTE(review): assumes `PairwiseSignificance(self).values` gives the same
    # --- sequence on every construction - confirm against that class
    tests = PairwiseSignificance(self).values
    return tuple(tests[column_idx] for column_idx in range(column_count))
@lazyproperty
def payload_order(self):
    """tuple of signed int idx respecting the payload order.

    Positive integers indicate the 1-indexed position in payload of regular
    elements, while negative integers are the subtotal insertions.

    Needed for reordering color palette in exporter.
    """
    # --- positions where the rows pruning mask is truthy ---
    empty_rows_idxs = tuple(np.where(self._measures.rows_pruning_mask)[0])
    collator = PayloadOrderCollator(self._rows_dimension, empty_rows_idxs)
    return tuple(collator.payload_order)
@lazyproperty
def population_proportions(self):
    """2D np.float64 ndarray of population proportion for each cell.

    The proportion used to calculate population counts depends on the dimension
    types. Cells in difference-subtotal rows and columns are set to np.nan.
    """
    blocks = self._measures.population_proportions.blocks
    proportions = self._assemble_matrix(blocks)

    # --- diff subtotals are not allowed in a population measure ---
    diff_rows = self.diff_row_idxs
    if diff_rows:
        proportions[diff_rows, :] = np.nan
    diff_columns = self.diff_column_idxs
    if diff_columns:
        proportions[:, diff_columns] = np.nan

    return proportions
@lazyproperty
def population_counts(self):
    """2D np.float64 ndarray of (estimated) population count for each cell.

    The estimate scales `.population_proportions` by the `population` value given
    when the slice was created (`._population`) and by
    `._cube.population_fraction`, which accounts for any filters applied as part
    of the query. Both factors are scalars, so sort order is unaffected.
    """
    proportions = self.population_proportions
    population = self._population
    population_fraction = self._cube.population_fraction
    return proportions * population * population_fraction
@lazyproperty
def population_std_err(self):
    """2D np.float64 ndarray of standard error for population proportions.

    The proportion the standard error is based on depends on the dimension
    types.
    """
    blocks = self._measures.population_std_err.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def population_counts_moe(self):
    """2D np.float64 ndarray of population-count margin-of-error (MoE) per cell.

    Values are population estimates, like those of `.population_counts`, that is
    actual estimated counts of the population. A value can be np.nan when the
    corresponding percentage is np.nan, which happens when the respective margin
    is 0.

    For categorical dates, the total population is not "divided" between the
    categories but considered constant for all categorical dates (or waves), so
    different standard errors apply in those cases (like `row_std_err` or
    `column_std_err`). When no categorical dates are involved, the standard
    `table_std_err` is used.
    """
    std_err = self.population_std_err
    filtered_population = self._population * self._cube.population_fraction
    return Z_975 * filtered_population * std_err
@lazyproperty
def pvals(self):
    """2D optional np.float64 ndarray of p-value for each cell.

    A p-value is a measure of the probability that an observed difference could
    have occurred just by random chance. The lower the p-value, the greater the
    statistical significance of the observed difference.

    A cell value of np.nan indicates a meaningful p-value could not be computed
    for that cell.
    """
    blocks = self._measures.pvalues.blocks
    return self._assemble_matrix(blocks)


# --- `pvalues` is an alias of `pvals` ---
pvalues = pvals
@lazyproperty
def residual_test_stats(self):
    """ndarray stacking `.pvals` and `.zscores` (with HS), in that order.

    Public method used as cube_method for the SOA API.
    """
    stats = [self.pvals, self.zscores]
    return np.stack(stats)
@lazyproperty
def row_aliases(self):
    """1D str ndarray of row alias for each matrix row.

    These are suitable for use as row headings; aliases for subtotal rows appear
    in the sequence and aliases are ordered to correspond with their respective
    data row.
    """
    rows_dim = self._dimensions[0]
    # --- element aliases first, subtotal aliases appended after them ---
    all_aliases = np.array(rows_dim.element_aliases + rows_dim.subtotal_aliases)
    return all_aliases[self._row_order_signed_indexes]
@lazyproperty
def row_codes(self):
    """1D int ndarray of row codes for each matrix row.

    These are suitable for use as row headings; codes for subtotal rows appear in
    the sequence and codes are ordered to correspond with their respective data
    row.
    """
    rows_dim = self._dimensions[0]
    # --- element ids first, insertion ids appended after them ---
    all_codes = np.array(rows_dim.element_ids + rows_dim.insertion_ids)
    return all_codes[self._row_order_signed_indexes]
@lazyproperty
def row_labels(self):
    """1D str ndarray of row name for each matrix row.

    These are suitable for use as row headings; labels for subtotal rows appear
    in the sequence and labels are ordered to correspond with their respective
    data row.
    """
    rows_dim = self._dimensions[0]
    # --- element labels first, subtotal labels appended after them ---
    all_labels = np.array(rows_dim.element_labels + rows_dim.subtotal_labels)
    return all_labels[self._row_order_signed_indexes]
def row_order(self, format=ORDER_FORMAT.SIGNED_INDEXES):
    """Return the display order of the assembled rows of the matrix.

    If order format is `SIGNED_INDEXES`, negative values represent inserted
    subtotal-row locations; for `BOGUS_IDS`, insertions are represented by an
    `ins_{insertion_id}` string. Any format other than `BOGUS_IDS` yields the
    signed-index order.

    Indices appear in the order rows are to appear in the final result.

    Needed for reordering color palette in exporter.
    """
    if format != ORDER_FORMAT.BOGUS_IDS:
        return self._row_order_signed_indexes
    return _BaseOrderHelper.row_display_order(
        self._dimensions, self._measures, format=ORDER_FORMAT.BOGUS_IDS
    )
@lazyproperty
def row_percentages(self):
    """`.row_proportions` scaled by 100."""
    proportions = self.row_proportions
    return proportions * 100
@lazyproperty
def row_proportions(self):
    """2D np.float64 ndarray of row-proportion for each matrix cell.

    This is the proportion of the weighted-N (aka. weighted base) of its row that
    the *weighted-count* in each cell represents, generally a number between 0.0
    and 1.0. Note that within an inserted subtotal vector involving differences,
    the values can range between -1.0 and 1.0.
    """
    blocks = self._measures.row_proportions.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def row_proportions_moe(self):
    """2D np.float64 ndarray of margin-of-error (MoE) for row proportions.

    The values are represented as percentage-fractions, analogous to the
    `row_proportions` property, so a value of 3.5% is expressed as 0.035. A value
    can be np.nan when the corresponding percentage is also np.nan, which happens
    when the respective table margin is 0.
    """
    std_err = self.row_std_err
    return Z_975 * std_err
@lazyproperty
def row_share_sum(self):
    """2D optional np.float64 ndarray of row share of sum for each table cell.

    Row share of sum is the sum of each subvar item divided by the TOTAL number
    of row items.

    Raises `ValueError` if the cube-result does not include a sum cube-measure.
    """
    try:
        blocks = self._measures.row_share_sum.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        # --- replace the low-level error with one naming this property ---
        raise ValueError(
            "`.row_share_sum` is undefined for a cube-result without a sum measure"
        )
@lazyproperty
def row_proportion_variances(self):
    """2D np.float64 ndarray of row-proportion variance for each matrix cell."""
    blocks = self._measures.row_proportion_variances.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def row_std_dev(self):
    """2D np.float64 ndarray of standard deviation for row percentages.

    Computed as the square root of `.row_proportion_variances`.
    """
    variances = self.row_proportion_variances
    return np.sqrt(variances)
@lazyproperty
def row_std_err(self):
    """2D np.float64 ndarray of standard error for row percentages."""
    blocks = self._measures.row_std_err.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def row_unweighted_bases(self):
    """2D np.float64 ndarray of unweighted row-proportion denominator per cell."""
    blocks = self._measures.row_unweighted_bases.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def row_weighted_bases(self):
    """2D np.float64 ndarray of row-proportion denominator for each table cell."""
    blocks = self._measures.row_weighted_bases.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def rows_base(self):
    """1D/2D np.float64 ndarray of unweighted-N for each row/cell of slice.

    This array is 2D (a distinct base for each cell) when the columns dimension
    is MR, because each MR-subvariable has its own unweighted N. This is because
    not every possible response is necessarily offered to every respondent.

    In all other cases, the array is 1D.
    """
    # --- TODO: Should rows_base only be defined when it's 1D? This would
    # --- require changes to exporter to use the bases to give a "rows_base_range"
    rows_unweighted_base = self._measures.rows_unweighted_base
    if rows_unweighted_base.is_defined:
        # --- the marginal exists, so rows-base is a vector ---
        return self._assemble_marginal(rows_unweighted_base)
    # --- an X_ARRAY slice produces a 2D row-base (each cell has its own N) ---
    return self.row_unweighted_bases
@lazyproperty
def rows_dimension_alias(self):
    """str alias assigned to the rows-dimension."""
    rows_dim = self._rows_dimension
    return rows_dim.alias
@lazyproperty
def rows_dimension_description(self):
    """str description assigned to the rows-dimension.

    Reflects the resolved dimension-description transform cascade.
    """
    rows_dim = self._rows_dimension
    return rows_dim.description
@lazyproperty
def rows_dimension_fills(self):
    """tuple of optional RGB str like "#def032" fill color for each row in slice.

    The values reflect the resolved element-fill transform cascade. The length
    and ordering of the sequence correspond to the rows in the slice, including
    accounting for insertions and hidden rows. A value of `None` indicates the
    default fill, possibly determined by a theme or template.
    """
    elements = self._rows_dimension.valid_elements
    subtotals = self._rows_dimension.subtotals

    def fill_for(idx):
        # ---A negative idx designates a subtotal; adding the number of---
        # ---subtotals shifts it to a zero-based position in `subtotals`---
        if idx < 0:
            return subtotals[idx + len(subtotals)].fill
        return elements[idx].fill

    return tuple(fill_for(idx) for idx in self._row_order_signed_indexes)
@lazyproperty
def rows_dimension_name(self):
    """str name assigned to the rows-dimension.

    Reflects the resolved dimension-name transform cascade.
    """
    rows_dim = self._rows_dimension
    return rows_dim.name
@lazyproperty
def rows_dimension_type(self):
    """Member of `cr.cube.enum.DIMENSION_TYPE` specifying type of rows dimension."""
    rows_dim = self._rows_dimension
    return rows_dim.dimension_type
@lazyproperty
def rows_margin(self):
    """1D or 2D np.float64 ndarray of weighted-N rows-margin of the slice.

    This array is 2D (a distinct margin value for each cell) when the columns
    dimension is MR, because each MR-subvariable has its own weighted N. This is
    because not every possible response is necessarily offered to every
    respondent.

    In all other cases, the array is 1D.
    """
    # --- TODO: Should rows_margin only be defined when it's 1D? This would
    # --- require changes to exporter to use the bases to give a "rows_margin_range"
    rows_weighted_base = self._measures.rows_weighted_base
    if rows_weighted_base.is_defined:
        # --- the marginal exists, so rows-margin is a vector ---
        return self._assemble_marginal(rows_weighted_base)
    # --- an X_MR slice produces a 2D rows-margin (each cell has its own N) ---
    # --- This is really just another way to call the row_weighted_bases ---
    return self.row_weighted_bases
@lazyproperty
def rows_margin_proportion(self):
    """1D or 2D np.float64 ndarray of weighted-proportion rows-margin of slice.

    This array is 2D (a distinct margin value for each cell) when the columns
    dimension is MR, because each MR-subvariable has its own weighted N. This is
    because not every possible response is necessarily offered to every
    respondent.

    In all other cases, the array is 1D.
    """
    # --- TODO: Should rows_margin_proportion only be defined when it's 1D? This
    # --- would require changes to exporter to use the bases to give a
    # --- "rows_margin_range"
    rows_table_proportion = self._measures.rows_table_proportion
    if rows_table_proportion.is_defined:
        # --- the marginal exists, so the result is a vector ---
        return self._assemble_marginal(rows_table_proportion)
    # --- an X_MR slice produces a 2D rows-margin (each cell has its own N) ---
    blocks = SumSubtotals.blocks(
        self.rows_margin / self.table_weighted_bases, self._dimensions
    )
    return self._assemble_matrix(blocks)
@lazyproperty
def rows_scale_mean(self):
    """Optional 1D np.float64 ndarray of scale mean for each row.

    The returned vector is to be interpreted as a summary *column*. Also note
    that the underlying scale values are based on the numeric values of the
    opposing *columns-dimension* elements.

    This value is `None` if no column element has an assigned numeric value.
    """
    marginal = self._measures.rows_scale_mean
    return self._assemble_marginal(marginal)
@lazyproperty
def rows_scale_mean_margin(self):
    """Optional float overall mean of row-scale values.

    This value is the "margin" of the `.rows_scale_mean` vector and might
    typically appear in the cell immediately below the `.rows_scale_mean`
    summary-column. It is similar to a "table-total" value, in that it is a
    scalar that might appear in the lower right-hand corner of a table, but note
    that it does *not* represent the overall table in that
    `.columns_scale_mean_margin` will not have the same value (except by chance).
    This value derives from the numeric values of the column elements whereas its
    counterpart `.columns_scale_mean_margin` derives from the numeric values of
    the row elements.

    This value is `None` if no column has an assigned numeric-value.
    """
    if not self._columns_have_numeric_value:
        return None

    # TODO: This is a hack for X_Array slices, where columns-margin is undefined.
    # I think this measure probably also shouldn't be defined across arrays, but
    # to minimize test damage, we use the first row of the column weighted bases,
    # which is equal to the columns_margin for CAT_X_CAT and always exists for
    # others.
    columns_margin = self.column_weighted_bases[0, :]
    numeric_values = self._columns_dimension_numeric_values
    # --- weighted sum of the numeric values, skipping columns without one ---
    weighted_total = np.nansum(numeric_values * columns_margin)
    # --- only columns that carry a numeric value contribute to the weight ---
    has_value = ~np.isnan(numeric_values)
    weight_sum = np.sum(columns_margin[has_value])
    return weighted_total / weight_sum
@lazyproperty
def rows_scale_mean_stddev(self):
    """Optional 1D np.float64 ndarray of std-deviation of scale-mean for each row.

    The returned vector (1D array) is to be interpreted as a summary *column*.
    Also note that the underlying scale values are based on the numeric values of
    the opposing *columns-dimension* elements.

    This value is `None` if no column elements have an assigned numeric value.
    """
    marginal = self._measures.rows_scale_mean_stddev
    return self._assemble_marginal(marginal)
@lazyproperty
def rows_scale_mean_stderr(self):
    """Optional 1D np.float64 ndarray of standard-error of scale-mean per row.

    The returned vector is to be interpreted as a summary *column*. Also note
    that the underlying scale values are based on the numeric values of the
    opposing *columns-dimension* elements.

    This value is `None` if no column element has a numeric value assigned or if
    the rows-weighted-base is `None` (eg an array variable in the column dim).
    """
    marginal = self._measures.rows_scale_mean_stderr
    return self._assemble_marginal(marginal)
@lazyproperty
def shape(self):
    """Shape of the `.counts` array of this slice."""
    counts = self.counts
    return counts.shape
@lazyproperty
def smoothed_column_index(self):
    """2D np.float64 ndarray of smoothed column-index "percentage".

    If the cube has a smoothing specification in its transforms, the column index
    is smoothed according to the specified algorithm and parameters; otherwise
    it falls back to the unsmoothed values.
    """
    blocks = self._measures.smoothed_column_index.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def smoothed_column_percentages(self):
    """2D np.float64 ndarray of smoothed column-percentages for each matrix cell.

    This is `.smoothed_column_proportions` scaled by 100. If the cube has a
    smoothing specification in its transforms, the values are smoothed according
    to the specified algorithm and parameters; otherwise it falls back to the
    unsmoothed values.
    """
    proportions = self.smoothed_column_proportions
    return proportions * 100
@lazyproperty
def smoothed_column_proportions(self):
    """2D np.float64 ndarray of smoothed column-proportion for each matrix cell.

    This is the proportion of the weighted-count for cell to the weighted-N of
    the column the cell appears in (aka. column-margin). Generally a number
    between 0.0 and 1.0 inclusive, but subtotal differences can be between -1.0
    and 1.0 inclusive.

    If the cube has a smoothing specification in its transforms, the column
    proportions are smoothed according to the specified algorithm and parameters;
    otherwise it falls back to the unsmoothed values.
    """
    blocks = self._measures.smoothed_column_proportions.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def smoothed_columns_scale_mean(self):
    """Optional 1D np.float64 ndarray of smoothed scale mean for each column.

    If the cube has a smoothing specification in its transforms, the column scale
    mean is smoothed according to the specified algorithm and parameters;
    otherwise it falls back to the unsmoothed values.
    """
    marginal = self._measures.smoothed_columns_scale_mean
    return self._assemble_marginal(marginal)
@lazyproperty
def smoothed_means(self):
    """2D optional np.float64 ndarray of smoothed mean value for each table cell.

    If the cube has a smoothing specification in its transforms, the means are
    smoothed according to the specified algorithm and parameters; otherwise it
    falls back to the unsmoothed values.

    Raises `ValueError` when assembling the measure raises `ValueError`; the
    message refers to a cube-result without a mean measure.
    """
    try:
        blocks = self._measures.smoothed_means.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        raise ValueError(
            "`.means` is undefined for a cube-result without a mean measure"
        )
@lazyproperty
def stddev(self):
    """2D optional np.float64 ndarray of stddev value for each table cell.

    Raises `ValueError` if the cube-result does not include a stddev cube-measure.
    """
    try:
        blocks = self._measures.stddev.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        # --- replace the low-level error with one naming this property ---
        raise ValueError(
            "`.stddev` is undefined for a cube-result without a stddev measure"
        )
@lazyproperty
def sums(self):
    """2D optional np.float64 ndarray of sum value for each table cell.

    Raises `ValueError` if the cube-result does not include a sum cube-measure.
    """
    try:
        blocks = self._measures.sums.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        # --- replace the low-level error with one naming this property ---
        raise ValueError(
            "`.sums` is undefined for a cube-result without a sum measure"
        )
@lazyproperty
def summary_pairwise_indices(self):
    """Summary pairwise indices from a `PairwiseSignificance` of this slice.

    The significance object is built with this slice's `_alpha` and
    `_only_larger` settings.
    """
    significance = PairwiseSignificance(self, self._alpha, self._only_larger)
    return significance.summary_pairwise_indices
@lazyproperty
def tab_label(self):
    """str subvar label of this slice, or the empty string.

    The label is that of the first cube dimension's valid element at this slice's
    index when that dimension is a CA_SUBVAR; otherwise the empty string.
    """
    first_dimension = self._cube.dimensions[0]
    if first_dimension.dimension_type != DT.CA_SUBVAR:
        return ""
    return first_dimension.valid_elements[self._slice_idx].label
@lazyproperty
def tab_alias(self):
    """str subvar alias of this slice, or the empty string.

    The alias is that of the first cube dimension's valid element at this slice's
    index when that dimension is a CA_SUBVAR; otherwise the empty string.
    """
    first_dimension = self._cube.dimensions[0]
    if first_dimension.dimension_type != DT.CA_SUBVAR:
        return ""
    return first_dimension.valid_elements[self._slice_idx].alias
@lazyproperty
def table_base(self):
    """Scalar or 1D/2D np.float64 ndarray of unweighted-N for table.

    This value is scalar when the slice has no MR dimensions, 1D when the slice
    has one MR dimension (either MR_X or X_MR), and 2D for an MR_X_MR slice.

    The caller must know the dimensionality of the slice in order to correctly
    interpret a 1D value for this property.

    This value has four distinct forms, depending on the slice dimensions:

        * ARR_X_ARR - 2D ndarray with a distinct table-base value per cell.
        * ARR_X - 1D ndarray of value per *row* when only rows dimension is ARR.
        * X_ARR - 1D ndarray of value per *column* when only col dimension is ARR
        * CAT_X_CAT - scalar float value when slice has no MR dimension.
    """
    measures = self._measures
    # --- a defined scalar table-base takes precedence ---
    if measures.table_unweighted_base.is_defined:
        return measures.table_unweighted_base.value
    # --- next the first defined marginal, columns checked before rows; each is
    # --- looked up lazily so a later one is untouched once one is found
    for name in ("columns_table_unweighted_base", "rows_table_unweighted_base"):
        marginal = getattr(measures, name)
        if marginal.is_defined:
            return self._assemble_marginal(marginal)
    # --- otherwise each cell has its own base ---
    return self.table_unweighted_bases
@lazyproperty
def table_margin(self):
    """Scalar or 1D/2D np.float64 ndarray of weighted-N for table.

    This value is scalar when the slice has no MR dimensions, 1D when the slice
    has one MR dimension (either MR_X or X_MR), and 2D for an MR_X_MR slice.

    The caller must know the dimensionality of the slice in order to correctly
    interpret a 1D value for this property.

    This value has four distinct forms, depending on the slice dimensions:

        * CAT_X_CAT - scalar float value when slice has no ARRAY dimension.
        * ARRAY_X - 1D ndarray of value per *row* when only rows dimension is
          ARRAY.
        * X_ARRAY - 1D ndarray of value per *column* when only column is ARRAY.
        * ARRAY_X_ARRAY - 2D ndarray with a distinct table-margin value per cell.
    """
    measures = self._measures
    # --- a defined scalar table-margin takes precedence ---
    if measures.table_weighted_base.is_defined:
        return measures.table_weighted_base.value
    # --- next the first defined marginal, columns checked before rows; each is
    # --- looked up lazily so a later one is untouched once one is found
    for name in ("columns_table_weighted_base", "rows_table_weighted_base"):
        marginal = getattr(measures, name)
        if marginal.is_defined:
            return self._assemble_marginal(marginal)
    # --- otherwise each cell has its own margin ---
    return self.table_weighted_bases
@lazyproperty
def table_name(self):
    """Optional str table name for this slice.

    Provides a differentiated name for each stacked table of a 3D cube, formed
    from the cube name and the label of the first dimension's valid element at
    this slice's index. It is `None` when the cube has fewer than three
    dimensions or when the first dimension's valid elements have no element ids.
    """
    cube = self._cube
    if cube.ndim < 3:
        return None

    title = cube.name
    valid_elements = cube.dimensions[0].valid_elements
    if not valid_elements.element_ids:
        return None

    table_name = valid_elements[self._slice_idx].label
    return f"{title}: {table_name}"
@lazyproperty
def table_percentages(self):
    """`.table_proportions` scaled by 100."""
    proportions = self.table_proportions
    return proportions * 100
@lazyproperty
def table_proportions(self):
    """2D np.float64 ndarray of fraction of table count each cell contributes.

    This is the proportion of the weighted-count for the cell to the table-margin
    (weighted-N). Generally a number between 0.0 and 1.0 inclusive, but subtotal
    differences can be between -1.0 and 1.0 inclusive.
    """
    blocks = self._measures.table_proportions.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def table_proportions_moe(self):
    """1D/2D np.float64 ndarray of margin-of-error (MoE) for table proportions.

    The values are represented as fractions, analogous to the `table_proportions`
    property, so a value of 3.5% is expressed as 0.035. A value can be np.nan
    when the corresponding percentage is also np.nan, which happens when the
    respective table margin is 0.
    """
    std_err = self.table_std_err
    return Z_975 * std_err
@lazyproperty
def table_proportion_variances(self):
    """2D np.float64 ndarray of table-proportion variance for each matrix cell."""
    blocks = self._measures.table_proportion_variances.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def table_std_dev(self):
    """2D np.float64 ndarray of std-dev of table-percent for each table cell.

    Computed as the square root of `.table_proportion_variances`.
    """
    variances = self.table_proportion_variances
    return np.sqrt(variances)
@lazyproperty
def table_std_err(self):
    """2D optional np.float64 ndarray of std-error of table-percent for each cell.

    A cell value can be np.nan under certain conditions.
    """
    blocks = self._measures.table_std_err.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def table_unweighted_bases(self):
    """2D np.float64 ndarray of unweighted table-proportion denominator per cell."""
    blocks = self._measures.table_unweighted_bases.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def table_weighted_bases(self):
    """2D np.float64 ndarray of table-proportion denominator for each cell."""
    blocks = self._measures.table_weighted_bases.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def total_share_sum(self):
    """2D optional np.float64 ndarray of total share of sum for each table cell.

    Total share of sum is the sum of each subvar item divided by the TOTAL of
    items.

    Raises `ValueError` if the cube-result does not include a sum cube-measure.
    """
    try:
        blocks = self._measures.total_share_sum.blocks
        return self._assemble_matrix(blocks)
    except ValueError:
        # --- replace the low-level error with one naming this property ---
        raise ValueError(
            "`.total_share_sum` is undefined for a cube-result without a sum "
            "measure"
        )
@lazyproperty
def table_base_range(self):
    """[min, max] np.float64 ndarray range of table_base (table-unweighted-base).

    A CAT_X_CAT has a scalar for all table-unweighted-bases, but arrays have more
    than one table-unweighted-base. This collapses all of those values to a
    range, and it is "unpruned", meaning that it is calculated before any hiding
    or removing of empty rows/columns.
    """
    bases_range = self._measures.table_unweighted_bases_range
    return bases_range.value
@lazyproperty
def table_margin_range(self):
    """[min, max] np.float64 ndarray range of table_margin (table-weighted-base).

    A CAT_X_CAT has a scalar for all table-weighted-bases, but arrays have more
    than one table-weighted-base. This collapses all of the values to a range,
    and it is "unpruned", meaning that it is calculated before any hiding or
    removing of empty rows/columns.
    """
    bases_range = self._measures.table_weighted_bases_range
    return bases_range.value
@lazyproperty
def unweighted_counts(self):
    """2D np.float64 ndarray of unweighted count for each slice matrix cell."""
    blocks = self._measures.unweighted_counts.blocks
    return self._assemble_matrix(blocks)
@lazyproperty
def zscores(self):
    """2D np.float64 ndarray of std-res value for each cell of matrix.

    A z-score is also known as a *standard score* and is the number of standard
    deviations above (positive) or below (negative) the population mean a cell's
    value is.
    """
    blocks = self._measures.zscores.blocks
    return self._assemble_matrix(blocks)
# ---implementation (helpers)-------------------------------------
def _assemble_marginal(self, marginal):
    """Optional 1D ndarray created from a marginal.

    The marginal's blocks are concatenated and then indexed by the row order when
    `marginal.orientation` is `MO.ROWS`, and by the column order otherwise, so the
    result carries the ordering applied to that dimension.

    It is None when the marginal is not defined (`marginal.is_defined` is falsy).
    """
    if not marginal.is_defined:
        return None

    if marginal.orientation == MO.ROWS:
        order = self._row_order_signed_indexes
    else:
        order = self._column_order

    flattened = np.hstack(marginal.blocks)
    return flattened[order]
def _assemble_matrix(self, blocks):
    """Return 2D ndarray matrix assembled from `blocks`.

    The assembled matrix includes inserted vectors (rows and columns), has hidden
    vectors removed, and is ordered by whatever sort method is applied in the
    dimension transforms.
    """
    # --- The blocks are first joined into a single 2D array. The row and column
    # --- orders reflect all insertion, ordering, and hiding transforms: they
    # --- include (negative) insertion idxs, hidden and pruned vector indices have
    # --- been removed, and the ordering method determines the sequence of idxs.
    # --- Selecting with both orders at once therefore directly produces the
    # --- final array.
    full_matrix = np.block(blocks)
    selector = np.ix_(self._row_order_signed_indexes, self._column_order)
    return full_matrix[selector]
def _assemble_vector(self, base_vector, subtotals, order, diffs_nan=False):
    """Return 1D ndarray of `base_vector` with inserted `subtotals`, in `order`.

    Each subtotal value is the sum of its addends minus the sum of its
    subtrahends, both extracted from `base_vector` according to the
    `addend_idxs` and `subtrahend_idxs` property of each subtotal in `subtotals`.
    When `diffs_nan` is truthy, a subtotal having any subtrahend gets np.nan
    instead. The returned array is arranged by `order`, including possibly
    removing hidden or pruned values.
    """

    # TODO: This works for "sum" and "diff" subtotals, because either we set to
    # nan or add & subtract, but a fuller solution will probably get the subtotal
    # values from a _BaseSubtotals subclass.
    def subtotal_value(subtotal):
        if diffs_nan and len(subtotal.subtrahend_idxs) > 0:
            return np.nan
        added = np.sum(base_vector[subtotal.addend_idxs])
        removed = np.sum(base_vector[subtotal.subtrahend_idxs])
        return added - removed

    vector_subtotals = np.array([subtotal_value(s) for s in subtotals])
    # --- subtotals are appended so negative idxs in `order` address them ---
    return np.hstack([base_vector, vector_subtotals])[order]
@lazyproperty
def _column_order(self):
    """1D np.int64 ndarray of signed int idx for each assembled column.

    Negative values represent inserted subtotal-column locations.
    """
    dimensions, measures = self._dimensions, self._measures
    return _BaseOrderHelper.column_display_order(dimensions, measures)
@lazyproperty
def _columns_dimension_numeric_values(self):
    """1D optional np.int/float64 ndarray of numeric-value for each column element.

    A value of np.nan appears for a column element without a numeric-value. Every
    subtotal column (negative idx) gets np.nan because subtotals have no numeric
    value.
    """
    elements = self._dimensions[1].valid_elements
    numeric_values = [
        np.nan if idx < 0 else elements[idx].numeric_value
        for idx in self._column_order
    ]
    return np.array(numeric_values)
@lazyproperty
def _columns_have_numeric_value(self):
    """True when one or more column elements have an assigned numeric-value."""
    missing = np.isnan(self._columns_dimension_numeric_values)
    return not np.all(missing)
@lazyproperty
def _columns_scale_mean_variance(self):
    """Optional 1D np.float64 ndarray of scale-mean variance for each column.

    This value is `None` when no row element has an assigned numeric value.
    """
    if not self._rows_have_numeric_value:
        return None

    # --- Note: the variance for scale is defined as sum((Yi−Y~)2/(N)), where Y~ is
    # --- the mean of the data.
    has_value = ~np.isnan(self._rows_dimension_numeric_values)
    numeric_values = self._rows_dimension_numeric_values[has_value]
    # --- only rows that carry a numeric value take part in the computation ---
    valued_counts = self.counts[has_value, :]
    # --- deviation of each row value from each column's scale mean; one line
    # --- per column, hence the transposed shape ---
    deviations = np.broadcast_to(
        numeric_values, valued_counts.T.shape
    ) - self.columns_scale_mean.reshape(-1, 1)
    numerator = valued_counts * (deviations ** 2).T
    denominator = np.sum(valued_counts, axis=0)
    # --- a zero denominator produces nan/inf quietly rather than a warning ---
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.nansum(numerator, axis=0) / denominator
@lazyproperty
def _cube_has_overlaps(self):
    """True if overlaps are defined and the last dimension is MR, False otherwise."""
    if self._dimensions[-1].dimension_type != DT.MR:
        return False
    if self._cube.overlaps is None:
        return False
    return self._cube.valid_overlaps is not None
def _derived_element_idxs(self, dimension, order):
    """Return tuple(int) of derived elements' indices for a dimension.

    `dimension` provides `valid_elements` (each having a `derived` flag) and
    `subtotals`; `order` is the sequence of signed indexes in display order, where
    a negative value addresses a subtotal counted from the end.

    Subtotals cannot be derived elements. Only some elements (subvariables) can.
    The returned values are positions within `order`, not element indexes.
    """
    # --- One flag per valid element followed by one `False` per *subtotal*. The
    # --- padding must match the subtotal count so negative idxs in `order` always
    # --- land on a subtotal slot and never wrap onto an element's flag.
    n_subtotals = len(dimension.subtotals)
    derivs = [e.derived for e in dimension.valid_elements] + [False] * n_subtotals
    return tuple(np.where(np.array(derivs)[order])[0])
def _diff_element_idxs(self, dimension, order):
    """Return tuple(int) of difference elements' indices for a dimension.

    Valid elements cannot be differences. Only some subtotals can. The returned
    values are positions within `order`.
    """
    # --- valid elements are never differences, so they all get `False` ---
    element_flags = [False] * len(dimension.valid_elements)
    subtotal_flags = [subtotal.is_difference for subtotal in dimension.subtotals]
    is_diff = np.array(element_flags + subtotal_flags)
    return tuple(np.where(is_diff[order])[0])
@lazyproperty
def _dimensions(self):
    """tuple of (rows_dimension, columns_dimension) Dimension objects.

    These are the last two cube dimensions, each with its respective transforms
    applied.
    """
    last_two_dimensions = self._cube.dimensions[-2:]
    pairs = zip(last_two_dimensions, self._transform_dicts)
    return tuple(
        dimension.apply_transforms(transforms) for dimension, transforms in pairs
    )
@lazyproperty
def _measures(self):
    """SecondOrderMeasures collection object for this cube-result.

    Built from the cube, this slice's dimensions, and the slice index.
    """
    cube, dimensions, slice_idx = self._cube, self._dimensions, self._slice_idx
    return SecondOrderMeasures(cube, dimensions, slice_idx)
@lazyproperty
def _row_order_signed_indexes(self):
    """Row display order expressed in the `SIGNED_INDEXES` order format."""
    dimensions, measures = self._dimensions, self._measures
    return _BaseOrderHelper.row_display_order(
        dimensions, measures, format=ORDER_FORMAT.SIGNED_INDEXES
    )
@lazyproperty
def _rows_dimension(self):
    """The rows dimension, i.e. the first item of `._dimensions`."""
    dimensions = self._dimensions
    return dimensions[0]
@lazyproperty
def _rows_dimension_numeric_values(self):
    """1D optional np.int/float64 ndarray of numeric-value for each row element.

    A value of np.nan appears for a row element without a numeric-value. Every
    subtotal row (negative idx) gets np.nan because subtotals have no numeric
    value.
    """
    elements = self._rows_dimension.valid_elements
    numeric_values = [
        np.nan if idx < 0 else elements[idx].numeric_value
        for idx in self._row_order_signed_indexes
    ]
    return np.array(numeric_values)
@lazyproperty
def _rows_have_numeric_value(self):
    """True when one or more row elements have an assigned numeric-value."""
    missing = np.isnan(self._rows_dimension_numeric_values)
    return not np.all(missing)
@lazyproperty
def _transform_dicts(self):
    """Pair of dict (rows_dimension_transforms, columns_dimension_transforms).

    Looked up in `._transforms_dict` under the "rows_dimension" and
    "columns_dimension" keys. It always has two members, even when one or both
    dimensions have no transforms; the item is an empty dict (`{}`) when no
    transforms are specified for that dimension.
    """
    transforms_dict = self._transforms_dict
    return tuple(
        transforms_dict.get(key, {})
        for key in ("rows_dimension", "columns_dimension")
    )
[docs]class _Strand(CubePartition):
"""1D cube-partition.
A strand can arise from a 1D cube (non-CA univariate), or as a partition of
a CA-cube (CAs are 2D) into a sequence of 1D partitions, one for each subvariable.
"""
def __init__(self, cube, transforms, population, ca_as_0th, slice_idx, mask_size):
    """Store the strand-specific arguments after base-class initialization.

    `cube` and `transforms` are handed to the base-class initializer; the
    remaining arguments are kept unchanged on private attributes.
    """
    super(_Strand, self).__init__(cube, transforms)
    self._slice_idx = slice_idx
    self._mask_size = mask_size
    self._ca_as_0th = ca_as_0th
    self._population = population
def __repr__(self):
    """Provide text representation suitable for working at console.

    Falls back to a default repr on exception, such as might occur in unit tests
    where object need not otherwise be provided with all instance variable
    values.
    """
    try:
        title = (
            f"{type(self).__name__}(name='{self.name}', "
            f"dimension_type='{self.dimension_types[0].name}')"
        )
        # --- one [label, value] table row per strand row ---
        contents = [
            list(pair) for pair in zip(self.row_labels, self._default_contents)
        ]
        lines = [
            title,
            f"Showing: {self._available_measures[0].name}",
            tabulate(contents, ["", self.name]),
            f"Available measures: {str(self._available_measures)}",
        ]
        return "\n".join(lines)
    except Exception:
        return super(_Strand, self).__repr__()  # noqa
@lazyproperty
def weighted_counts(self):
    """1D np.float64 ndarray of weighted count for each row of strand.

    The values are int when the underlying cube-result has no weighting.
    """
    blocks = self._measures.weighted_counts.blocks
    return self._assemble_vector(blocks)


# --- `counts` is an alias of `weighted_counts` ---
counts = weighted_counts
@lazyproperty
def derived_row_idxs(self):
    """tuple of int index of each derived row-element in this strand.

    An element is derived if it's a subvariable of a multiple response dimension,
    which has been produced by the zz9, and inserted into the response data.

    All other elements, including regular MR and CA subvariables, as well as
    categories of CAT dimensions, are not derived. Subtotals are also not derived
    in this sense, because they're not even part of the data (elements).
    """
    rows_dimension = self._rows_dimension
    element_flags = [element.derived for element in rows_dimension.valid_elements]
    # --- subtotals are never derived, so each one contributes `False` ---
    subtotal_flags = [False] * len(rows_dimension.subtotals)
    is_derived = np.array(element_flags + subtotal_flags)
    return tuple(np.where(is_derived[self._row_order_signed_indexes])[0])
[docs] @lazyproperty
def diff_row_idxs(self):
"""tuple of int index of each difference row-element in this strand.
Valid elements are cannot be differences, only some subtotals can.
"""
rows_dim = self._rows_dimension
n_valids = len(rows_dim.valid_elements)
diffs = [False] * n_valids + [e.is_difference for e in rows_dim.subtotals]
return tuple(np.where(np.array(diffs)[self._row_order_signed_indexes])[0])
[docs] @lazyproperty
def inserted_row_idxs(self):
"""tuple of int index of each inserted row in this strand.
Suitable for use in applying different formatting (e.g. Bold) to inserted rows.
Provided index values correspond to measure values as-delivered by this strand,
after any insertion of subtotals, re-ordering, and hiding/pruning of rows
specified in a transform has been applied.
Provided index values correspond rows after any insertion of subtotals,
re-ordering, and hiding/pruning.
"""
return tuple(
i for i, row_idx in enumerate(self._row_order_signed_indexes) if row_idx < 0
)
[docs] @lazyproperty
def has_scale_means(self):
"""True if the strand has valid scale means."""
return True if self.scale_mean is not None else False
@lazyproperty
def is_empty(self):
return any(s == 0 for s in self.shape)
[docs] @lazyproperty
def means(self):
"""1D np.float64 ndarray of mean for each row of strand.
Raises ValueError when accessed on a cube-result that does not contain a means
cube-measure.
"""
try:
return self._assemble_vector(self._measures.means.blocks)
except ValueError:
raise ValueError(
"`.means` is undefined for a cube-result without a mean measure"
)
[docs] @lazyproperty
def min_base_size_mask(self):
"""1D bool ndarray of True for each row that fails to meet min-base spec.
The "base" is the physical (unweighted) count of respondents to the question.
When this is lower than a specified threshold, the reliability of the value is
too low to be meaningful. The threshold is defined by the caller (user).
"""
return self.unweighted_bases < self._mask_size
@lazyproperty
def name(self):
return self.rows_dimension_name
[docs] @lazyproperty
def payload_order(self):
"""1D np.int64 ndarray of signed int idx respecting the payload order.
Positive integers indicate the 1-indexed position in payload of regular
elements, while negative integers are the subtotal insertions.
Needed for reordering color palette in exporter.
"""
empty_row_idxs = tuple(
i for i, N in enumerate(self._measures.pruning_base) if N == 0
)
return tuple(
PayloadOrderCollator(self._rows_dimension, empty_row_idxs).payload_order
)
[docs] @lazyproperty
def population_counts(self):
"""1D np.float64 ndarray of population count for each row of strand.
The (estimated) population count is computed based on the `population` value
provided when the Strand is created. It is also adjusted to account for any
filters that were applied as part of the query.
"""
return (
self.population_proportions
* self._population
* self._cube.population_fraction
)
[docs] @lazyproperty
def population_counts_moe(self):
"""1D np.float64 ndarray of population margin-of-error (MoE) for table percents.
The values are represented as population estimates, analogue to the
`population_counts` property. This means that the values will be presented by
actual estimated counts of the population The values can be np.nan when the
corresponding percentage is also np.nan, which happens when the respective
table margin is 0.
"""
total_filtered_population = self._population * self._cube.population_fraction
return Z_975 * total_filtered_population * self.population_proportion_stderrs
[docs] @lazyproperty
def population_proportions(self):
"""1D np.float64 population-proportion for each row
Generally equal to the table_proprotions, but because we don't divide the
population when the row is a CAT_DATE, can also be all 1s. Used to calculate
the population_counts.
"""
population_proportions = self._assemble_vector(
self._measures.population_proportions.blocks
)
# Diff subtotals not allowed in population measure
if self.diff_row_idxs:
population_proportions[self.diff_row_idxs] = np.nan
return population_proportions
[docs] @lazyproperty
def population_proportion_stderrs(self):
"""1D np.float64 population-proportion-standard-error for each row
Generally equal to the table_proprotion_standard_error, but because we don't
divide the population when the row is a CAT_DATE, can also be all 0s. Used to
calculate the population_counts_moe.
"""
return self._assemble_vector(
self._measures.population_proportion_stderrs.blocks
)
[docs] @lazyproperty
def row_count(self):
"""int count of rows in a returned measure or marginal.
This count includes inserted rows but not rows that have been hidden/pruned.
"""
return len(self._row_order_signed_indexes)
[docs] @lazyproperty
def row_aliases(self):
"""1D str ndarray of alias for each row, for use as row headings."""
return np.array(
self._rows_dimension.element_aliases + self._rows_dimension.subtotal_aliases
)[self._row_order_signed_indexes]
[docs] @lazyproperty
def row_codes(self):
"""1D int ndarray of code for each row, for use as row headings."""
return np.array(
self._rows_dimension.element_ids + self._rows_dimension.insertion_ids
)[self._row_order_signed_indexes]
[docs] @lazyproperty
def row_labels(self):
"""1D str ndarray of name for each row, suitable for use as row headings."""
return np.array(
self._rows_dimension.element_labels + self._rows_dimension.subtotal_labels
)[self._row_order_signed_indexes]
[docs] def row_order(self, format=ORDER_FORMAT.SIGNED_INDEXES):
"""1D np.int64 ndarray of idx for each assembled row of stripe.
If order format is `SIGNED_INDEXES` negative values represent inserted
subtotal-row locations; for `BOGUS_IDS` insertios are represented by
`ins_{insertion_id}` string.
Indices appear in the order rows are to appear in the final result.
Needed for reordering color palette in exporter.
"""
# --- specify dtype explicitly to prevent error when display-order is empty. The
# --- default dtype is float, which cannot be used to index an array.
if format == ORDER_FORMAT.BOGUS_IDS:
return self._row_order_bogus_ids
return self._row_order_signed_indexes
[docs] @lazyproperty
def rows_base(self):
"""1D np.float64 ndarray of unweighted-N for each row of slice."""
# --- for a strand, this is the same as unweighted-counts, but needs this
# --- alternate property so it can be accessed uniformly between a slice and a
# --- strand.
return self.unweighted_counts
[docs] @lazyproperty
def rows_dimension_alias(self):
"""str alias assigned to rows-dimension."""
return self._rows_dimension.alias
[docs] @lazyproperty
def rows_dimension_description(self):
"""str description assigned to rows-dimension.
Reflects the resolved dimension-description transform cascade.
"""
return self._rows_dimension.description
[docs] @lazyproperty
def rows_dimension_fills(self):
"""tuple of optional RGB str like "#def032" fill color for each strand row.
Each value reflects the resolved element-fill transform cascade. The length and
ordering of the sequence correspond to the rows in the slice, including
accounting for insertions, ordering, and hidden rows. A fill value is `None`
when no explicit fill color is defined for that row, indicating the default fill
color for that row should be used, probably coming from a caller-defined theme.
"""
element_fills = tuple(e.fill for e in self._rows_dimension.valid_elements)
subtotal_fills = tuple(st.fill for st in self._rows_dimension.subtotals)
return tuple(
# ---Subtotals have negative sequential indexes (-1, -2, ..., -m)---
# ---To index them properly, we need to convert those indexes to---
# ---zero based positive indexes (0, 1, ... m - 1) i.e. -idx - 1---
(
element_fills[idx]
if idx > -1
else subtotal_fills[idx + len(subtotal_fills)]
)
for idx in self._row_order_signed_indexes
)
[docs] @lazyproperty
def rows_dimension_name(self):
"""str name assigned to rows-dimension.
Reflects the resolved dimension-name transform cascade.
"""
return self._rows_dimension.name
[docs] @lazyproperty
def rows_dimension_type(self):
"""Member of DIMENSION_TYPE enum describing type of rows dimension."""
return self._rows_dimension.dimension_type
[docs] @lazyproperty
def rows_margin(self):
"""1D np.float64 ndarray of weighted-N for each row of slice."""
# --- for a strand, this is the same as (weighted) counts, but needs this
# --- alternate name so it can be accessed uniformly between a slice and strand.
return self.counts
[docs] @lazyproperty
def scale_mean(self):
"""Optional float mean of row numeric-values (scale).
This value is `None` when no row-elements have a numeric-value assigned. The
numeric value (aka. "scale") for a row is its count multiplied by the
numeric-value of its element. For example, if 100 women responded "Very Likely"
and the numeric-value of the "Very Likely" response (element) was 4, then the
scale for that row would be 400. The scale mean is the average of those scale
values over the total count of responses.
"""
return self._measures.scaled_counts.scale_mean
[docs] @lazyproperty
def scale_std_dev(self):
"""Optional np.float64 standard-deviation of scaled weighted counts.
This value is `None` when no rows have a numeric-value assigned.
"""
return self._measures.scaled_counts.scale_stddev
scale_stddev = scale_std_dev
[docs] @lazyproperty
def scale_std_err(self):
"""Optional np.float64 standard-error of scaled weighted counts.
This value is `None` when no rows have a numeric-value assigned. The value has
the same units as the assigned numeric values and indicates the dispersion of
the scaled-count distribution from its mean (scale-mean).
"""
return self._measures.scaled_counts.scale_stderr
scale_stderr = scale_std_err
[docs] @lazyproperty
def shape(self):
"""Tuple of int vector counts for this partition.
A _Strand has a shape like (5,) which represents its row-count.
Not to be confused with `numpy.ndarray.shape`, this represent the count of rows
in this strand. It does not necessarily represent the shape of any underlying
`numpy.ndarray` object In particular, the value of its row-count can be zero.
"""
return (self.row_count,)
[docs] @lazyproperty
def share_sum(self):
"""1D np.float64 ndarray of share of sum for each row of strand.
Raises `ValueError` if the cube-result does not include a sum cube-measure.
Share of sum is the sum of each subvar item divided by the TOTAL number of
items.
"""
try:
return self._assemble_vector(self._measures.share_sum.blocks)
except ValueError:
raise ValueError(
"`.share_sum` is undefined for a cube-result without a sum measure"
)
[docs] @lazyproperty
def smoothed_means(self):
"""1D np.float64 ndarray of smoothed mean for each row of strand.
If cube has smoothing specification in the transforms it will return the
smoothed means according to the algorithm and the parameters specified,
otherwise it fallbacks to unsmoothed values.
"""
try:
return self._assemble_vector(self._measures.smoothed_means.blocks)
except ValueError:
raise ValueError(
"`.means` is undefined for a cube-result without a mean measure"
)
[docs] @lazyproperty
def stddev(self):
"""1D np.float64 ndarray of stddev for each row of strand.
Raises ValueError when accessed on a cube-result that does not contain a stddev
cube-measure.
"""
try:
return self._assemble_vector(self._measures.stddev.blocks)
except ValueError:
raise ValueError(
"`.stddev` is undefined for a cube-result without a stddev measure"
)
[docs] @lazyproperty
def sums(self):
"""1D np.float64 ndarray of sum for each row of strand.
Raises ValueError when accessed on a cube-result that does not contain a sum
cube-measure.
"""
try:
return self._assemble_vector(self._measures.sums.blocks)
except ValueError:
raise ValueError(
"`.sums` is undefined for a cube-result without a sum measure"
)
[docs] @lazyproperty
def tab_label(self):
"""Subvar label of strand if first dimension is a CA_SUBVAR, '""' otherwise."""
first_dimension = self._cube.dimensions[0]
return (
first_dimension.valid_elements[self._slice_idx].label
if first_dimension.dimension_type == DT.CA_SUBVAR
else ""
)
[docs] @lazyproperty
def tab_alias(self):
"""Subvar alias of strand if first dimension is a CA_SUBVAR, '""' otherwise."""
first_dimension = self._cube.dimensions[0]
return (
first_dimension.valid_elements[self._slice_idx].alias
if first_dimension.dimension_type == DT.CA_SUBVAR
else ""
)
[docs] @lazyproperty
def table_base_range(self):
"""[min, max] np.float64 ndarray range of unweighted-N for this stripe.
A non-MR stripe will have a single base, represented by min and max being the
same value. Each row of an MR stripe has a distinct base, which is reduced to a
range in that case.
"""
return self._measures.unweighted_bases.table_base_range
[docs] @lazyproperty
def table_margin_range(self):
"""[min, max] np.float64 ndarray range of (total) weighted-N for this stripe.
A non-MR stripe will have a single margin, represented by min and max being the
same value. Each row of an MR stripe has a distinct base, which is reduced to a
range in that case.
"""
return self._measures.weighted_bases.table_margin_range
[docs] @lazyproperty
def table_name(self):
"""Optional table name for this strand
Only for CA-as-0th case, provides differentiated names for stacked tables.
"""
title = self._cube.name
valid_elements = self._cube.dimensions[0].valid_elements
if valid_elements.element_ids:
table_name = valid_elements[self._slice_idx].label
return f"{title}: {table_name}"
return None
[docs] @lazyproperty
def table_percentages(self):
"""1D np.float64 ndarray of table-percentage for each row.
Table-percentage is the fraction of the table weighted-N contributed by each
row, expressed as a percentage (float between 0.0 and 100.0 inclusive).
"""
return tuple(self.table_proportions * 100)
[docs] @lazyproperty
def table_proportion_moes(self):
"""1D np.float64 ndarray of table-proportion margin-of-error (MoE) for each row.
The values are represented as fractions, analogue to the `table_proportions`
property. This means that the value of 3.5% will have the value 0.035. The
values can be np.nan when the corresponding proportion is also np.nan, which
happens when the respective columns margin is 0.
"""
return Z_975 * self.table_proportion_stderrs
[docs] @lazyproperty
def table_proportion_stddevs(self):
"""1D np.float64 ndarray of table-proportion std-deviation for each row."""
return self._assemble_vector(self._measures.table_proportion_stddevs.blocks)
[docs] @lazyproperty
def table_proportion_stderrs(self):
"""1D np.float64 ndarray of table-proportion std-error for each row."""
return self._assemble_vector(self._measures.table_proportion_stderrs.blocks)
[docs] @lazyproperty
def table_proportions(self):
"""1D np.float64 ndarray of fraction of weighted-N contributed by each row.
The proportion is expressed as a float between 0.0 and 1.0 inclusive.
"""
return self._assemble_vector(self._measures.table_proportions.blocks)
[docs] @lazyproperty
def title(self):
"""The str display name of this strand, suitable for use as a column heading.
`Strand.name` is the rows-dimension name, which is suitable for use as a title
of the row-headings. However, a strand can also appear as a *column* and this
value is a suitable name for such a column.
"""
return self._cube.title
[docs] @lazyproperty
def unweighted_bases(self):
"""1D np.float64 ndarray of base count for each row, before weighting.
When the rows dimension is multiple-response (MR), each value is different,
reflecting the base for that individual subvariable. In all other cases, the
table base is repeated for each row.
"""
return self._assemble_vector(self._measures.unweighted_bases.blocks)
[docs] @lazyproperty
def unweighted_counts(self):
"""1D np.float64 ndarray of unweighted count for each row of stripe."""
return self._assemble_vector(self._measures.unweighted_counts.blocks)
[docs] @lazyproperty
def weighted_bases(self):
"""1D np.float64 ndarray of table-proportion denominator for each row.
For a non-MR strand, all values in the array are the same. For an MR strand,
each value may be different, reflecting the fact that not all response options
were necessarily presented to all respondents.
"""
return self._assemble_vector(self._measures.weighted_bases.blocks)
# ---implementation (helpers)-------------------------------------
def _assemble_vector(self, blocks):
"""Return 1D ndarray of base_vector with inserted subtotals, in order.
`blocks` is a pair of two 1D arrays, first the base-values and then the subtotal
values of the stripe vector. The returned array is sequenced in the computed
row order including possibly removing hidden or pruned values.
"""
return np.concatenate(blocks)[self._row_order_signed_indexes]
@lazyproperty
def _dimensions(self):
"""tuple of (row,) Dimension object."""
return (self._rows_dimension,)
@lazyproperty
def _rows_dimension(self):
"""Dimension object for the single dimension of this strand."""
return self._cube.dimensions[-1].apply_transforms(self._row_transforms_dict)
@lazyproperty
def _row_transforms_dict(self):
"""Transforms dict for the single (rows) dimension of this strand."""
return self._transforms_dict.get("rows_dimension", {})
@lazyproperty
def _measures(self):
"""StripeMeasures collection object for this stripe."""
return StripeMeasures(
self._cube, self._rows_dimension, self._ca_as_0th, self._slice_idx
)
@lazyproperty
def _row_order_bogus_ids(self):
"""Row order with bogus ids."""
return np.array(
stripe_BaseOrderHelper.display_order(
self._rows_dimension, self._measures, format=ORDER_FORMAT.BOGUS_IDS
)
)
@lazyproperty
def _row_order_signed_indexes(self):
"""Row order idx with signed idxs."""
return np.array(
stripe_BaseOrderHelper.display_order(
self._rows_dimension, self._measures, format=ORDER_FORMAT.SIGNED_INDEXES
),
dtype=int,
)
class _Nub(CubePartition):
    """0D slice."""

    @lazyproperty
    def is_empty(self):
        """True if the partition has no counts, False otherwise.

        A count that is zero, negative, or NaN makes the partition empty.
        """
        count = self.unweighted_count
        # --- NaN compares False against 0, so it needs the explicit isnan() check ---
        return True if count <= 0 else math.isnan(count)

    @lazyproperty
    def means(self):
        """Float scalar representing the mean."""
        scalar = self._scalar
        return scalar.means

    @lazyproperty
    def table_base(self):
        """Int scalar of the unweighted N of the table."""
        scalar = self._scalar
        return scalar.table_base

    @lazyproperty
    def table_name(self):
        """Always `None`; this partition type provides no table name."""
        return None

    @lazyproperty
    def unweighted_count(self):
        """Integer scalar of total unweighted count of the table."""
        cube = self._cube
        return cube.unweighted_counts

    # ---implementation (helpers)-------------------------------------

    @lazyproperty
    def _dimensions(self):
        """Empty tuple; this partition exposes no dimensions."""
        return tuple()

    @lazyproperty
    def _scalar(self):
        """MeansScalar built from the cube's means and unweighted counts."""
        cube = self._cube
        return MeansScalar(cube.means, cube.unweighted_counts)