Skip to content

Commit

Permalink
Merge pull request #433 from lsst/tickets/SP-1724
Browse files Browse the repository at this point in the history
add option to clip trailing index value to StringCountMetric
  • Loading branch information
yoachim authored Nov 22, 2024
2 parents 1103c5c + 9179dff commit 0a9c0ea
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
6 changes: 4 additions & 2 deletions rubin_sim/maf/batches/glance_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,15 @@ def glanceBatch(
# stats from the scheduler_note column
if "scheduler_note" in colmap.keys():
displayDict = {"group": "Basic Stats", "subgroup": "Percent stats"}
metric = metrics.StringCountMetric(col=colmap["scheduler_note"], percent=True, metric_name="Percents")
metric = metrics.StringCountMetric(
col=colmap["scheduler_note"], percent=True, metric_name="Percents", clip_end=True
)
sql = ""
slicer = slicers.UniSlicer()
bundle = metric_bundles.MetricBundle(metric, slicer, sql, display_dict=displayDict)
bundle_list.append(bundle)
displayDict["subgroup"] = "Count Stats"
metric = metrics.StringCountMetric(col=colmap["scheduler_note"], metric_name="Counts")
metric = metrics.StringCountMetric(col=colmap["scheduler_note"], metric_name="Counts", clip_end=True)
bundle = metric_bundles.MetricBundle(metric, slicer, sql, display_dict=displayDict)
bundle_list.append(bundle)

Expand Down
23 changes: 22 additions & 1 deletion rubin_sim/maf/metrics/string_count_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@ class StringCountMetric(BaseMetric):
Column name that has strings to look at.
percent : `bool`, opt
Normalize and return results as percents rather than raw count.
clip_end : `bool`, opt
If True, strip a trailing ", <integer>" suffix from each string before counting (e.g. "note, 12" is counted as "note").
"""

def __init__(self, metric_name="stringCountMetric", col="filter", percent=False, **kwargs):
def __init__(
self, metric_name="stringCountMetric", col="filter", percent=False, clip_end=False, **kwargs
):
if percent:
units = "percent"
else:
Expand All @@ -45,8 +49,25 @@ def __init__(self, metric_name="stringCountMetric", col="filter", percent=False,
cols = [col]
super().__init__(cols, metric_name, units=units, metric_dtype=object, **kwargs)
self.col = col
self.clip_end = clip_end

def run(self, data_slice, slice_point=None):

# If we need to clip off trailing integer
if self.clip_end:
replace_col = []
for val in data_slice[self.col]:
if ", " in val:
chunks = val.split(", ")
if chunks[-1].isdigit():
new_val = ", ".join(chunks[0:-1])
replace_col.append(new_val)
else:
replace_col.append(val)
else:
replace_col.append(val)
data_slice[self.col] = replace_col

counter = Counter(data_slice[self.col])
# convert to a numpy array
lables = list(counter.keys())
Expand Down

0 comments on commit 0a9c0ea

Please sign in to comment.