-
Notifications
You must be signed in to change notification settings - Fork 1
Features/ibis custom eras #37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 2 commits
4b89967
09748f2
8f7c4d2
dd20010
f3331bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import ibis | ||
|
|
||
| from ..plan.schema import PERSON_ID, START_DATE | ||
| from .end_strategy import _replace_end_date, attach_observation_bounds | ||
|
|
||
|
|
||
def _compute_exposure_end_date(table, *, days_supply_override: int | None):
    """Build the expression giving each drug exposure's end date.

    Args:
        table: ibis table expression with a ``drug_exposure_start_date``
            column; ``drug_exposure_end_date`` and ``days_supply`` are used
            when present and treated as all-NULL when absent.
        days_supply_override: when not ``None``, the end date is simply the
            start date plus this many days and the other columns are ignored.

    Returns:
        An ibis expression: the first non-NULL of the recorded end date,
        start + ``days_supply`` days, and start + 1 day.
    """
    exposure_start = table["drug_exposure_start_date"].cast("date")

    # An explicit override wins outright; no per-row data is consulted.
    if days_supply_override is not None:
        return exposure_start + ibis.interval(days=days_supply_override)

    available = table.columns

    # Missing optional columns are replaced by typed NULLs so the coalesce
    # below falls through to the next candidate.
    if "drug_exposure_end_date" in available:
        recorded_end = table["drug_exposure_end_date"].cast("date")
    else:
        recorded_end = ibis.null().cast("date")

    if "days_supply" in available:
        supply_days = table["days_supply"].cast("int64")
    else:
        supply_days = ibis.null().cast("int64")

    end_from_supply = exposure_start + supply_days.as_interval("D")
    day_after_start = exposure_start + ibis.interval(days=1)

    return ibis.coalesce(recorded_end, end_from_supply, day_after_start)
|
|
||
|
|
||
def _compute_eras(exposures, *, gap_days: int, offset: int):
    """Collapse per-person drug exposures into continuous eras.

    Args:
        exposures: ibis table expression with ``person_id``, ``start_date``
            and ``_exposure_end`` columns.
        gap_days: maximum gap, in days, bridged between two exposures of the
            same era.
        offset: days added to every exposure end; it takes part in the era
            grouping and in the reported era end date.

    Returns:
        An ibis table expression with one row per (person, era) holding the
        person id (named ``PERSON_ID``), ``era_start_date`` and
        ``era_end_date``.
    """
    gap = int(gap_days)
    shift = int(offset)

    # Circe BE groups custom eras on exposure_end + gapDays + offset, so the
    # offset must be part of the padded end *before* grouping. Adding it only
    # afterwards changes which exposures merge into the same era.
    padded = exposures.mutate(
        _padded_end=exposures._exposure_end + ibis.interval(days=gap + shift)
    )

    # Ties on start_date are broken deterministically, longest exposure first.
    ordering = [
        padded.start_date,
        padded._padded_end.desc(),
        padded._exposure_end.desc(),
    ]

    cumulative_window = ibis.cumulative_window(group_by=padded.person_id, order_by=ordering)
    ordered_window = ibis.window(group_by=padded.person_id, order_by=ordering)

    # Running maximum of the padded end up to and including the current row.
    with_cummax = padded.mutate(
        _cummax_padded_end=padded._padded_end.max().over(cumulative_window)
    )

    # Lagging the running maximum yields the furthest padded end among all
    # strictly preceding rows (NULL for the first row of a person).
    with_prev = with_cummax.mutate(
        _prev_max=with_cummax._cummax_padded_end.lag().over(ordered_window)
    )

    # A row opens a new era when nothing before it reaches its start date.
    marked = with_prev.mutate(
        _is_new=ibis.ifelse(
            with_prev._prev_max.isnull() | (with_prev._prev_max < with_prev.start_date),
            ibis.literal(1, type="int64"),
            ibis.literal(0, type="int64"),
        )
    )

    group_window = ibis.cumulative_window(
        group_by=marked.person_id,
        order_by=[
            marked.start_date,
            marked._padded_end.desc(),
            marked._exposure_end.desc(),
            marked._is_new.desc(),
        ],
    )
    # The running count of era openers is the era index within the person.
    era_indexed = marked.mutate(_era_id=marked._is_new.sum().over(group_window))

    collapsed = era_indexed.group_by(era_indexed.person_id, era_indexed._era_id).aggregate(
        era_start_date=era_indexed.start_date.min(),
        _max_exposure_end=era_indexed._exposure_end.max(),
    )

    return collapsed.select(
        collapsed.person_id.cast("int64").name(PERSON_ID),
        collapsed.era_start_date.cast("date").name("era_start_date"),
        # max(exposure_end) + offset equals max(padded_end) - gap_days, the
        # era end Circe BE reports.
        (collapsed._max_exposure_end + ibis.interval(days=shift)).cast("date").name("era_end_date"),
    )
|
|
||
|
|
||
def compute_drug_eras(
    ctx, *, drug_codeset_id: int, gap_days: int, offset: int, days_supply_override: int | None
):
    """Compute drug eras for every exposure matching a concept set.

    Args:
        ctx: execution context providing ``concept_ids_for_codeset`` and
            ``table``.
        drug_codeset_id: codeset whose concept ids select the exposures.
        gap_days: forwarded to ``_compute_eras``.
        offset: forwarded to ``_compute_eras``.
        days_supply_override: forwarded to ``_compute_exposure_end_date``.

    Returns:
        An ibis table expression with the person id (named ``PERSON_ID``),
        ``era_start_date`` and ``era_end_date``; it has no rows when the
        codeset resolves to no concepts.
    """
    concept_ids = ctx.concept_ids_for_codeset(drug_codeset_id)
    de = ctx.table("drug_exposure")

    if not concept_ids:
        # Keep the output schema but return no rows.
        return de.filter(ibis.literal(False)).select(
            de.person_id.cast("int64").name(PERSON_ID),
            ibis.null().cast("date").name("era_start_date"),
            ibis.null().cast("date").name("era_end_date"),
        )

    # NOTE(review): Circe BE's custom era SQL matches the codeset against the
    # source concept as well as the standard concept. A row matching both is
    # kept once here; duplicates would collapse into the same era anyway.
    matches = de.drug_concept_id.isin(concept_ids)
    if "drug_source_concept_id" in de.columns:
        matches = matches | de.drug_source_concept_id.isin(concept_ids)
    filtered = de.filter(matches)

    prepared = filtered.select(
        filtered.person_id.cast("int64").name("person_id"),
        filtered.drug_exposure_start_date.cast("date").name("start_date"),
        _compute_exposure_end_date(filtered, days_supply_override=days_supply_override).name("_exposure_end"),
    )

    return _compute_eras(prepared, gap_days=gap_days, offset=offset)
|
|
||
|
|
||
def apply_custom_era_strategy(events, strategy, ctx):
    """Replace each event's end date using the custom drug era strategy.

    Args:
        events: ibis table expression of cohort events.
        strategy: object whose ``payload`` mapping provides
            ``drug_codeset_id``, ``gap_days``, ``offset`` and, optionally,
            ``days_supply_override``.
        ctx: execution context passed on to the helpers.

    Returns:
        The result of ``_replace_end_date``: the end date is the end of the
        era containing the event start, capped at ``op_end_date``, or
        ``op_end_date`` itself when no era matches or no codeset is given.
    """
    payload = strategy.payload
    drug_codeset_id = payload["drug_codeset_id"]
    gap_days = payload["gap_days"]
    offset = payload["offset"]
    days_supply_override = payload.get("days_supply_override")

    with_bounds = attach_observation_bounds(events, ctx)

    # Without a codeset there are no eras to build; fall back to op_end_date.
    if drug_codeset_id is None:
        return _replace_end_date(events, with_bounds, with_bounds.op_end_date)

    eras = compute_drug_eras(
        ctx,
        drug_codeset_id=drug_codeset_id,
        gap_days=gap_days,
        offset=offset,
        days_supply_override=days_supply_override,
    )

    # Rename the era person key so it cannot collide with the event key.
    eras_for_join = eras.select(
        eras.person_id.name("_era_person_id"),
        eras.era_start_date,
        eras.era_end_date,
    )

    # Left join keeps events whose start date falls inside no era.
    joined = with_bounds.left_join(
        eras_for_join,
        predicates=[
            with_bounds.person_id == eras_for_join._era_person_id,
            with_bounds[START_DATE] >= eras_for_join.era_start_date,
            with_bounds[START_DATE] <= eras_for_join.era_end_date,
        ],
    )

    # NOTE(review): partition by person as well as event. event_id alone is
    # not assumed to be unique across persons, and partitioning only by it
    # would keep a single row for all persons sharing an id (a reviewer
    # reported mismatches against CirceR with the event_id-only partition).
    event_window = ibis.window(
        group_by=[joined.person_id, joined.event_id],
        order_by=[joined.era_end_date.desc()],
    )
    ranked = joined.mutate(_rn=ibis.row_number().over(event_window))
    # row_number() is zero-based in ibis, so 0 selects the latest-ending era.
    one_per_event = ranked.filter(ranked._rn == 0)

    effective_end = ibis.coalesce(
        one_per_event.era_end_date,
        one_per_event.op_end_date,
    )
    # Never let the era end run past op_end_date.
    final_end = ibis.least(effective_end, one_per_event.op_end_date)

    return _replace_end_date(events, one_per_event, final_end)
|
Comment on lines
+1
to
+171
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We are filtering the whole |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this needs to include `offset` in the padded end before era grouping. Circe BE groups custom eras on `DRUG_EXPOSURE_END_DATE + gapDays + offset`, then computes the final era end as `max(padded_end) - gapDays`. Applying `offset` only after grouping changes which exposures merge into the same custom era.
CirceBE relevant code: https://github.com/OHDSI/circe-be/blob/498893689a9cf4f09c2a43cc893bb01116db7184/src/main/resources/resources/cohortdefinition/sql/customEraStrategy.sql#L30-L38
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can test on PL cohorts: 1395, 1387, 1427, 1412 to verify.