# Source code for imednet.workflows.data_extraction
"""Provides workflows for extracting specific datasets from iMednet studies."""
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from ..models import Record, RecordRevision
if TYPE_CHECKING:
from ..sdk import ImednetSDK
class DataExtractionWorkflow:
    """
    Provides methods for complex data extraction tasks involving multiple iMednet endpoints.

    Args:
        sdk: An instance of the ImednetSDK.
    """

    def __init__(self, sdk: "ImednetSDK"):
        self._sdk = sdk

    def extract_records_by_criteria(
        self,
        study_key: str,
        record_filter: Optional[Dict[str, Union[Any, Tuple[str, Any], List[Any]]]] = None,
        subject_filter: Optional[Dict[str, Union[Any, Tuple[str, Any], List[Any]]]] = None,
        visit_filter: Optional[Dict[str, Union[Any, Tuple[str, Any], List[Any]]]] = None,
        **other_filters: Any,
    ) -> "List[Record]":
        """
        Extracts records based on criteria spanning subjects, visits, and records.

        Subjects and visits are queried first (only when their filter is given);
        the records returned by the records endpoint are then narrowed
        client-side to those whose ``subject_key`` / ``visit_id`` matched.

        Args:
            study_key: The key identifying the study.
            record_filter: Dictionary of conditions for the records endpoint.
            subject_filter: Dictionary of conditions for the subjects endpoint.
            visit_filter: Dictionary of conditions for the visits endpoint.
            **other_filters: Additional keyword arguments passed as filters to the
                records endpoint `list` method. They override same-named keys in
                ``record_filter``. ``record_data_filter`` may be supplied here;
                it defaults to ``None`` when absent.

        Returns:
            A list of Record objects matching all specified criteria. An empty
            list is returned without querying records when a subject or visit
            filter was given and matched nothing.
        """
        matching_subject_keys: Optional[List[str]] = None
        if subject_filter:
            subjects = self._sdk.subjects.list(study_key, **subject_filter)
            matching_subject_keys = [s.subject_key for s in subjects]
            if not matching_subject_keys:
                return []
        # A set gives O(1) membership tests in the filters below. It is empty
        # when no subject filter was requested, which means "do not filter".
        allowed_subjects = set(matching_subject_keys or ())

        matching_visit_ids: Optional[List[int]] = None
        if visit_filter:
            # Visits are restricted to the matched subjects client-side because
            # the two filter dictionaries are sent to separate endpoints and are
            # not combined into a single server-side expression.
            visits = self._sdk.visits.list(study_key, **visit_filter)
            if allowed_subjects:
                visits = [v for v in visits if v.subject_key in allowed_subjects]
            matching_visit_ids = [v.visit_id for v in visits]
            if not matching_visit_ids:
                return []
        allowed_visits = set(matching_visit_ids or ())

        # Build the keyword arguments for the records endpoint. Copy so the
        # caller's dictionary is never mutated.
        record_kwargs: Dict[str, Any] = dict(record_filter) if record_filter else {}
        record_kwargs.update(other_filters)
        # Default rather than pass explicitly: an explicit keyword next to the
        # ** expansion raises TypeError if the caller supplied the same key.
        record_kwargs.setdefault("record_data_filter", None)

        records = self._sdk.records.list(study_key=study_key, **record_kwargs)

        # Subject/visit matching is applied client-side on the fetched records.
        if allowed_subjects:
            records = [r for r in records if r.subject_key in allowed_subjects]
        if allowed_visits:
            records = [r for r in records if r.visit_id in allowed_visits]
        return records

    def extract_audit_trail(
        self,
        study_key: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        user_filter: Optional[Dict[str, Union[Any, Tuple[str, Any], List[Any]]]] = None,
        **filters: Any,
    ) -> "List[RecordRevision]":
        """
        Extracts the audit trail (record revisions) based on specified filters.

        Args:
            study_key: The key identifying the study.
            start_date: Optional start date filter (YYYY-MM-DD format expected by API).
            end_date: Optional end date filter (YYYY-MM-DD format expected by API).
            user_filter: Optional dictionary of base filter conditions.
            **filters: Additional key-value pairs to be added as equality filters.
                They override same-named keys in ``user_filter``.

        Returns:
            A list of RecordRevision objects matching the criteria.
        """
        # Merge everything into one mapping, lowest precedence first:
        # user_filter < **filters < explicit start_date / end_date.
        # A single mapping avoids the "multiple values for keyword argument"
        # TypeError that two separate ** expansions raise on a shared key.
        query_kwargs: Dict[str, Any] = dict(user_filter) if user_filter else {}
        query_kwargs.update(filters)
        if start_date:
            query_kwargs["start_date"] = start_date
        if end_date:
            query_kwargs["end_date"] = end_date

        # Fetch record revisions
        return self._sdk.record_revisions.list(study_key, **query_kwargs)
# Integration:
# - Accessed via the main SDK instance
# (e.g., `sdk.workflows.data_extraction.extract_records_by_criteria(...)`).
# - Offers powerful data retrieval capabilities beyond single endpoint calls.