"""Tests for the bill selection helpers in ``pipelines.jobs.extract_bill_topics``."""

from sqlalchemy.dialects import postgresql

from pipelines.jobs.extract_bill_topics import (
    _select_bill_text_for_topic_extraction,
    create_select_bills_for_topic_extraction,
)
from pipelines.orm.data_science_dev.congress import Bill, BillText, BillTextSummary


def _compile_sql(statement: object) -> str:
    """Return ``statement`` rendered as SQL text for the PostgreSQL dialect.

    ``literal_binds`` is enabled so bound parameters are rendered inline in
    the returned string instead of as placeholders.
    """
    compiled = statement.compile(
        dialect=postgresql.dialect(),
        compile_kwargs={"literal_binds": True},
    )
    return str(compiled)


def _bill_with_text(bill_text: BillText) -> Bill:
    """Build the in-memory ``Bill`` fixture that owns the single ``bill_text``."""
    return Bill(
        id=5,
        congress=119,
        bill_type="hr",
        number=1,
        bill_texts=[bill_text],
    )


def test_select_bill_text_for_topic_extraction_uses_primary_summary() -> None:
    """An explicit primary summary is the default summary of the selected text."""
    primary_summary = BillTextSummary(id=1, bill_text_id=10, summary="primary")
    newest_summary = BillTextSummary(id=2, bill_text_id=10, summary="newest")
    text_version = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest_summary],
        primary_summary=primary_summary,
    )
    bill = _bill_with_text(text_version)

    chosen = _select_bill_text_for_topic_extraction(bill)

    # The bill's only text must be returned as the very same object.
    assert chosen is text_version
    assert chosen.default_summary() is primary_summary


def test_select_bill_text_for_topic_extraction_uses_latest_summary_without_primary() -> None:
    """With no primary summary set, the default summary is the newest one."""
    newest_summary = BillTextSummary(id=2, bill_text_id=10, summary="newest")
    older_summary = BillTextSummary(id=1, bill_text_id=10, summary="older")
    text_version = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest_summary, older_summary],
    )
    bill = _bill_with_text(text_version)

    chosen = _select_bill_text_for_topic_extraction(bill)

    assert chosen is text_version
    assert chosen.default_summary() is newest_summary


def test_create_select_bills_for_topic_extraction_uses_summary_exists_subquery() -> None:
    """The compiled query mentions the summary table and EXISTS, not ``bill_text.summary``."""
    rendered_sql = _compile_sql(create_select_bills_for_topic_extraction())

    # Substring checks against the rendered PostgreSQL text.
    assert "bill_text_summary" in rendered_sql
    assert "EXISTS" in rendered_sql
    assert "bill_text.summary" not in rendered_sql