From 12858a0f6e93262fc6fc9d420db0478102e0f26d Mon Sep 17 00:00:00 2001 From: Laura Luebbert Date: Fri, 26 Jun 2026 13:20:37 -0400 Subject: [PATCH 1/3] fix(opentargets): adapt to upstream API drift + skip deprecated expressions field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 11 opentargets tests were failing across all Python versions due to three distinct upstream changes: 1. GraphQL schema change (real bug) The Drug type's `synonyms` and `tradeNames` fields are now `[DrugLabelAndSource!]!` (was scalar list[str]). Querying without a sub-selection returns HTTP 400. Updated QUERY_STRING_DRUGS to query `synonyms { label }` and `tradeNames { label }`. The existing _collapse_singletons() post-processor flattens each {label: "X"} back to "X" automatically, so the public DataFrame shape is preserved. 2. Data drift (8 tests, fixtures refreshed) Open Targets re-indexed disease ontology IDs (EFO → MONDO), gene protein IDs, allele frequencies, depmap entries, and interaction data. Per the project convention (refresh fixtures, don't normalize) the expected_result blocks for test_opentargets, *_diseases, *_drugs, *_interactions, *_pharmacogenetics, *_depmap, *_depmap_filter, and *_interactions_no_limit were re-captured from the current upstream output via a one-shot helper script. 3. expressions field deprecated (2 tests, skipped) The Target.expressions field now returns [] for all queries; Open Targets replaced it with Target.baselineExpression which has a completely different schema (tissueBiosample, q1/q3/median/min/max instead of tissue/rna sub-objects). Migrating gget_opentargets to the new field is a user-facing API change and out of scope for this PR; the two affected tests are now marked as skipped with a reason field. Also adds a `skip` test type to tests/from_json.py so JSON-defined tests can be marked as known-skipped (with a reason message) without needing a separate Python test file or fixture deletion. Verified locally: 15 passed, 2 skipped, 0 failed. --- gget/gget_opentargets.py | 12 +++- tests/fixtures/test_opentargets.json | 104 +++++++++++++-------------- tests/from_json.py | 11 +++ 3 files changed, 69 insertions(+), 58 deletions(-) diff --git a/gget/gget_opentargets.py b/gget/gget_opentargets.py index 8037fb34..44777d72 100644 --- a/gget/gget_opentargets.py +++ b/gget/gget_opentargets.py @@ -43,8 +43,16 @@ } } description - synonyms - tradeNames + # synonyms and tradeNames are now [DrugLabelAndSource!]! (were scalar + # lists); query the `label` sub-field so _collapse_singletons flattens + # each {label: "X"} back to "X", preserving the list[str] shape users + # see in the resulting DataFrame. + synonyms { + label + } + tradeNames { + label + } maximumClinicalStage indications { rows { diff --git a/tests/fixtures/test_opentargets.json b/tests/fixtures/test_opentargets.json index 34841d71..721f8bbd 100644 --- a/tests/fixtures/test_opentargets.json +++ b/tests/fixtures/test_opentargets.json @@ -8,13 +8,13 @@ }, "expected_result": [ [ - 0.7297489019498119, - "EFO_0000274", + 0.7279798021712002, + "MONDO_0004980", "atopic eczema", "A common chronic pruritic inflammatory skin disease with a strong genetic component. Onset typically occurs during the first 2 years of life." ], [ - 0.6642728577751653, + 0.6952093612226559, "MONDO_0004979", "asthma", "A bronchial disease that is characterized by chronic inflammation and narrowing of the airways, which is caused by a combination of environmental and genetic factors resulting in recurring periods of wheezing (a whistling sound while breathing), chest tightness, shortness of breath, mucus production and coughing. The symptoms appear due to a variety of triggers such as allergens, irritants, respiratory infections, weather changes, exercise, stress, reflux disease, medications, foods and emotional anxiety." @@ -22,12 +22,13 @@ ] }, "test_opentargets_expression_no_limit": { - "type": "assert_equal_json_hash", + "type": "skip", "args": { "ensembl_id": "ENSG00000169194", "resource": "expression" }, - "expected_result": "7d32780ec48250553246c816d80b93ee" + "expected_result": "7d32780ec48250553246c816d80b93ee", + "reason": "Open Targets deprecated the `expressions` GraphQL field (returns []) in favor of `baselineExpression`, which has a different schema (tissueBiosample, q1/q3/median/min/max instead of tissue/rna sub-objects). Migration is a user-facing API change; tracked separately." }, "test_opentargets_depmap": { "type": "assert_equal_json_hash", @@ -35,7 +36,7 @@ "ensembl_id": "ENSG00000169194", "resource": "depmap" }, - "expected_result": "c335cc9c9b3167e8c5b3084e339c88a7" + "expected_result": "e81a60ff47c06bfecb3b421d28047718" }, "test_opentargets_depmap_filter": { "type": "assert_equal", @@ -47,26 +48,7 @@ }, "limit": 2 }, - "expected_result": [ - [ - "UBERON_0002367", - "prostate gland", - "DU 145", - 0.034343916922807693, - "Prostate Adenocarcinoma", - "ACH-000979", - -0.14336788654327393 - ], - [ - "UBERON_0002367", - "prostate gland", - "WPE1-NA22", - 0.0291899424046278, - "Non-Cancerous", - "ACH-001422", - 0.06934770196676254 - ] - ] + "expected_result": [] }, "test_opentargets_interactions_no_limit": { "type": "assert_equal_json_hash", @@ -74,7 +56,7 @@ "ensembl_id": "ENSG00000169194", "resource": "interactions" }, - "expected_result": "fa95d278c2d31ded3731e154d65fcda5" + "expected_result": "8ed7f7380590628acdbaa5f9501365a6" }, "test_opentargets_interactions_simple_filter": { "type": "assert_equal", @@ -163,13 +145,13 @@ }, "expected_result": [ { - "score": 0.7297489019, - "disease.id": "EFO_0000274", + "score": 0.7279798022, + "disease.id": "MONDO_0004980", "disease.name": "atopic eczema", "disease.description": "A common chronic pruritic inflammatory skin disease with a strong genetic component. Onset typically occurs during the first 2 years of life." }, { - "score": 0.6642728578, + "score": 0.6952093612, "disease.id": "MONDO_0004979", "disease.name": "asthma", "disease.description": "A bronchial disease that is characterized by chronic inflammation and narrowing of the airways, which is caused by a combination of environmental and genetic factors resulting in recurring periods of wheezing (a whistling sound while breathing), chest tightness, shortness of breath, mucus production and coughing. The symptoms appear due to a variety of triggers such as allergens, irritants, respiratory infections, weather changes, exercise, stress, reflux disease, medications, foods and emotional anxiety." @@ -190,7 +172,7 @@ "drug.name": "LEBRIKIZUMAB", "drug.drugType": "Antibody", "drug.mechanismsOfAction.rows": "Interleukin-13 inhibitor", - "drug.description": "Antibody drug with a maximum clinical stage of Approval (across all indications), with an approval for atopic eczema and eczematoid dermatitis and 5 investigational indications.", + "drug.description": "Antibody drug with a maximum clinical stage of Approval (across all indications), with 3 approved and 7 investigational indications.", "drug.synonyms": [ "Lebrikizumab", "Lebrikizumab-lbkz", @@ -200,19 +182,20 @@ "PRO-301444 RG-3637", "PRO301444", "RO-5490255", - "Tnx-650" + "Tnx-650", + "ly3650150" ], "drug.tradeNames": "Ebglyss", "drug.maximumClinicalStage": "APPROVAL", "drug.indications.rows": [ - { - "id": "EFO_0000274", - "name": "atopic eczema" - }, { "id": "EFO_0000768", "name": "idiopathic pulmonary fibrosis" }, + { + "id": "EFO_1002029", + "name": "chronic rhinosinusitis with nasal polyps" + }, { "id": "MONDO_0004979", "name": "asthma" @@ -222,16 +205,24 @@ "name": "Eczematoid dermatitis" }, { - "id": "EFO_0000341", + "id": "MONDO_0005002", "name": "chronic obstructive pulmonary disease" }, { - "id": "EFO_0000183", + "id": "MONDO_0005083", + "name": "psoriasis" + }, + { + "id": "MONDO_0004952", "name": "Hodgkins lymphoma" }, { - "id": "MONDO_0004784", - "name": "allergic asthma" + "id": "MONDO_0004980", + "name": "atopic eczema" + }, + { + "id": "MONDO_0024332", + "name": "perennial allergic rhinitis" } ] }, @@ -249,12 +240,12 @@ "drug.maximumClinicalStage": "PHASE_2", "drug.indications.rows": [ { - "id": "MONDO_0004979", - "name": "asthma" + "id": "MONDO_0005101", + "name": "ulcerative colitis" }, { - "id": "EFO_0000729", - "name": "ulcerative colitis" + "id": "MONDO_0004979", + "name": "asthma" } ] } @@ -262,7 +253,7 @@ }, "test_opentargets_expression": { "function_call_to_reproduce": "output = opentargets(ensembl_id='ENSG00000169194', resource='expression', limit=2, json=True, verbose=False); print(json.dumps(output, indent=2))", - "type": "assert_equal_json_with_keys", + "type": "skip", "args": { "ensembl_id": "ENSG00000169194", "resource": "expression", @@ -297,7 +288,8 @@ "rna.unit": "", "rna.level": -1 } - ] + ], + "reason": "Open Targets deprecated the `expressions` GraphQL field (returns []) in favor of `baselineExpression`, which has a different schema (tissueBiosample, q1/q3/median/min/max instead of tissue/rna sub-objects). Migration is a user-facing API change; tracked separately." }, "test_opentargets_interactions": { "function_call_to_reproduce": "output = opentargets(ensembl_id='ENSG00000169194', resource='interactions', limit=2, json=True, verbose=False); print(json.dumps(output, indent=2))", @@ -314,13 +306,13 @@ "sourceDatabase": "string", "intA": "ENSP00000304915", "intABiologicalRole": "unspecified role", - "intB": "ENSP00000361004", + "intB": "ENSP00000360730", "intBBiologicalRole": "unspecified role", "targetA.id": "ENSG00000169194", "targetA.approvedSymbol": "IL13", "speciesA.taxonId": 134, - "targetB.id": "ENSG00000123496", - "targetB.approvedSymbol": "IL13RA2", + "targetB.id": "ENSG00000131724", + "targetB.approvedSymbol": "IL13RA1", "speciesB.taxonId": 134 }, { @@ -329,13 +321,13 @@ "sourceDatabase": "string", "intA": "ENSP00000304915", "intABiologicalRole": "unspecified role", - "intB": "ENSP00000360730", + "intB": "ENSP00000361004", "intBBiologicalRole": "unspecified role", "targetA.id": "ENSG00000169194", "targetA.approvedSymbol": "IL13", "speciesA.taxonId": 134, - "targetB.id": "ENSG00000131724", - "targetB.approvedSymbol": "IL13RA1", + "targetB.id": "ENSG00000123496", + "targetB.approvedSymbol": "IL13RA2", "speciesB.taxonId": 134 } ] @@ -369,11 +361,11 @@ }, { "variantId": "5_132660151_T_C", - "genotypeId": "5_132660151_T_C,C", - "genotype": "CC", + "genotypeId": "5_132660151_T_C,T", + "genotype": "CT", "drugs": null, - "phenotypeText": "decreased risk for non-immune response", - "genotypeAnnotationText": "Patients with the CC genotype may be at decreased risk for non-immune response to the hepatitis B vaccine, as compared to patients with the TT genotype. Other genetic and clinical factors may also influence risk of non-immune response in patients receiving the hepatitis B vaccine.", + "phenotypeText": "decreased risk for non-immune response to the hepatitis B vaccine", + "genotypeAnnotationText": "Patients with the CT genotype may be at decreased risk for non-immune response to the hepatitis B vaccine, as compared to patients with the TT genotype, or at increased risk as compared to patients with the CC genotype. Other genetic and clinical factors may also influence risk of non-immune response in patients receiving the hepatitis B vaccine.", "pgxCategory": "efficacy", "isDirectTarget": false, "evidenceLevel": "3", diff --git a/tests/from_json.py b/tests/from_json.py index 1b41cbcf..82fcd68b 100644 --- a/tests/from_json.py +++ b/tests/from_json.py @@ -279,6 +279,16 @@ def error(self: unittest.TestCase): # _test_constructor = Callable[[str, dict[str, dict[str, ...]], Callable], Callable] +def _skip(name, td, _func): + """Mark a JSON-defined test as skipped, with the `reason` from the fixture.""" + reason = td[name].get("reason", "skipped") + + def skip(self: unittest.TestCase): + self.skipTest(reason) + + return skip + + _TYPES = { "assert_equal": _assert_equal, "assert_equal_na": _assert_equal_na, @@ -288,6 +298,7 @@ def error(self: unittest.TestCase): "assert_equal_json_hash_nested": _assert_equal_json_hash_nested, "assert_equal_json_with_keys": _assert_equal_json_with_keys, "error": _error, + "skip": _skip, } From 206f59d8c63ec0ed1b103d0a47e4ce9a8e9c4b1a Mon Sep 17 00:00:00 2001 From: Laura Luebbert Date: Fri, 26 Jun 2026 14:07:19 -0400 Subject: [PATCH 2/3] docs: add 0.30.8 entries for opentargets fixes - Drugs resource: HTTP 400 fix (synonyms/tradeNames sub-selection). Surfaces both the upstream cause and reassures users that the DataFrame column shape is unchanged. - Expression resource: known limitation pointing at the baselineExpression migration as the next step. Flags that the tests for this path are skipped in the meantime. --- docs/src/en/updates.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/src/en/updates.md b/docs/src/en/updates.md index f2c52c13..52702ece 100644 --- a/docs/src/en/updates.md +++ b/docs/src/en/updates.md @@ -8,6 +8,9 @@ - [`gget pdb`](pdb.md): Added support for the PDBx/mmCIF structure format (fixes [issue 178](https://github.com/scverse/gget/issues/178) and [issue 177](https://github.com/scverse/gget/issues/177)). - New `resource="mmcif"` option downloads the structure in PDBx/mmCIF format (`.cif`). - The default `resource="pdb"` now automatically falls back to PDBx/mmCIF when the legacy PDB file is unavailable (e.g. for large structures), since the legacy PDB format is being phased out by RCSB. A warning is logged and saved files use the correct extension (`.cif`). +- [`gget opentargets`](opentargets.md): Adapted to several upstream Open Targets GraphQL API changes: + - **Fixed**: `gget opentargets resource="drugs"` was failing with `HTTP 400 — Field 'synonyms' of type '[DrugLabelAndSource!]!' must have a sub selection.` Open Targets changed `Drug.synonyms` and `Drug.tradeNames` from scalar lists to lists of structured `DrugLabelAndSource` objects. The internal GraphQL query was updated to request the `label` sub-field; the `drug.synonyms` / `drug.tradeNames` columns in the returned DataFrame remain `list[str]`, so existing user code is unaffected. + - **Known limitation**: `gget opentargets resource="expression"` now returns an empty DataFrame because Open Targets deprecated the `Target.expressions` field. The replacement `Target.baselineExpression` field has a different schema (`tissueBiosample`, `q1`/`q3`/`median`/`min`/`max` instead of `tissue`/`rna` sub-objects) and will be wired up in a future release. The corresponding tests are marked as skipped until then. **Version ≥ 0.30.7** (Jun 21, 2026): From 97cff8a88a5cf2eb523b465f6e250f3aafef5a5b Mon Sep 17 00:00:00 2001 From: Laura Luebbert Date: Fri, 26 Jun 2026 14:40:51 -0400 Subject: [PATCH 3/3] opentargets(expression): warn user about upstream deprecation Without this, gget opentargets resource="expression" silently returns an empty DataFrame, which looks identical to "your gene has no expression data" and gives the user no signal that the upstream field is actually retired. Now emits a logger.warning naming the deprecated field, the planned baselineExpression replacement, and the tracking issue (#247) so users can subscribe / contribute. Updated the 0.30.8 entry in updates.md to mention the warning. --- docs/src/en/updates.md | 2 +- gget/gget_opentargets.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/src/en/updates.md b/docs/src/en/updates.md index 52702ece..282727ab 100644 --- a/docs/src/en/updates.md +++ b/docs/src/en/updates.md @@ -10,7 +10,7 @@ - The default `resource="pdb"` now automatically falls back to PDBx/mmCIF when the legacy PDB file is unavailable (e.g. for large structures), since the legacy PDB format is being phased out by RCSB. A warning is logged and saved files use the correct extension (`.cif`). - [`gget opentargets`](opentargets.md): Adapted to several upstream Open Targets GraphQL API changes: - **Fixed**: `gget opentargets resource="drugs"` was failing with `HTTP 400 — Field 'synonyms' of type '[DrugLabelAndSource!]!' must have a sub selection.` Open Targets changed `Drug.synonyms` and `Drug.tradeNames` from scalar lists to lists of structured `DrugLabelAndSource` objects. The internal GraphQL query was updated to request the `label` sub-field; the `drug.synonyms` / `drug.tradeNames` columns in the returned DataFrame remain `list[str]`, so existing user code is unaffected. - - **Known limitation**: `gget opentargets resource="expression"` now returns an empty DataFrame because Open Targets deprecated the `Target.expressions` field. The replacement `Target.baselineExpression` field has a different schema (`tissueBiosample`, `q1`/`q3`/`median`/`min`/`max` instead of `tissue`/`rna` sub-objects) and will be wired up in a future release. The corresponding tests are marked as skipped until then. + - **Known limitation**: `gget opentargets resource="expression"` now returns an empty DataFrame because Open Targets deprecated the `Target.expressions` field. A `logger.warning` is now emitted when this resource is invoked so users see *why* the result is empty (rather than silently getting `[]`). The replacement `Target.baselineExpression` field has a different schema (`tissueBiosample`, `q1`/`q3`/`median`/`min`/`max` instead of `tissue`/`rna` sub-objects) and will be wired up in a future release (tracked in [issue 247](https://github.com/scverse/gget/issues/247)). The corresponding tests are marked as skipped until then. **Version ≥ 0.30.7** (Jun 21, 2026): diff --git a/gget/gget_opentargets.py b/gget/gget_opentargets.py index 44777d72..fd53216f 100644 --- a/gget/gget_opentargets.py +++ b/gget/gget_opentargets.py @@ -332,6 +332,19 @@ def opentargets( query_string = QUERY_STRING_PHARMACOGENETICS rows_path = ["pharmacogenomics"] elif resource == "expression": + # Open Targets deprecated `Target.expressions` upstream (returns [] + # for all targets as of mid-2026). The replacement field + # `Target.baselineExpression` has a different schema (tissueBiosample, + # q1/q3/median/min/max instead of tissue/rna sub-objects); migrating + # gget's output is a user-facing change tracked in + # https://github.com/scverse/gget/issues/247. + logger.warning( + "gget opentargets resource='expression' is currently broken upstream: " + "Open Targets retired the `Target.expressions` GraphQL field (returns []). " + "A future gget release will migrate to the replacement `Target.baselineExpression` " + "field, which has a different output schema. Track at " + "https://github.com/scverse/gget/issues/247." + ) query_string = QUERY_STRING_EXPRESSION rows_path = ["expressions"] elif resource == "depmap":