From 8968cc8e51fd3737049f9d86b38e81fff8b50a95 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 31 Mar 2025 12:15:23 -0400 Subject: [PATCH 1/4] jupyter - use language from language_info when necessary (#12374) --- news/changelog-1.7.md | 1 + src/command/convert/jupyter.ts | 18 ++++++++- src/core/jupyter/types.ts | 20 ++++++++++ src/execute/jupyter/jupyter-kernel.ts | 3 +- src/execute/jupyter/jupyter.ts | 14 ++++++- src/resources/jupyter/notebook.py | 34 +++++++++-------- tests/docs/jupyter/issue-12374.ipynb | 50 +++++++++++++++++++++++++ tests/smoke/jupyter/issue-12374.test.ts | 25 +++++++++++++ 8 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 tests/docs/jupyter/issue-12374.ipynb create mode 100644 tests/smoke/jupyter/issue-12374.test.ts diff --git a/news/changelog-1.7.md b/news/changelog-1.7.md index 328969b6f18..3b908300467 100644 --- a/news/changelog-1.7.md +++ b/news/changelog-1.7.md @@ -123,6 +123,7 @@ All changes included in 1.7: ### `jupyter` - ([#12114](https://github.com/quarto-dev/quarto-cli/issues/12114)): `JUPYTERCACHE` environment variable from [Jupyter cache CLI](https://jupyter-cache.readthedocs.io/en/latest/using/cli.html) is now respected by Quarto when `cache: true` is used. This environment variable allows to change the path of the cache directory. +- ([#12374](https://github.com/quarto-dev/quarto-cli/issues/12374)): Detect language properly in Jupyter notebooks that lack the `language` field in their `kernelspec`s. ## Other Fixes and Improvements diff --git a/src/command/convert/jupyter.ts b/src/command/convert/jupyter.ts index d3156b21c06..3f56845fdce 100644 --- a/src/command/convert/jupyter.ts +++ b/src/command/convert/jupyter.ts @@ -47,7 +47,23 @@ export async function jupyterNotebookToMarkdown( ) { // read notebook & alias kernelspec const notebook = fixupFrontMatter(jupyterFromFile(file)); - const kernelspec = notebook.metadata.kernelspec; + let kernelspec = notebook.metadata.kernelspec; + + // https://github.com/quarto-dev/quarto-cli/issues/12374 + // narrow fix for .ipynbs that have a language_info field but no kernelspec.language + if ( + kernelspec.language === undefined && notebook.metadata.language_info?.name + ) { + kernelspec = { + ...kernelspec, + language: notebook.metadata.language_info?.name, + }; + } + if (kernelspec.language === undefined) { + throw new Error( + "No language found in kernelspec for notebook " + file, + ); + } // generate markdown const md: string[] = []; diff --git a/src/core/jupyter/types.ts b/src/core/jupyter/types.ts index e8a78a087a4..92e0862345a 100644 --- a/src/core/jupyter/types.ts +++ b/src/core/jupyter/types.ts @@ -74,6 +74,16 @@ export interface JupyterCapabilitiesEx extends JupyterCapabilities { venv?: boolean; } +// cf https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.schema.json +// note that this doesn't directly define the kernelspec as used in kernel.json +// but defines the kernelspec object used in .ipynb files +// +// note in addition that Quarto needs to know the language name which +// might not come from the kernelspec itself but will exist in a language_info +// field. When the kernelspec object in a jupyter notebook is missing the language field, +// this object's language field will come from the language_info.name field +// +// see https://github.com/quarto-dev/quarto-cli/issues/12374 export interface JupyterKernelspec { name: string; language: string; @@ -88,10 +98,20 @@ export interface JupyterAssets { supporting_dir: string; } +// cf https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.schema.json +export type JupyterLanguageInfo = { + name: string; + codemirror_mode?: string | Record; + file_extension?: string; + mimetype?: string; + pygments_lexer?: string; +}; + export interface JupyterNotebook { metadata: { kernelspec: JupyterKernelspec; widgets?: Record; + language_info?: JupyterLanguageInfo; [key: string]: unknown; }; cells: JupyterCell[]; diff --git a/src/execute/jupyter/jupyter-kernel.ts b/src/execute/jupyter/jupyter-kernel.ts index 4c8b04a88db..f980f0039f9 100644 --- a/src/execute/jupyter/jupyter-kernel.ts +++ b/src/execute/jupyter/jupyter-kernel.ts @@ -55,7 +55,8 @@ export async function executeKernelOneshot( } trace(options, "Executing notebook with oneshot kernel"); - const debug = !!options.format.execute[kExecuteDebug]; + const debug = !!options.format.execute[kExecuteDebug] || + (!!Deno.env.get("QUARTO_JUPYTER_DEBUG")); const result = await execJupyter( "execute", { ...options, debug }, diff --git a/src/execute/jupyter/jupyter.ts b/src/execute/jupyter/jupyter.ts index 45d6d933f20..97ff846bc6c 100644 --- a/src/execute/jupyter/jupyter.ts +++ b/src/execute/jupyter/jupyter.ts @@ -181,7 +181,19 @@ export const jupyterEngine: ExecutionEngine = { let nb: JupyterNotebook | undefined; if (isJupyterNotebook(file)) { const nbJSON = Deno.readTextFileSync(file); - nb = JSON.parse(nbJSON) as JupyterNotebook; + const nbRaw = JSON.parse(nbJSON); + + // https://github.com/quarto-dev/quarto-cli/issues/12374 + // kernelspecs are not guaranteed to have a language field + // so we need to check for it and if not present + // use the language_info.name field + if ( + nbRaw.metadata.kernelspec.language === undefined && + nbRaw.metadata.language_info?.name + ) { + nbRaw.metadata.kernelspec.language = nbRaw.metadata.language_info.name; + } + nb = nbRaw as JupyterNotebook; } // cache check for percent script diff --git a/src/resources/jupyter/notebook.py b/src/resources/jupyter/notebook.py index 4d79f773598..44661eb95ed 100644 --- a/src/resources/jupyter/notebook.py +++ b/src/resources/jupyter/notebook.py @@ -192,7 +192,7 @@ def notebook_execute(options, status): nb_parameterize(nb, quarto_kernel_setup_options["params"]) # insert setup cell - setup_cell = nb_setup_cell(nb.metadata.kernelspec, quarto_kernel_setup_options) + setup_cell = nb_setup_cell(nb, quarto_kernel_setup_options) nb.cells.insert(0, setup_cell) nb_cache = retrieve_nb_from_cache(nb, status, **quarto_kernel_setup_options) @@ -254,7 +254,8 @@ def handle_meta_object(obj): if cell.cell_type == 'code': total_code_cells += 1 # map cells to their labels - label = nb_cell_yaml_options(client.nb.metadata.kernelspec.language, cell).get('label', '') + language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name) + label = nb_cell_yaml_options(language, cell).get('label', '') cell_labels.append(label) # find max label length max_label_len = max(max_label_len, len(label)) @@ -350,7 +351,7 @@ def handle_meta_object(obj): nb_write(client.nb, input) # execute cleanup cell - cleanup_cell = nb_cleanup_cell(nb.metadata.kernelspec, resource_dir) + cleanup_cell = nb_cleanup_cell(nb, resource_dir) if cleanup_cell: kernel_supports_daemonization = True nb.cells.append(cleanup_cell) @@ -425,18 +426,20 @@ async def get_info(): def nb_write(nb, input): nbformat.write(nb, input, version = NB_FORMAT_VERSION) -def nb_setup_cell(kernelspec, options): +def nb_setup_cell(nb, options): options = dict(options) options["allow_empty"] = True - return nb_language_cell('setup', kernelspec, **options) + return nb_language_cell('setup', nb, **options) -def nb_cleanup_cell(kernelspec, resource_dir): - return nb_language_cell('cleanup', kernelspec, resource_dir, False) +def nb_cleanup_cell(nb, resource_dir): + return nb_language_cell('cleanup', nb, resource_dir, False) -def nb_language_cell(name, kernelspec, resource_dir, allow_empty, **args): - trace(json.dumps(kernelspec, indent=2)) +def nb_language_cell(name, nb, resource_dir, allow_empty, **args): + kernelspec = nb.metadata.kernelspec + language = nb.metadata.kernelspec.get("language", nb.metadata.language_info.name) + trace(json.dumps(nb.metadata, indent=2)) source = '' - lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', kernelspec.language) + lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', language) if os.path.isdir(lang_dir): cell_file = glob.glob(os.path.join(lang_dir, name + '.*')) # base64-encode the run_path given @@ -445,7 +448,7 @@ def nb_language_cell(name, kernelspec, resource_dir, allow_empty, **args): with open(cell_file[0], 'r') as file: source = file.read().format(**args) else: - trace(f'No {kernelspec.language} directory found in {lang_dir}') + trace(f'No {language} directory found in {lang_dir}') trace(f'Will look for explicit quarto setup cell information in kernelspec dir') try: with open(os.path.join(kernelspec.path, f"quarto_{name}_cell"), 'r') as file: @@ -500,8 +503,9 @@ def nb_kernel_dependencies(setup_cell): def cell_execute(client, cell, index, execution_count, eval_default, store_history): + language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name) # read cell options - cell_options = nb_cell_yaml_options(client.nb.metadata.kernelspec.language, cell) + cell_options = nb_cell_yaml_options(language, cell) # check options for eval and error eval = cell_options.get('eval', eval_default) @@ -560,7 +564,7 @@ def clear_user_expressions(): del metadata["user_expressions"] # find expressions in source - language = client.nb.metadata.kernelspec.language + language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name) source = ''.join(cell.source) expressions = re.findall( fr'(?:^|[^`])`{{{language}}}[ \t]([^`]+)`', @@ -623,7 +627,7 @@ def nb_parameterize(nb, params): # alias kernel name and language kernel_name = nb.metadata.kernelspec.name - language = nb.metadata.kernelspec.language + language = nb.metadata.kernelspec.get("language", nb.metadata.language_info.name) # find params index and note any tags/yaml on it (exit if no params) params_index = find_first_tagged_cell_index(nb, "parameters") @@ -701,7 +705,7 @@ def find_first_tagged_cell_index(nb, tag): return parameters_indices[0] def nb_strip_yaml_options(client, source): - yaml_lines = nb_cell_yaml_lines(client.nb.metadata.kernelspec.language, source) + yaml_lines = nb_cell_yaml_lines(client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name), source) num_yaml_lines = len(yaml_lines) if num_yaml_lines > 0: return "\n".join(source.splitlines()[num_yaml_lines:]) diff --git a/tests/docs/jupyter/issue-12374.ipynb b/tests/docs/jupyter/issue-12374.ipynb new file mode 100644 index 00000000000..968b6d8ade7 --- /dev/null +++ b/tests/docs/jupyter/issue-12374.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KSSliJZInRNP", + "outputId": "7d826c0a-e6ab-4f76-f334-4cff966e6253" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello, world\n" + ] + } + ], + "source": [ + "print(\"Hello, world\")" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tests/smoke/jupyter/issue-12374.test.ts b/tests/smoke/jupyter/issue-12374.test.ts new file mode 100644 index 00000000000..56c5ce67dc0 --- /dev/null +++ b/tests/smoke/jupyter/issue-12374.test.ts @@ -0,0 +1,25 @@ +/* + * issue-12374.test.ts + * + * https://github.com/quarto-dev/quarto-cli/issues/12374 + * + * Copyright (C) 2023 Posit Software, PBC + */ + +import { quarto } from "../../../src/quarto.ts"; +import { test } from "../../test.ts"; +import { assertEquals } from "testing/asserts"; +import { noErrors } from "../../verify.ts"; + +test({ + name: "jupyter:issue-12374.test.ts", + context: {}, + execute: async () => { + // https://github.com/quarto-dev/quarto-cli/issues/12374 + await quarto(["render", + "docs/jupyter/issue-12374.ipynb", + "--no-execute-daemon", "--execute"]); + }, + verify: [noErrors], + type: "smoke", +}); From b5188db9d673fd0e0aa9d17d4bbda22191119d3a Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 31 Mar 2025 13:18:38 -0400 Subject: [PATCH 2/4] jupyter - use helper fn --- src/resources/jupyter/notebook.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/resources/jupyter/notebook.py b/src/resources/jupyter/notebook.py index 44661eb95ed..53100088707 100644 --- a/src/resources/jupyter/notebook.py +++ b/src/resources/jupyter/notebook.py @@ -36,6 +36,11 @@ NB_FORMAT_VERSION = 4 +def get_language_from_nb_metadata(metadata): + ks_lang = metadata.kernelspec.get("language", None) + li_name = metadata.language_info.get("name", None) + return ks_lang or li_name + # exception to indicate the kernel needs restarting class RestartKernel(Exception): pass @@ -254,7 +259,7 @@ def handle_meta_object(obj): if cell.cell_type == 'code': total_code_cells += 1 # map cells to their labels - language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name) + language = get_language_from_nb_metadata(client.nb.metadata) label = nb_cell_yaml_options(language, cell).get('label', '') cell_labels.append(label) # find max label length @@ -436,7 +441,7 @@ def nb_cleanup_cell(nb, resource_dir): def nb_language_cell(name, nb, resource_dir, allow_empty, **args): kernelspec = nb.metadata.kernelspec - language = nb.metadata.kernelspec.get("language", nb.metadata.language_info.name) + language = get_language_from_nb_metadata(nb.metadata) trace(json.dumps(nb.metadata, indent=2)) source = '' lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', language) @@ -503,7 +508,7 @@ def nb_kernel_dependencies(setup_cell): def cell_execute(client, cell, index, execution_count, eval_default, store_history): - language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name) + language = get_language_from_nb_metadata(client.nb.metadata) # read cell options cell_options = nb_cell_yaml_options(language, cell) @@ -564,7 +569,7 @@ def clear_user_expressions(): del metadata["user_expressions"] # find expressions in source - language = client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name) + language = get_language_from_nb_metadata(client.nb.metadata) source = ''.join(cell.source) expressions = re.findall( fr'(?:^|[^`])`{{{language}}}[ \t]([^`]+)`', @@ -627,7 +632,7 @@ def nb_parameterize(nb, params): # alias kernel name and language kernel_name = nb.metadata.kernelspec.name - language = nb.metadata.kernelspec.get("language", nb.metadata.language_info.name) + language = get_language_from_nb_metadata(nb.metadata) # find params index and note any tags/yaml on it (exit if no params) params_index = find_first_tagged_cell_index(nb, "parameters") @@ -705,7 +710,7 @@ def find_first_tagged_cell_index(nb, tag): return parameters_indices[0] def nb_strip_yaml_options(client, source): - yaml_lines = nb_cell_yaml_lines(client.nb.metadata.kernelspec.get("language", client.nb.metadata.language_info.name), source) + yaml_lines = nb_cell_yaml_lines(get_language_from_nb_metadata(client.nb.metadata), source) num_yaml_lines = len(yaml_lines) if num_yaml_lines > 0: return "\n".join(source.splitlines()[num_yaml_lines:]) From 4aa31cd43381cffecedd72d2d7ae56b2a4d2a57f Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 31 Mar 2025 13:29:50 -0400 Subject: [PATCH 3/4] language_info might not exist --- src/resources/jupyter/notebook.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/resources/jupyter/notebook.py b/src/resources/jupyter/notebook.py index 53100088707..d31e5212988 100644 --- a/src/resources/jupyter/notebook.py +++ b/src/resources/jupyter/notebook.py @@ -38,7 +38,10 @@ def get_language_from_nb_metadata(metadata): ks_lang = metadata.kernelspec.get("language", None) - li_name = metadata.language_info.get("name", None) + li_name = None + li = metadata.get("language_info", None) + if li: + li_name = metadata.language_info.get("name", None) return ks_lang or li_name # exception to indicate the kernel needs restarting From 24a366aacf110b137ed304a3b4df94477164129a Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 31 Mar 2025 14:52:02 -0400 Subject: [PATCH 4/4] fix optional chaining operators --- src/execute/jupyter/jupyter.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/execute/jupyter/jupyter.ts b/src/execute/jupyter/jupyter.ts index 97ff846bc6c..5578bb11306 100644 --- a/src/execute/jupyter/jupyter.ts +++ b/src/execute/jupyter/jupyter.ts @@ -188,6 +188,7 @@ export const jupyterEngine: ExecutionEngine = { // so we need to check for it and if not present // use the language_info.name field if ( + nbRaw.metadata.kernelspec && nbRaw.metadata.kernelspec.language === undefined && nbRaw.metadata.language_info?.name ) {