diff --git a/news/changelog-1.7.md b/news/changelog-1.7.md index 328969b6f18..3b908300467 100644 --- a/news/changelog-1.7.md +++ b/news/changelog-1.7.md @@ -123,6 +123,7 @@ All changes included in 1.7: ### `jupyter` - ([#12114](https://github.com/quarto-dev/quarto-cli/issues/12114)): `JUPYTERCACHE` environment variable from [Jupyter cache CLI](https://jupyter-cache.readthedocs.io/en/latest/using/cli.html) is now respected by Quarto when `cache: true` is used. This environment variable allows to change the path of the cache directory. +- ([#12374](https://github.com/quarto-dev/quarto-cli/issues/12374)): Detect language properly in Jupyter notebooks that lack the `language` field in their `kernelspec`s. ## Other Fixes and Improvements diff --git a/src/command/convert/jupyter.ts b/src/command/convert/jupyter.ts index d3156b21c06..3f56845fdce 100644 --- a/src/command/convert/jupyter.ts +++ b/src/command/convert/jupyter.ts @@ -47,7 +47,23 @@ export async function jupyterNotebookToMarkdown( ) { // read notebook & alias kernelspec const notebook = fixupFrontMatter(jupyterFromFile(file)); - const kernelspec = notebook.metadata.kernelspec; + let kernelspec = notebook.metadata.kernelspec; + + // https://github.com/quarto-dev/quarto-cli/issues/12374 + // narrow fix for .ipynbs that have a language_info field but no kernelspec.language + if ( + kernelspec.language === undefined && notebook.metadata.language_info?.name + ) { + kernelspec = { + ...kernelspec, + language: notebook.metadata.language_info?.name, + }; + } + if (kernelspec.language === undefined) { + throw new Error( + "No language found in kernelspec for notebook " + file, + ); + } // generate markdown const md: string[] = []; diff --git a/src/core/jupyter/types.ts b/src/core/jupyter/types.ts index e8a78a087a4..92e0862345a 100644 --- a/src/core/jupyter/types.ts +++ b/src/core/jupyter/types.ts @@ -74,6 +74,16 @@ export interface JupyterCapabilitiesEx extends JupyterCapabilities { venv?: boolean; } +// cf https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.schema.json +// note that this doesn't directly define the kernelspec as used in kernel.json +// but defines the kernelspec object used in .ipynb files +// +// note in addition that Quarto needs to know the language name which +// might not come from the kernelspec itself but will exist in a language_info +// field. When the kernelspec object in a jupyter notebook is missing the language field, +// this object's language field will come from the language_info.name field +// +// see https://github.com/quarto-dev/quarto-cli/issues/12374 export interface JupyterKernelspec { name: string; language: string; @@ -88,10 +98,20 @@ export interface JupyterAssets { supporting_dir: string; } +// cf https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.schema.json +export type JupyterLanguageInfo = { + name: string; + codemirror_mode?: string | Record; + file_extension?: string; + mimetype?: string; + pygments_lexer?: string; +}; + export interface JupyterNotebook { metadata: { kernelspec: JupyterKernelspec; widgets?: Record; + language_info?: JupyterLanguageInfo; [key: string]: unknown; }; cells: JupyterCell[]; diff --git a/src/execute/jupyter/jupyter-kernel.ts b/src/execute/jupyter/jupyter-kernel.ts index 4c8b04a88db..f980f0039f9 100644 --- a/src/execute/jupyter/jupyter-kernel.ts +++ b/src/execute/jupyter/jupyter-kernel.ts @@ -55,7 +55,8 @@ export async function executeKernelOneshot( } trace(options, "Executing notebook with oneshot kernel"); - const debug = !!options.format.execute[kExecuteDebug]; + const debug = !!options.format.execute[kExecuteDebug] || + (!!Deno.env.get("QUARTO_JUPYTER_DEBUG")); const result = await execJupyter( "execute", { ...options, debug }, diff --git a/src/execute/jupyter/jupyter.ts b/src/execute/jupyter/jupyter.ts index 45d6d933f20..5578bb11306 100644 --- a/src/execute/jupyter/jupyter.ts +++ b/src/execute/jupyter/jupyter.ts @@ -181,7 +181,20 @@ export const jupyterEngine: ExecutionEngine = { let nb: JupyterNotebook | undefined; if (isJupyterNotebook(file)) { const nbJSON = Deno.readTextFileSync(file); - nb = JSON.parse(nbJSON) as JupyterNotebook; + const nbRaw = JSON.parse(nbJSON); + + // https://github.com/quarto-dev/quarto-cli/issues/12374 + // kernelspecs are not guaranteed to have a language field + // so we need to check for it and if not present + // use the language_info.name field + if ( + nbRaw.metadata.kernelspec && + nbRaw.metadata.kernelspec.language === undefined && + nbRaw.metadata.language_info?.name + ) { + nbRaw.metadata.kernelspec.language = nbRaw.metadata.language_info.name; + } + nb = nbRaw as JupyterNotebook; } // cache check for percent script diff --git a/src/resources/jupyter/notebook.py b/src/resources/jupyter/notebook.py index 4d79f773598..d31e5212988 100644 --- a/src/resources/jupyter/notebook.py +++ b/src/resources/jupyter/notebook.py @@ -36,6 +36,14 @@ NB_FORMAT_VERSION = 4 +def get_language_from_nb_metadata(metadata): + ks_lang = metadata.kernelspec.get("language", None) + li_name = None + li = metadata.get("language_info", None) + if li: + li_name = metadata.language_info.get("name", None) + return ks_lang or li_name + # exception to indicate the kernel needs restarting class RestartKernel(Exception): pass @@ -192,7 +200,7 @@ def notebook_execute(options, status): nb_parameterize(nb, quarto_kernel_setup_options["params"]) # insert setup cell - setup_cell = nb_setup_cell(nb.metadata.kernelspec, quarto_kernel_setup_options) + setup_cell = nb_setup_cell(nb, quarto_kernel_setup_options) nb.cells.insert(0, setup_cell) nb_cache = retrieve_nb_from_cache(nb, status, **quarto_kernel_setup_options) @@ -254,7 +262,8 @@ def handle_meta_object(obj): if cell.cell_type == 'code': total_code_cells += 1 # map cells to their labels - label = nb_cell_yaml_options(client.nb.metadata.kernelspec.language, cell).get('label', '') + language = get_language_from_nb_metadata(client.nb.metadata) + label = nb_cell_yaml_options(language, cell).get('label', '') cell_labels.append(label) # find max label length max_label_len = max(max_label_len, len(label)) @@ -350,7 +359,7 @@ def handle_meta_object(obj): nb_write(client.nb, input) # execute cleanup cell - cleanup_cell = nb_cleanup_cell(nb.metadata.kernelspec, resource_dir) + cleanup_cell = nb_cleanup_cell(nb, resource_dir) if cleanup_cell: kernel_supports_daemonization = True nb.cells.append(cleanup_cell) @@ -425,18 +434,20 @@ async def get_info(): def nb_write(nb, input): nbformat.write(nb, input, version = NB_FORMAT_VERSION) -def nb_setup_cell(kernelspec, options): +def nb_setup_cell(nb, options): options = dict(options) options["allow_empty"] = True - return nb_language_cell('setup', kernelspec, **options) + return nb_language_cell('setup', nb, **options) -def nb_cleanup_cell(kernelspec, resource_dir): - return nb_language_cell('cleanup', kernelspec, resource_dir, False) +def nb_cleanup_cell(nb, resource_dir): + return nb_language_cell('cleanup', nb, resource_dir, False) -def nb_language_cell(name, kernelspec, resource_dir, allow_empty, **args): - trace(json.dumps(kernelspec, indent=2)) +def nb_language_cell(name, nb, resource_dir, allow_empty, **args): + kernelspec = nb.metadata.kernelspec + language = get_language_from_nb_metadata(nb.metadata) + trace(json.dumps(nb.metadata, indent=2)) source = '' - lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', kernelspec.language) + lang_dir = os.path.join(resource_dir, 'jupyter', 'lang', language) if os.path.isdir(lang_dir): cell_file = glob.glob(os.path.join(lang_dir, name + '.*')) # base64-encode the run_path given @@ -445,7 +456,7 @@ def nb_language_cell(name, kernelspec, resource_dir, allow_empty, **args): with open(cell_file[0], 'r') as file: source = file.read().format(**args) else: - trace(f'No {kernelspec.language} directory found in {lang_dir}') + trace(f'No {language} directory found in {lang_dir}') trace(f'Will look for explicit quarto setup cell information in kernelspec dir') try: with open(os.path.join(kernelspec.path, f"quarto_{name}_cell"), 'r') as file: @@ -500,8 +511,9 @@ def nb_kernel_dependencies(setup_cell): def cell_execute(client, cell, index, execution_count, eval_default, store_history): + language = get_language_from_nb_metadata(client.nb.metadata) # read cell options - cell_options = nb_cell_yaml_options(client.nb.metadata.kernelspec.language, cell) + cell_options = nb_cell_yaml_options(language, cell) # check options for eval and error eval = cell_options.get('eval', eval_default) @@ -560,7 +572,7 @@ def clear_user_expressions(): del metadata["user_expressions"] # find expressions in source - language = client.nb.metadata.kernelspec.language + language = get_language_from_nb_metadata(client.nb.metadata) source = ''.join(cell.source) expressions = re.findall( fr'(?:^|[^`])`{{{language}}}[ \t]([^`]+)`', @@ -623,7 +635,7 @@ def nb_parameterize(nb, params): # alias kernel name and language kernel_name = nb.metadata.kernelspec.name - language = nb.metadata.kernelspec.language + language = get_language_from_nb_metadata(nb.metadata) # find params index and note any tags/yaml on it (exit if no params) params_index = find_first_tagged_cell_index(nb, "parameters") @@ -701,7 +713,7 @@ def find_first_tagged_cell_index(nb, tag): return parameters_indices[0] def nb_strip_yaml_options(client, source): - yaml_lines = nb_cell_yaml_lines(client.nb.metadata.kernelspec.language, source) + yaml_lines = nb_cell_yaml_lines(get_language_from_nb_metadata(client.nb.metadata), source) num_yaml_lines = len(yaml_lines) if num_yaml_lines > 0: return "\n".join(source.splitlines()[num_yaml_lines:]) diff --git a/tests/docs/jupyter/issue-12374.ipynb b/tests/docs/jupyter/issue-12374.ipynb new file mode 100644 index 00000000000..968b6d8ade7 --- /dev/null +++ b/tests/docs/jupyter/issue-12374.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KSSliJZInRNP", + "outputId": "7d826c0a-e6ab-4f76-f334-4cff966e6253" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello, world\n" + ] + } + ], + "source": [ + "print(\"Hello, world\")" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tests/smoke/jupyter/issue-12374.test.ts b/tests/smoke/jupyter/issue-12374.test.ts new file mode 100644 index 00000000000..56c5ce67dc0 --- /dev/null +++ b/tests/smoke/jupyter/issue-12374.test.ts @@ -0,0 +1,25 @@ +/* + * issue-12374.test.ts + * + * https://github.com/quarto-dev/quarto-cli/issues/12374 + * + * Copyright (C) 2023 Posit Software, PBC + */ + +import { quarto } from "../../../src/quarto.ts"; +import { test } from "../../test.ts"; +import { assertEquals } from "testing/asserts"; +import { noErrors } from "../../verify.ts"; + +test({ + name: "jupyter:issue-12374.test.ts", + context: {}, + execute: async () => { + // https://github.com/quarto-dev/quarto-cli/issues/12374 + await quarto(["render", + "docs/jupyter/issue-12374.ipynb", + "--no-execute-daemon", "--execute"]); + }, + verify: [noErrors], + type: "smoke", +});