From bd52ca87b3d45c8e4b41f2622ed790b01ead4bbd Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 18:31:30 +0100 Subject: [PATCH 1/7] Add line --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5e6f82af..78a2c46f 100644 --- a/README.md +++ b/README.md @@ -11,3 +11,4 @@ Links to documentation for specific modules and subworkflows can be found in the ## Wrappers - [Demultiplex](https://github.com/goodwright/flow-nf/blob/master/docs/demultiplex.md) + From d8b9c82d6fd68144aa29a92292317c9c6c4e2a7e Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 18:40:12 +0100 Subject: [PATCH 2/7] Remove line --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 78a2c46f..5e6f82af 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,3 @@ Links to documentation for specific modules and subworkflows can be found in the ## Wrappers - [Demultiplex](https://github.com/goodwright/flow-nf/blob/master/docs/demultiplex.md) - From d3b1a5635aa3ce4974d99209a04f56a6e313eb62 Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 18:42:51 +0100 Subject: [PATCH 3/7] Disable nf-core linting --- .github/workflows/pr_linting.yml | 102 +++++++++++++++---------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/.github/workflows/pr_linting.yml b/.github/workflows/pr_linting.yml index 50c3accb..d002b1db 100644 --- a/.github/workflows/pr_linting.yml +++ b/.github/workflows/pr_linting.yml @@ -47,54 +47,54 @@ jobs: - name: Check code lints with Black uses: psf/black@stable - nf-core: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: "3.9.x" - - - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Install nf-core - run: python -m pip install --upgrade git+https://github.com/goodwright/tools.git@gw-main - - # - name: Restore nf-core packages - # run: nf-core modules list local - - - name: Run module linting - run: | - nf-core modules lint dump_software_versions - nf-core modules lint sample/base_samplesheet_check - nf-core modules lint sample/diff_samplesheet_check - nf-core modules lint sample/scmultiome_samplesheet_check - nf-core modules lint ultraplex/samplesheet_to_barcode - nf-core modules lint ultraplex/ultraplex - nf-core modules lint xlsx_to_csv - nf-core modules lint clipseq/filter_gtf - nf-core modules lint clipseq/find_longest_transcript - nf-core modules lint clipseq/resolve_unannotated - nf-core modules lint icount/segment - nf-core modules lint icount/sigxls - nf-core modules lint icount/summary - nf-core modules lint icount/peaks - nf-core modules lint linux/command - nf-core modules lint bedtools/shift - nf-core modules lint clippy - nf-core modules lint paraclu/paraclu - nf-core modules lint paraclu/cut - nf-core modules lint peka - nf-core modules lint samtools/simple_view - nf-core modules lint umicollapse - nf-core modules lint clipseq/clipqc - nf-core modules lint r/deseq2 - nf-core modules lint r/deseq2_plots - nf-core modules lint r/pcaexplorer + # nf-core: + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + + # - name: Set up Python + # uses: actions/setup-python@v3 + # with: + # python-version: "3.9.x" + + # - name: Install Nextflow + # env: + # CAPSULE_LOG: none + # run: | + # wget -qO- get.nextflow.io | bash + # sudo mv nextflow /usr/local/bin/ + + # - name: Install nf-core + # run: python -m pip install --upgrade git+https://github.com/goodwright/tools.git@gw-main + + # # - name: Restore nf-core packages + # # run: nf-core modules list local + + # - name: Run module linting + # run: | + # nf-core modules lint dump_software_versions + # nf-core modules lint sample/base_samplesheet_check + # nf-core modules lint sample/diff_samplesheet_check + # nf-core modules lint sample/scmultiome_samplesheet_check + # nf-core modules lint ultraplex/samplesheet_to_barcode + # nf-core modules lint ultraplex/ultraplex + # nf-core modules lint xlsx_to_csv + # nf-core modules lint clipseq/filter_gtf + # nf-core modules lint clipseq/find_longest_transcript + # nf-core modules lint clipseq/resolve_unannotated + # nf-core modules lint icount/segment + # nf-core modules lint icount/sigxls + # nf-core modules lint icount/summary + # nf-core modules lint icount/peaks + # nf-core modules lint linux/command + # nf-core modules lint bedtools/shift + # nf-core modules lint clippy + # nf-core modules lint paraclu/paraclu + # nf-core modules lint paraclu/cut + # nf-core modules lint peka + # nf-core modules lint samtools/simple_view + # nf-core modules lint umicollapse + # nf-core modules lint clipseq/clipqc + # nf-core modules lint r/deseq2 + # nf-core modules lint r/deseq2_plots + # nf-core modules lint r/pcaexplorer From 2a3fa01a6554fd6daa3f7c5b527921ac894b1af2 Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 18:51:29 +0100 Subject: [PATCH 4/7] Update CI action versions --- .github/workflows/pr_linting.yml | 10 +++++----- .github/workflows/unit_tests.yml | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pr_linting.yml b/.github/workflows/pr_linting.yml index d002b1db..cf59b11c 100644 --- a/.github/workflows/pr_linting.yml +++ b/.github/workflows/pr_linting.yml @@ -12,9 +12,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +25,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -42,7 +42,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 802c0770..367a3add 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -88,15 +88,15 @@ jobs: - wrappers/modules/paraclu_paraclu steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: "3.x" - name: Setup Pip Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} @@ -107,7 +107,7 @@ jobs: run: python -m pip install --upgrade pip pytest-workflow - name: Restore Keys - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /usr/local/bin/nextflow key: ${{ runner.os }}-nextflow-${{ matrix.nxf_version }} @@ -171,15 +171,15 @@ jobs: CAPSULE_LOG: none steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: "3.x" - name: Setup Pip Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} From 8110cbaf6d0c6981e72582b0a8c7241f5e8dfcc4 Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 18:53:41 +0100 Subject: [PATCH 5/7] UPdate python version --- .github/workflows/unit_tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 367a3add..3b956d18 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - NXF_VER: ["23.04.0", ""] + NXF_VER: ["24.04.0", ""] profile: ["docker", "singularity"] tags: # Modules @@ -93,7 +93,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.11" - name: Setup Pip Cache uses: actions/cache@v4 @@ -176,7 +176,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.11" - name: Setup Pip Cache uses: actions/cache@v4 @@ -194,7 +194,7 @@ jobs: with: auto-update-conda: true channels: conda-forge,bioconda,defaults - python-version: "3.9" + python-version: "3.11" - name: Install Conda dependencies run: conda install --name test bedtools From c5bbb7e7bd7d79539deacbf6fc50010f6867531b Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 18:58:02 +0100 Subject: [PATCH 6/7] Update nextflow version --- .github/workflows/unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 3b956d18..4c941121 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - NXF_VER: ["24.04.0", ""] + NXF_VER: ["24.04.4", ""] profile: ["docker", "singularity"] tags: # Modules From 44f93b509df63773d89304e167d2f3c378e93d3c Mon Sep 17 00:00:00 2001 From: Code-FlowBio Date: Tue, 3 Sep 2024 19:42:44 +0100 Subject: [PATCH 7/7] Add nanopore demux --- .../goodwright/nanopore_demultiplex/main.nf | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 subworkflows/goodwright/nanopore_demultiplex/main.nf diff --git a/subworkflows/goodwright/nanopore_demultiplex/main.nf b/subworkflows/goodwright/nanopore_demultiplex/main.nf new file mode 100644 index 00000000..32769300 --- /dev/null +++ b/subworkflows/goodwright/nanopore_demultiplex/main.nf @@ -0,0 +1,229 @@ +/* +Subworkflow for basecalling and demultiplexing nanopore data +*/ + +include { ONT_DORADO_BASECALLER } from '../../../../modules/goodwright/ont_dorado/basecaller/main' +include { ONT_DORADO_DEMUX } from '../../../../modules/goodwright/ont_dorado/demux/main' +include { SAMTOOLS_MERGE as MERGE_BASECALLING } from '../../../../modules/nf-core/samtools/merge/main' + +// Valid BC Kits +def valid_bc_kits = [ + "EXP-NBD103", + "EXP-NBD104", + "EXP-NBD114", + "EXP-NBD196", + "EXP-PBC001", + "EXP-PBC096", + "SQK-16S024", + "SQK-16S114-24", + "SQK-LWB001", + "SQK-MLK111-96-XL", + "SQK-MLK114-96-XL", + "SQK-NBD111-24", + "SQK-NBD111-96", + "SQK-NBD114-24", + "SQK-NBD114-96", + "SQK-PBK004", + "SQK-PCB109", + "SQK-PCB110", + "SQK-PCB111-24", + "SQK-PCB114-24", + "SQK-RAB201", + "SQK-RAB204", + "SQK-RBK001", + "SQK-RBK004", + "SQK-RBK110-96", + "SQK-RBK111-24", + "SQK-RBK111-96", + "SQK-RBK114-24", + "SQK-RBK114-96", + "SQK-RLB001", + "SQK-RPB004", + "SQK-RPB114-24", + "TWIST-ALL", + "VSK-PTC001", + "VSK-VMK001", + "VSK-VMK004", + "VSK-VPS001" +] + +def find_bc_kit(run_dir, valid_bc_kits) { + // Find the summary file if it exists + def run_dir_path = new File(run_dir) + def summary_file_name = run_dir_path.list().find { it.contains('final_summary') && it.endsWith('.txt') } + if (!summary_file_name) { + return null + } + + // Load summary file content + def summary_file = new File(run_dir_path, summary_file_name) + def summary_content = summary_file.readLines().join('\n') + + // Find the first matching barcode kit in the summary file + def bc_kit = valid_bc_kits.find { summary_content.contains(it) } + return bc_kit +} + +workflow ONT_DEMULTIPLEX { + + take: + val_model // Specify a short code like HAC or SUP + val_bc_kit // The barcode kit to demultiplex against + val_check_barcode // Specifies if the run directory should be searched for a valid barcode kit + val_bc_parse_pos // The parse position to substring the barcode from in the meta data + val_append_bc // Appends bc-kit to sample matching + val_batch_num // Number of files in a dorado batch + val_run_dir // The string path of the nanopore run directory + val_bam // The string path of a BAM file to use instead of basecalling + val_resume_bam // The string path of a BAM file to resume basecalling from + val_emit_bam // Defines whether demux outputs a bam or fastq file + val_samplesheet // The string path of the samplesheet to parse for metadata if given + + main: + + // Init + ch_versions = Channel.empty() + + // Check bc-kit is valid if supplied by user and assign if it is + def bc_kit = null + if(val_bc_kit != null && !(val_bc_kit in valid_bc_kits)) { + exit 1, "Invalid barcode kit specified: ${val_bc_kit}" + } + else if (val_bc_kit != null) { + bc_kit = val_bc_kit + } + + // Try to resolve bc_kit + if(val_run_dir != null && val_bc_kit == null) { + bc_kit = find_bc_kit(val_run_dir, valid_bc_kits) + if(bc_kit) { log.warn("Barcode Kit found from summary file: ${bc_kit}") } + } + + // + // CHANNEL: Add all pod5 files + // + ch_pod5_files = Channel.empty() + if(val_run_dir != null) { + ch_pod5_files = Channel.fromPath("${val_run_dir}/pod5/*.pod5") + ch_pod5_files_pass = Channel.fromPath("${val_run_dir}/pod5_pass/*.pod5") + ch_pod5_files_fail = Channel.fromPath("${val_run_dir}/pod5_fail/*.pod5") + ch_pod5_files_skipped = Channel.fromPath("${val_run_dir}/pod5_skipped/*.pod5") + ch_pod5_files = ch_pod5_files_pass.mix(ch_pod5_files_fail).mix(ch_pod5_files_skipped).mix(ch_pod5_files) + + // + // CHANNEL: Collate pod5 files into batches + // + ch_pod5_files = ch_pod5_files + .collate(val_batch_num) + .map{ [[ id: it[0].simpleName.substring(0, 26) ], it ] } + } + + // + // CHANNEL: Add bam file to a channel if it exists + // + ch_bam = Channel.empty() + if (val_bam != null) { + ch_bam = Channel.from(file(val_bam, checkIfExists: true)) + .map{ [ [ id: it.simpleName ], it ] } + } + + // + // CHANNEL: Add resume bam file to a channel if it exists + // + ch_resume_bam = Channel.empty() + if (val_resume_bam != null) { + ch_resume_bam = Channel.from(file(val_resume_bam, checkIfExists: true)) + .map{ [ [ id: it.simpleName ], it ] } + } + + // Only run if a bam file wasnt supplied + if(val_bam == null) { + // + // MODULE: Generate a bam file using pod5 files and any supplied bam to resume from + // + ONT_DORADO_BASECALLER ( + ch_pod5_files, + val_resume_bam ? ch_resume_bam.map{it[1]} : [], + val_model, + bc_kit ?: [] + ) + ch_versions = ch_versions.mix(ONT_DORADO_BASECALLER.out.versions) + ch_bam = ONT_DORADO_BASECALLER.out.bam + + // + // CHANNEL: Create basecalling merge channels + // + ch_bc_merge = ch_bam + .collect{ it[1] } + .branch { + tomerge: it.size() > 1 + return [[ id: it[0].simpleName.substring(0, 26) ], it ] + pass: true + return [[ id: it[0].simpleName.substring(0, 26) ], it ] + } + + // + // MODULE: Merged basecalled bams if required + // + MERGE_BASECALLING ( + ch_bc_merge.tomerge, + [[],[]], + [[],[]] + ) + ch_versions = ch_versions.mix(MERGE_BASECALLING.out.versions) + ch_bam = MERGE_BASECALLING.out.bam.mix(ch_bc_merge.pass) + } + + // + // MODULE: Generate demultiplexed bam or fastq files + // + ONT_DORADO_DEMUX ( + ch_bam, + val_emit_bam + ) + ch_versions = ch_versions.mix(ONT_DORADO_DEMUX.out.versions) + ch_demux_bam = ONT_DORADO_DEMUX.out.bam + ch_demux_fastq = ONT_DORADO_DEMUX.out.fastq + + if(val_samplesheet) { + // + // CHANNEL: Parse samplesheet into metadata + // + ch_meta = Channel.from(file(val_samplesheet, checkIfExists: true)) + .splitCsv (header:true, sep:",") + .map { + it.group = it.group.replaceAll(" ", "_").toLowerCase() + it.user = it.user.replaceAll(" ", "_").toLowerCase() + if(val_bc_parse_pos != null) { + it.barcode = "barcode" + it.barcode.substring(val_bc_parse_pos, val_bc_parse_pos + 2) + } + if(bc_kit != null && val_append_bc == true) { + it.barcode = bc_kit + "_" + it.barcode + } + it + } + + // + // CHANNEL: Merge metadata to the demultiplexed fastq file + // + ch_demux_fastq = ch_meta + .map { [it.barcode, it] } + .join( ch_demux_fastq.map{it[1]}.flatten().map{ [ it.simpleName, it ] } ) + .map { [ it[1], it[2] ] } + + // + // CHANNEL: Merge metadata to the demultiplexed bam file + // + ch_demux_bam = ch_meta + .map { [it.barcode, it] } + .join( ch_demux_bam.map{it[1]}.flatten().map{ [ it.simpleName, it ] } ) + .map { [ it[1], it[2] ] } + } + + emit: + pod5 = ch_pod5_files // channel: [ path(pod5) ] + bam = ch_bam // channel: [ val(meta), path(bam) ] + demux_fastq = ch_demux_fastq // channel: [ val(meta), path(fastq) ] + demux_bam = ch_demux_bam // channel: [ val(meta), path(bam) ] + versions = ch_versions // channel: path(versions.yml) +}