Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ def main(process_name, samplesheet, output):
five_prime = df_samplesheet["5' Barcode Sequence"]
three_prime = df_samplesheet["3' Barcode Sequence"]
three_prime_adapter = df_samplesheet["3' Adapter Sequence"]

# If only three prime barcodes are present, treat them as five prime barcodes, print a message to
# the user so they know this is happening and to remind them to use the correct ultraplex settings
if (five_prime.str.len() == 0).all():
print(
"NOTE: No 5' barcode sequence found in sample sheet, 3' barcodes will be formatted as 5' barcodes \
for Ultraplex input, ensure that Ultraplex is run with --three_prime_only flag. If using TSOs, \
ensure that --tso_seq flag is also used and set to the correct sequence."
)
five_prime = three_prime
three_prime = pd.Series([""] * len(five_prime))
five_prime.name = "5' Barcode Sequence"
three_prime.name = "3' Barcode Sequence"

df_samplesheet = pd.concat([sample_names, five_prime, three_prime, three_prime_adapter], axis=1)

# Init for loop
Expand Down
8 changes: 4 additions & 4 deletions modules/goodwright/ultraplex/ultraplex/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process ULTRAPLEX {
tag "${meta.id}"
label "process_high"

conda "bioconda::ultraplex=1.2.5"
conda "bioconda::ultraplex=1.2.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ultraplex:1.2.5--py38h4a8c8d9_0' :
'biocontainers/ultraplex:1.2.5--py38h4a8c8d9_0' }"
'https://depot.galaxyproject.org/singularity/ultraplex:1.2.9--py39hf95cd2a_0' :
'biocontainers/ultraplex:1.2.9--py39hf95cd2a_0' }"

input:
tuple val(meta), path(reads)
Expand All @@ -22,7 +22,7 @@ process ULTRAPLEX {
task.ext.when == null || task.ext.when

script:
def VERSION = "1.2.5" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def VERSION = "1.2.9" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"

Expand Down
81 changes: 71 additions & 10 deletions schema/demultiplex.json
Original file line number Diff line number Diff line change
@@ -1,31 +1,74 @@
{
"inputs": {
"file_options": {
"inputs": [
{
"name": "File options",
"description": "The files needed for demultiplexing.",
"properties": {
"modes": ["Single-end", "Paired-end"],
"params": {
"samplesheet": {
"name": "Annotation",
"type": "file",
"type": "data",
"pattern": "xlsx|csv",
"category": 2,
"required": true,
"modes": ["Single-end", "Paired-end"],
"description": "Sample annotation sheet."
},
"fastqs": {
"name": "FASTQ",
"type": "lane",
"pattern": "fq\\.gz$|fastq\\.gz$|fq$|fastq$",
"description": "The multiplexed FASTQ file(s) to demultiplex. You can provide one or multiple files to be concatenated, for both single and paired end data. At least one file must be provided.",
"required": true,
"type": "csv",
"output_headers": false,
"takes_filesets": true,
"fileset_category": 3,
"fileset_size": 1,
"modes": ["Single-end"],
"columns": [
{
"name": "fastq_1",
"type": "data",
"required": true,
"render": false,
"from_fileset": 1
}
]
},
"fastqs_": {
"param": "fastqs",
"name": "FASTQ",
"description": "The multiplexed FASTQ file(s) to demultiplex. You can provide one or multiple files to be concatenated, for both single and paired end data. At least one file must be provided.",
"required": true,
"description": "The multiplexed FASTQ file(s) to demultiplex. You can provide one or multiple files to be concatenated, for both single and paired end data. At least one file must be provided."
"type": "csv",
"output_headers": false,
"takes_filesets": true,
"fileset_category": 3,
"fileset_size": 2,
"modes": ["Paired-end"],
"columns": [
{
"name": "fastq_1",
"type": "data",
"required": true,
"render": false,
"from_fileset": 1
},
{
"name": "fastq_2",
"type": "data",
"required": true,
"render": false,
"from_fileset": 2
}
]
}
}
},
"ultraplex_options": {
{
"name": "Ultraplex options",
"description": "Advanced options for Ultraplex.",
"advanced": true,
"properties": {
"params": {
"fiveprimemismatches": {
"name": "5’ mismatches",
"description": "This option allows the user to specify how many mismatches are permitted when detecting which 5’ barcode a read contains. If set to zero, then the 5’ barcode must match the expected barcode perfectly. By default, this value is set to one mismatch.",
Expand Down Expand Up @@ -79,16 +122,34 @@
"description": "This option does not write out reads for which there was no barcode match, which may save time.",
"type": "boolean",
"required": false
},
"three_prime_only": {
"name": "3' barcodes only",
"description": "This is when you only have 3' barcodes. In this case, write each barcode on a new line in the barcode csv. Feel free to add names too like with 5' barcodes. The format of the barcode CSV should be exactly the same as if you only had 5' barcodes (see above). Barcode sequences should be as they would be in read 1 (just as above for 3' barcodes). Ultraplex uses the read 2 for demultiplexing here, and handles all reverse complementing internally.",
"type": "boolean",
"required": false
},
"tso_seq": {
"name": "TSO-seq",
"description": "This is used solely in conjunction with the option --three_prime_barcode. It is solely for TSOs that have a p5 sequence (i.e. give rise to the read 1). Give a sequence of Ns followed by Is. Ns are moved to the UMI. Is are trimmed from the read but not moved to the UMI. For example, if using a TSO of sequence (p5)NNNNNrGrGrG, you would specify --tso_seq NNNNNIII. This would move the five random bases to the UMI, but the three non-random bases from the rGs would be trimmed and ignored.",
"type": "string",
"required": false
}
}
}
},
],
"outputs": [
{
"name": "FASTQ Files",
"description": "The demultiplexed FASTQ files.",
"filetype": "fastq.gz",
"process": "ULTRAPLEX"
},
{
"name": "Ultraplex log",
"description": "Log file from demultiplexing",
"filetype": "log",
"process": "ULTRAPLEX"
}
]
}
16 changes: 8 additions & 8 deletions schema/icount_segment.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
{
"inputs": {
"genome_options": {
"inputs": [
{
"name": "File options",
"description": "The files needed for running.",
"properties": {
"params": {
"gtf": {
"name": "GTF",
"type": "file",
"pattern": "gtf",
"type": "data",
"pattern": "\\.gtf$",
"required": true,
"description": "A genome annotation file."
},
"fai": {
"name": "FAI",
"type": "file",
"pattern": "fai",
"type": "data",
"pattern": "\\.fai$",
"required": true,
"description": "A faidx genome index."
}
}
}
},
],
"outputs": [
{
"name": "Segmentation GTF",
Expand Down
1 change: 1 addition & 0 deletions tests/config/test_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ params {
clip_samplesheet_xlsx = "${gw_test_data_dir}/samplesheets/clip-samplesheet.xlsx"
clip_samplesheet_small = "${gw_test_data_dir}/samplesheets/clip-samplesheet-small.csv"
clip_samplesheet_adapter_mis = "${gw_test_data_dir}/samplesheets/clip-samplesheet-mismatch-adapter.csv"
clip_3bc_only = "${gw_test_data_dir}/samplesheets/clip-samplesheet-3prime-barcode-only.csv"

base_valid_single_pe = "${gw_test_data_dir}/samplesheets/base_samplesheet/valid-single-sample-pe.csv"
base_valid_single_se = "${gw_test_data_dir}/samplesheets/base_samplesheet/valid-single-sample-se.csv"
Expand Down
33 changes: 33 additions & 0 deletions tests/config/ultraplex-3prime-only.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Test configuration that runs Ultraplex with the --three_prime_only flag enabled.
params {
    fastq2 = null
    fiveprimemismatches = 1
    threeprimemismatches = 0
    adapter2 = null
    phredquality = 30
    phred_quality_5_prime = 0
    min_trim = 3
    final_min_length = 0
    keep_barcodes = false
    ignore_no_match = false
    three_prime_only = true
    // TSO sequence string (e.g. "NNNNNIII"); null leaves --tso_seq off the command line.
    tso_seq = null
}

process {
    withName: '.*ULTRAPLEX.*' {
        // Optional entries evaluate to '' when their parameter is unset or false.
        ext.args = [
            "-m5 ${params.fiveprimemismatches}",
            "-m3 ${params.threeprimemismatches}",
            params.adapter2 ? "-a2 ${params.adapter2}" : '',
            "-q ${params.phredquality}",
            "-q5 ${params.phred_quality_5_prime}",
            "-mt ${params.min_trim}",
            params.final_min_length != 0 ? "-l ${params.final_min_length}" : '',
            params.keep_barcodes ? '-kbc' : '',
            params.ignore_no_match ? '-inm' : '',
            params.three_prime_only ? '--three_prime_only' : '',
            // --tso_seq takes a sequence argument, so pass the value along with the flag.
            params.tso_seq ? "--tso_seq ${params.tso_seq}" : ''
        ].flatten().unique(false).join(' ').trim()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Sample Name,5' Barcode Sequence,3' Barcode Sequence,3' Adapter Sequence
sample_clip_1,,NNGACNN,AGATCGGAAGAGCGGTTCAG
sample_clip_2,,NNTACNN,AGATCGGAAGAGCGGTTCAG
9 changes: 9 additions & 0 deletions tests/subworkflows/demultiplex/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,12 @@ workflow test_channel_samplesheet {
DEMULTIPLEX ( samplesheet, fastq )
DEMULTIPLEX.out.fastq | view
}

// Runs DEMULTIPLEX against the 3'-barcode-only sample sheet and prints the emitted fastq channel.
workflow test_threeprimebarcode_only {

    // Both inputs must exist on disk; checkIfExists makes a missing file fail early.
    multiplexed_reads = file(params.goodwright_test_data['ultraplex']['multiplexed_fastq'], checkIfExists: true)
    barcode_sheet     = file(params.goodwright_test_data['samplesheets']['clip_3bc_only'], checkIfExists: true)

    DEMULTIPLEX ( barcode_sheet, multiplexed_reads )
    DEMULTIPLEX.out.fastq.view()
}
6 changes: 6 additions & 0 deletions tests/subworkflows/demultiplex/test_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,9 @@
tags:
- "subworkflows"
- "demultiplex"

- name: "test_sw_demultiplex_3bc_only"
command: nextflow run ./tests/subworkflows/demultiplex -c ./tests/config/ultraplex-3prime-only.config -c ./tests/config/nextflow.config -entry test_threeprimebarcode_only
tags:
- "subworkflows"
- "demultiplex"
7 changes: 7 additions & 0 deletions tests/wrappers/subworkflows/demultiplex/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,10 @@
- "wrappers"
- "wrappers/subworkflows"
- "wrappers/subworkflows/demultiplex"

- name: "test_wrappers_demultiplex_single_se_threeprimeonly"
command: nextflow run ./wrappers/subworkflows/demultiplex/main.nf -c tests/config/ultraplex-3prime-only.config -c ./tests/config/nextflow.config --samplesheet ./tests/data/samplesheets/clip-samplesheet-3prime-barcode-only.csv --fastqs ./tests/data/fastq_list/single_se_fastq.csv
tags:
- "wrappers"
- "wrappers/subworkflows"
- "wrappers/subworkflows/demultiplex"
10 changes: 7 additions & 3 deletions wrappers/subworkflows/demultiplex/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ params {
phredquality = 30
phred_quality_5_prime = 0
min_trim = 3
final_min_length = 0
final_min_length = 16
keep_barcodes = false
ignore_no_match = false
three_prime_only = false
tso_seq = null
}

process {
Expand All @@ -21,9 +23,11 @@ process {
"-q ${params.phredquality}",
"-q5 ${params.phred_quality_5_prime}",
"-mt ${params.min_trim}",
params.final_min_length != 0 ? "-l ${params.final_min_length}" : '',
"-l ${params.final_min_length}",
params.keep_barcodes ? '-kbc' : '',
params.ignore_no_match ? '-inm' : ''
params.ignore_no_match ? '-inm' : '',
params.three_prime_only ? '--three_prime_only' : '',
params.tso_seq ? "--tso_seq ${params.tso_seq}" : ''
].flatten().unique(false).join(' ').trim()
}
}