Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions funannotate2/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@
choose_best_busco_species,
create_directories,
create_tmpdir,
ensure_busco_lineage,
find_files,
load_json,
lookup_taxonomy,
naming_slug,
get_odb_version,
validate_busco_lineage,
)

Expand Down Expand Up @@ -345,9 +345,6 @@ def annotate(args):
# busco proteome analysis
busco_all = os.path.join(misc_dir, "busco.results.json")
busco_annots = os.path.join(misc_dir, "annotations.busco.tsv")
odb_version = get_odb_version(
os.path.join(os.path.dirname(__file__), "downloads.json")
)
if not checkfile(busco_annots):
if not taxonomy:
# get taxonomy information
Expand All @@ -366,9 +363,12 @@ def annotate(args):
else:
# choose best busco species
busco_species = choose_best_busco_species(taxonomy)
busco_model_path = os.path.join(
env["FUNANNOTATE2_DB"], f"{busco_species}_{odb_version}"
)
# Ensure the BUSCO lineage is available under FUNANNOTATE2_DB,
# downloading it if needed. In dockerized usage the lineage from a
# previous train/predict run is gone when annotate starts in a fresh
# container, so this guards against the cryptic "<path> is not a
# directory" failure from buscolite further down.
busco_model_path = ensure_busco_lineage(busco_species, logger)

# run busco proteome screen
logger.info(
Expand Down
35 changes: 7 additions & 28 deletions funannotate2/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,12 @@
choose_best_busco_species,
create_directories,
create_tmpdir,
download,
ensure_busco_lineage,
find_files,
load_json,
lookup_taxonomy,
naming_slug,
runProcessJob,
runSubprocess,
which_path,
get_odb_version,
)


Expand Down Expand Up @@ -906,29 +903,7 @@ def sort_gff_line(line):
busco_tax = choose_best_busco_species(
{"superkingdom": taxonomy.get("superkingdom"), "kingdom": taxonomy.get("kingdom")}
)
# pull the latest odb version from downloads link
odb_version = get_odb_version(
os.path.join(os.path.dirname(__file__), "downloads.json")
)
busco_model_path = os.path.join(
env["FUNANNOTATE2_DB"], f"{busco_tax}_{odb_version}"
)
if not os.path.isdir(busco_model_path):
download_urls = load_json(
os.path.join(os.path.dirname(__file__), "downloads.json")
)
busco_url = download_urls["busco"][busco_tax][0]
busco_tgz = os.path.join(env["FUNANNOTATE2_DB"], os.path.basename(busco_url))
logger.info(f"Downloading {busco_tax}_{odb_version} model from {busco_url}")
download(busco_url, busco_tgz, wget=False)
if os.path.isfile(busco_tgz):
runSubprocess(
["tar", "-zxf", os.path.basename(busco_tgz)],
logger,
cwd=env["FUNANNOTATE2_DB"],
)
if os.path.isdir(busco_model_path):
os.remove(busco_tgz)
busco_model_path = ensure_busco_lineage(busco_tax, logger)

# now we can loop through the abinitio predictions and run busco for completion
# write this to file for re-use if consensus file already present?
Expand Down Expand Up @@ -1611,7 +1586,11 @@ def run_tool_with_error_tracking(tool_name, run_func, *args, **kwargs):
)

if tools_run:
logger.info(f"Successfully ran tools for {contig_name}: {', '.join(tools_run)}")
# Per-contig success is a debug-level detail; on assemblies with many
# contigs an info-level line per contig drowns the user-facing log.
# The downstream "<tool> predictions filtered: ..." summary lines
# already convey aggregate success at info level.
logger.debug(f"Successfully ran tools for {contig_name}: {', '.join(tools_run)}")

# Store error information in memory stats for tracking
if monitor_memory:
Expand Down
35 changes: 5 additions & 30 deletions funannotate2/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,11 @@
choose_best_augustus_species,
choose_best_busco_species,
create_directories,
download,
load_json,
ensure_busco_lineage,
lookup_taxonomy,
naming_slug,
runSubprocess,
which_path,
get_odb_version,
rename_gff_contigs,
validate_busco_lineage,
validate_augustus_species,
Expand Down Expand Up @@ -142,13 +140,10 @@ def train(args):
else:
# choose best busco species
busco_species = choose_best_busco_species(taxonomy)
# pull the latest odb version from downloads link
odb_version = get_odb_version(
os.path.join(os.path.dirname(__file__), "downloads.json")
)
busco_model_path = os.path.join(
env["FUNANNOTATE2_DB"], f"{busco_species}_{odb_version}"
)
# Ensure the BUSCO lineage exists under FUNANNOTATE2_DB (downloading
# it if necessary) and capture its on-disk path for buscolite + the
# params.json output below.
busco_model_path = ensure_busco_lineage(busco_species, logger)

# run buscolite on genome to get training set
filt_train_models = os.path.join(misc_dir, "training-models.final.gff3")
Expand All @@ -159,26 +154,6 @@ def train(args):
logger.info(
f"Choosing best busco species based on taxonomy: {busco_species}"
)
if not os.path.isdir(busco_model_path):
download_urls = load_json(
os.path.join(os.path.dirname(__file__), "downloads.json")
)
busco_url = download_urls["busco"][busco_species][0]
busco_tgz = os.path.join(
env["FUNANNOTATE2_DB"], os.path.basename(busco_url)
)
logger.info(
f"Downloading {busco_species}_{odb_version} model from {busco_url}"
)
download(busco_url, busco_tgz, wget=False)
if os.path.isfile(busco_tgz):
runSubprocess(
["tar", "-zxf", os.path.basename(busco_tgz)],
logger,
cwd=env["FUNANNOTATE2_DB"],
)
if os.path.isdir(busco_model_path):
os.remove(busco_tgz)
log("Running buscolite to generate training set using filtered genome")
buscolite(
TrainingGenomeFasta,
Expand Down
48 changes: 47 additions & 1 deletion funannotate2/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import requests

from .config import augustus_species, busco_taxonomy
from .config import augustus_species, busco_taxonomy, env

# disable insecure warning
requests.packages.urllib3.disable_warnings()
Expand Down Expand Up @@ -159,6 +159,52 @@ def get_odb_version(downloads_json_file):
return sorted(odb_versions, reverse=True)[0]


def ensure_busco_lineage(species, logger):
    """
    Ensure the BUSCO lineage `<species>_<odb_version>` is present under
    FUNANNOTATE2_DB, downloading and extracting it from the URL listed in
    downloads.json if it is missing. When the directory already exists the
    function returns immediately without touching the network, so callers
    can invoke it repeatedly.

    Parameters:
    - species (str): BUSCO lineage species name (e.g. "fungi", "aspergillus").
    - logger: Logger exposing .info / .critical (e.g. from startLogging).

    Returns:
    - str: Path to the lineage directory, built by joining
      env["FUNANNOTATE2_DB"] with `<species>_<odb_version>` (absolute only
      if FUNANNOTATE2_DB itself is absolute).

    Raises:
    - SystemExit(1): If downloads.json has no BUSCO URL for `species`, or
      if the directory is still missing after the download/extract attempt.
    """
    downloads_json = os.path.join(os.path.dirname(__file__), "downloads.json")
    odb_version = get_odb_version(downloads_json)
    db_dir = env["FUNANNOTATE2_DB"]
    busco_model_path = os.path.join(db_dir, f"{species}_{odb_version}")

    # Fast path: lineage already on disk, nothing to download.
    if os.path.isdir(busco_model_path):
        return busco_model_path

    download_urls = load_json(downloads_json)
    try:
        busco_url = download_urls["busco"][species][0]
    except (KeyError, IndexError):
        # Report an unknown lineage through the same logged, exit-code-1
        # path as a failed download instead of leaking a bare lookup error.
        logger.critical(
            f"No BUSCO download URL for lineage '{species}' in {downloads_json}"
        )
        raise SystemExit(1) from None

    busco_tgz = os.path.join(db_dir, os.path.basename(busco_url))
    logger.info(f"Downloading {species}_{odb_version} model from {busco_url}")
    download(busco_url, busco_tgz, wget=False)

    # Extract inside the database directory; the tarball is referenced by
    # basename because tar is run with cwd set to that directory.
    if os.path.isfile(busco_tgz):
        runSubprocess(
            ["tar", "-zxf", os.path.basename(busco_tgz)],
            logger,
            cwd=db_dir,
        )

    if not os.path.isdir(busco_model_path):
        # Leave any downloaded tarball in place so the failure can be inspected.
        logger.critical(
            f"Unable to download/extract BUSCO lineage to {busco_model_path}"
        )
        raise SystemExit(1)

    # Lineage directory confirmed present: the tarball is no longer needed.
    if os.path.isfile(busco_tgz):
        os.remove(busco_tgz)
    return busco_model_path


def download(url, name, wget=False, timeout=60, retries=3):
"""
Download a file from a given URL with improved error handling and retries.
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,8 @@ def fake_run_snap(*args, **kwargs):
debug_messages = [call.args[0] for call in logger.debug.call_args_list]

assert "snap tool output" in info_messages
assert "Successfully ran tools for scaffold_1.fasta: snap" in info_messages
assert "Successfully ran tools for scaffold_1.fasta: snap" in debug_messages
assert not any("Successfully ran tools" in message for message in info_messages)
assert not any("Processing contig" in message for message in info_messages)
assert not any("memory prediction for scaffold_1.fasta" in message for message in info_messages)
assert not any("Processing contig" in message for message in debug_messages)
Expand Down
Loading