-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctions.py
More file actions
121 lines (87 loc) · 3.48 KB
/
Copy pathfunctions.py
File metadata and controls
121 lines (87 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import re
import pymupdf
from gtts import gTTS
from moviepy import ImageClip, AudioFileClip, VideoFileClip, concatenate_videoclips
import os
import subprocess
import parsing
from openai import OpenAI
#render each slide (page) of the deck to a PNG image
def extract_slides(slide_path, output_dir="slide_images", skip_first=True):
    """Render the pages of a slide deck to PNG images.

    Args:
        slide_path: Path of the document opened with ``pymupdf.open``.
        output_dir: Directory that receives the images; created if missing.
        skip_first: When true, the first page (index 0) is not rendered.

    Returns:
        The image paths in page order. Files are named ``slide_NN.png``,
        where ``NN`` is the zero-based page index padded to two digits.
        An image that already exists on disk is reused, not re-rendered.
    """
    # Pixmap.save() does not create missing directories.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    start_index = 1 if skip_first else 0
    paths = []
    # The context manager closes the document even if rendering fails.
    with pymupdf.open(slide_path) as doc:
        for i, page in enumerate(doc):
            if i < start_index:
                continue
            # os.path.join rather than a hard-coded "\\": same result on
            # Windows, and a real sub-path (not a file name containing a
            # backslash) everywhere else.
            img_path = os.path.join(output_dir, f"slide_{i:02d}.png")
            # Rasterising at 200 dpi is the expensive step, so only do it
            # when the image is not already on disk.
            if not os.path.exists(img_path):
                page.get_pixmap(dpi=200).save(img_path)
            paths.append(img_path)
    return paths
#generate speech audio for one portion of text (one frame of the script)
def make_audio_gtts(text, output_path):
    """Convert *text* to speech with gTTS and save the audio to *output_path*."""
    gTTS(text).save(output_path)
#experimental, for future use
def make_audio_openai(text, output_path, voice="alloy", model="tts-1"):
    """
    Convert text to speech through the OpenAI TTS API and save it to a file.

    Args:
        text (str): Text to convert to speech
        output_path (str): Path where the audio file will be saved
        voice (str): Voice to use - options: alloy, echo, fable, onyx, nova, shimmer
        model (str): Model to use - options: tts-1 (faster), tts-1-hd (higher quality)
    """
    # OPENAI_API_KEY must be set in the environment for the client to work.
    speech_api = OpenAI().audio.speech
    request = {"model": model, "voice": voice, "input": text}
    # The streamed response is written to disk as it arrives.
    with speech_api.with_streaming_response.create(**request) as streamed:
        streamed.stream_to_file(output_path)
#generate each clip from image + audio
def make_clip(slide_image_path, audio_path, output_dir):
    """Write a video showing one slide image for the length of its audio.

    Args:
        slide_image_path: Image file displayed for the whole clip.
        audio_path: Audio file used as the soundtrack; its duration becomes
            the duration of the clip.
        output_dir: Despite the name, this is the path of the video FILE to
            write (it is passed straight to ``write_videofile``). The name
            is kept so existing keyword callers keep working.
    """
    audio_clip = AudioFileClip(audio_path)
    try:
        slide_clip = ImageClip(slide_image_path).with_duration(audio_clip.duration)
        final_clip = slide_clip.with_audio(audio_clip)
        final_clip.write_videofile(output_dir, fps=24)
    finally:
        # Release the audio reader and its file handle even when encoding
        # fails; otherwise every call leaks one.
        audio_clip.close()
#using ffmpeg instead of moviepy for faster performance
def concat_clips(clip_folder, final_video_folder, final_video_name):
    """Join all clips in a folder into one video with ffmpeg's concat demuxer.

    The files in ``clip_folder`` are ordered naturally (``clip_2`` before
    ``clip_10``), listed in a temporary ``concat_list.txt`` inside that
    folder, and stream-copied (no re-encoding) into
    ``<final_video_folder>/final_video_<final_video_name>.mp4``.

    Args:
        clip_folder: Directory containing the clips to join.
        final_video_folder: Directory for the result; created if missing.
        final_video_name: Suffix used in the output file name.

    Note:
        ffmpeg's exit status is not checked, so a failed concat does not
        raise. An exception is raised only if ffmpeg cannot be started.
    """
    list_name = 'concat_list.txt'
    concat_file = os.path.join(clip_folder, list_name)

    def natural_sort_key(s):
        # Split into digit / non-digit runs so numbers compare numerically.
        # isdecimal() matches exactly what \d captures, so int() never sees
        # a character (e.g. a superscript digit) that it cannot parse.
        return [int(part) if part.isdecimal() else part.lower()
                for part in re.split(r'(\d+)', s)]

    # Skip a list file left behind by an interrupted run, and anything that
    # is not a regular file; ffmpeg cannot concatenate either.
    clip_list = [
        os.path.join(clip_folder, name)
        for name in os.listdir(clip_folder)
        if name != list_name
    ]
    clip_list = [path for path in clip_list if os.path.isfile(path)]
    clip_list.sort(key=natural_sort_key)

    # os.path.join gives the same result as the old "\\" on Windows and a
    # valid path on other platforms.
    output_path = os.path.join(
        final_video_folder, f"final_video_{final_video_name}.mp4"
    )

    try:
        with open(concat_file, 'w', encoding='utf-8') as f:
            for clip in clip_list:
                # Absolute, forward-slash paths for ffmpeg.
                abs_path = os.path.abspath(clip).replace('\\', '/')
                # Inside a single-quoted string, ffmpeg needs a literal
                # quote written as '\'' (close, escaped quote, reopen).
                quoted = abs_path.replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")

        if final_video_folder:
            os.makedirs(final_video_folder, exist_ok=True)

        subprocess.run([
            'ffmpeg', '-f', 'concat', '-safe', '0',
            '-i', concat_file,
            '-c', 'copy',  # Copy streams without re-encoding
            output_path
        ])
    finally:
        # Remove the list file even if ffmpeg could not be started, so it
        # does not linger in the clip folder.
        if os.path.exists(concat_file):
            os.remove(concat_file)
#testing
if __name__ == "__main__":
    # Scratch area for manual experiments; the assignment below is the only
    # statement that actually runs.
    script_path = "script.md"

    # Disabled ad-hoc calls kept for reference. Before re-enabling, note:
    #   * `result` is not defined in this block;
    #   * make_clip and concat_clips are each defined with three
    #     parameters, but these samples pass only two.
    # make_audio_gtts(result[0], r"test\audio_clip_result[0].mp3")
    # make_clip(r"slide_images\slide_3.png", r"test\audio_clip_overview.mp3")
    # img_paths = extract_slides('slides.pdf', output_dir='slide_images')
    # print(img_paths)
    # concat_clips('audio_image_clips', 'final_video')