Skip target audio (#656)

This commit is contained in:
Henry Ruhs 2023-06-29 12:43:05 +02:00 committed by GitHub
parent c6a138701a
commit eca4f0f127
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 20 additions and 19 deletions

View File

@ -36,14 +36,14 @@ options:
-t TARGET_PATH, --target TARGET_PATH select a target image or video
-o OUTPUT_PATH, --output OUTPUT_PATH select output file or directory
--frame-processor FRAME_PROCESSOR [FRAME_PROCESSOR ...] frame processors (choices: face_swapper, face_enhancer, ...)
--keep-fps keep original fps
--keep-audio keep original audio
--keep-fps keep target fps
--keep-frames keep temporary frames
--skip-audio skip target audio
--many-faces process every face
--video-encoder {libx264,libx265,libvpx-vp9} adjust output video encoder
--video-quality [0-51] adjust output video quality
--max-memory MAX_MEMORY maximum amount of RAM in GB
--execution-provider {cpu} [{cpu} ...] available execution provider (choices: cpu, ...)
--execution-provider {coreml,cpu} [{coreml,cpu} ...] available execution provider (choices: cpu, ...)
--execution-threads EXECUTION_THREADS number of execution threads
-v, --version show program's version number and exit
```

View File

@ -4,7 +4,7 @@ onnx==1.14.0
insightface==0.7.3
psutil==5.9.5
tk==0.1.0
customtkinter==5.1.3
customtkinter==5.2.0
torch==2.0.1
torchvision==0.15.2
onnxruntime==1.15.0

View File

@ -6,7 +6,7 @@ onnx==1.14.0
insightface==0.7.3
psutil==5.9.5
tk==0.1.0
customtkinter==5.1.3
customtkinter==5.2.0
pillow==9.5.0
torch==2.0.1+cu118; sys_platform != 'darwin'
torch==2.0.1; sys_platform == 'darwin'

View File

@ -38,10 +38,10 @@ def parse_args() -> None:
program.add_argument('-t', '--target', help='select an target image or video', dest='target_path')
program.add_argument('-o', '--output', help='select output file or directory', dest='output_path')
program.add_argument('--frame-processor', help='frame processors (choices: face_swapper, face_enhancer, ...)', dest='frame_processor', default=['face_swapper'], nargs='+')
program.add_argument('--keep-fps', help='keep original fps', dest='keep_fps', action='store_true', default=False)
program.add_argument('--keep-audio', help='keep original audio', dest='keep_audio', action='store_true', default=True)
program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true', default=False)
program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true', default=False)
program.add_argument('--keep-fps', help='keep target fps', dest='keep_fps', action='store_true')
program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true')
program.add_argument('--skip-audio', help='skip target audio', dest='skip_audio', action='store_true')
program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true')
program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx264', choices=['libx264', 'libx265', 'libvpx-vp9'])
program.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18, choices=range(52), metavar='[0-51]')
program.add_argument('--max-memory', help='maximum amount of RAM in GB', dest='max_memory', type=int, default=suggest_max_memory())
@ -57,8 +57,8 @@ def parse_args() -> None:
roop.globals.frame_processors = args.frame_processor
roop.globals.headless = args.source_path or args.target_path or args.output_path
roop.globals.keep_fps = args.keep_fps
roop.globals.keep_audio = args.keep_audio
roop.globals.keep_frames = args.keep_frames
roop.globals.skip_audio = args.skip_audio
roop.globals.many_faces = args.many_faces
roop.globals.video_encoder = args.video_encoder
roop.globals.video_quality = args.video_quality
@ -178,14 +178,15 @@ def start() -> None:
update_status('Creating video with 30.0 fps...')
create_video(roop.globals.target_path)
# handle audio
if roop.globals.keep_audio:
if roop.globals.skip_audio:
move_temp(roop.globals.target_path, roop.globals.output_path)
update_status('Skipping audio...')
else:
if roop.globals.keep_fps:
update_status('Restoring audio...')
else:
update_status('Restoring audio might cause issues as fps are not kept...')
restore_audio(roop.globals.target_path, roop.globals.output_path)
else:
move_temp(roop.globals.target_path, roop.globals.output_path)
# clean and validate
clean_temp(roop.globals.target_path)
if is_video(roop.globals.target_path):

View File

@ -5,8 +5,8 @@ target_path = None
output_path = None
frame_processors: List[str] = []
keep_fps = None
keep_audio = None
keep_frames = None
skip_audio = None
many_faces = None
video_encoder = None
video_quality = None

View File

@ -67,16 +67,16 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
target_button.place(relx=0.6, rely=0.4, relwidth=0.3, relheight=0.1)
keep_fps_value = ctk.BooleanVar(value=roop.globals.keep_fps)
keep_fps_checkbox = ctk.CTkSwitch(root, text='Keep fps', variable=keep_fps_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_fps', not roop.globals.keep_fps))
keep_fps_checkbox = ctk.CTkSwitch(root, text='Keep target fps', variable=keep_fps_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_fps', not roop.globals.keep_fps))
keep_fps_checkbox.place(relx=0.1, rely=0.6)
keep_frames_value = ctk.BooleanVar(value=roop.globals.keep_frames)
keep_frames_switch = ctk.CTkSwitch(root, text='Keep frames', variable=keep_frames_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_frames', keep_frames_value.get()))
keep_frames_switch = ctk.CTkSwitch(root, text='Keep temporary frames', variable=keep_frames_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_frames', keep_frames_value.get()))
keep_frames_switch.place(relx=0.1, rely=0.65)
keep_audio_value = ctk.BooleanVar(value=roop.globals.keep_audio)
keep_audio_switch = ctk.CTkSwitch(root, text='Keep audio', variable=keep_audio_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_audio', keep_audio_value.get()))
keep_audio_switch.place(relx=0.6, rely=0.6)
skip_audio_value = ctk.BooleanVar(value=roop.globals.skip_audio)
skip_audio_switch = ctk.CTkSwitch(root, text='Skip target audio', variable=skip_audio_value, cursor='hand2', command=lambda: setattr(roop.globals, 'skip_audio', skip_audio_value.get()))
skip_audio_switch.place(relx=0.6, rely=0.6)
many_faces_value = ctk.BooleanVar(value=roop.globals.many_faces)
many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(roop.globals, 'many_faces', many_faces_value.get()))