Migrate GPU flags (--gpu-vendor, --gpu-threads) to execution flags (--execution-provider, --execution-threads)

2025-12-06 18:08:29 +00:00 · 2023-06-12 23:39:52 +02:00 · 2023-06-12 23:39:52 +02:00 · 623cbaa77d
commit 623cbaa77d
parent d18eb796e7
5 changed files with 42 additions and 70 deletions
--- a/roop/analyser.py
+++ b/roop/analyser.py
@ -9,7 +9,7 @@ def get_face_analyser() -> Any:
    global FACE_ANALYSER

    if FACE_ANALYSER is None:
-        FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.providers)
+        FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.execution_providers)
        FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640))
    return FACE_ANALYSER

--- a/roop/core.py
+++ b/roop/core.py
@ -3,7 +3,7 @@
 import os
 import sys
 # single thread doubles performance of gpu-mode - needs to be set before torch import
-if any(arg.startswith('--gpu-vendor') for arg in sys.argv):
+if any(arg.startswith('--execution-provider') for arg in sys.argv):
    os.environ['OMP_NUM_THREADS'] = '1'
 # reduce tensorflow log level
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@ -15,6 +15,7 @@ import shutil
 import argparse
 import psutil
 import torch
+import onnxruntime
 import tensorflow
 import multiprocessing
 from opennsfw2 import predict_video_frames, predict_image
@ -27,7 +28,7 @@ import roop.enhancer
 from roop.utilities import has_image_extension, is_image, is_video, detect_fps, create_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clean_temp
 from roop.analyser import get_one_face

-if 'ROCMExecutionProvider' in roop.globals.providers:
+if 'ROCMExecutionProvider' in roop.globals.execution_providers:
    del torch

 warnings.simplefilter(action='ignore', category=FutureWarning)
@ -48,9 +49,8 @@ def parse_args() -> None:
    parser.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18)
    parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int, default=suggest_max_memory())
    parser.add_argument('--cpu-cores', help='number of CPU cores to use', dest='cpu_cores', type=int, default=suggest_cpu_cores())
-    parser.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default='cpu', choices=['cpu', 'directml'])
-    parser.add_argument('--gpu-threads', help='number of threads to be use for the GPU', dest='gpu_threads', type=int, default=suggest_gpu_threads())
-    parser.add_argument('--gpu-vendor', help='select your GPU vendor', dest='gpu_vendor', choices=['apple', 'amd', 'nvidia'])
+    parser.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['CPUExecutionProvider'], choices=onnxruntime.get_available_providers(), nargs='+')
+    parser.add_argument('--execution-threads', help='number of threads to be use for the GPU', dest='execution_threads', type=int, default=suggest_execution_threads())

    args = parser.parse_known_args()[0]

@ -67,15 +67,8 @@ def parse_args() -> None:
    roop.globals.video_quality = args.video_quality
    roop.globals.max_memory = args.max_memory
    roop.globals.cpu_cores = args.cpu_cores
-    roop.globals.gpu_threads = args.gpu_threads
-
-    if args.execution_provider == 'directml':
-        roop.globals.providers = ['DmlExecutionProvider']
-        roop.globals.gpu_vendor = 'other'
-    if args.gpu_vendor:
-        roop.globals.gpu_vendor = args.gpu_vendor
-    else:
-        roop.globals.providers = ['CPUExecutionProvider']
+    roop.globals.execution_providers = args.execution_provider
+    roop.globals.execution_threads = args.execution_threads


 def suggest_max_memory() -> int:
@ -84,20 +77,20 @@ def suggest_max_memory() -> int:
    return 16


-def suggest_gpu_threads() -> int:
-    if 'DmlExecutionProvider' in roop.globals.providers:
-        return 1
-    if 'ROCMExecutionProvider' in roop.globals.providers:
-        return 2
-    return 8
-
-
 def suggest_cpu_cores() -> int:
    if platform.system().lower() == 'darwin':
        return 2
    return int(max(psutil.cpu_count() / 2, 1))


+def suggest_execution_threads() -> int:
+    if 'DmlExecutionProvider' in roop.globals.execution_providers:
+        return 1
+    if 'ROCMExecutionProvider' in roop.globals.execution_providers:
+        return 2
+    return 8
+
+
 def limit_resources() -> None:
    # prevent tensorflow memory leak
    gpus = tensorflow.config.experimental.list_physical_devices('GPU')
@ -117,7 +110,7 @@ def limit_resources() -> None:


 def release_resources() -> None:
-    if roop.globals.gpu_vendor == 'nvidia':
+    if 'CUDAExecutionProvider' in roop.globals.execution_providers:
        torch.cuda.empty_cache()


@ -125,40 +118,23 @@ def pre_check() -> None:
    if sys.version_info < (3, 9):
        quit('Python version is not supported - please upgrade to 3.9 or higher.')
    if not shutil.which('ffmpeg'):
-        quit('ffmpeg is not installed!')
-    if roop.globals.gpu_vendor == 'apple':
-        if 'CoreMLExecutionProvider' not in roop.globals.providers:
-            quit('You are using --gpu=apple flag but CoreML is not available or properly installed on your system.')
-    if roop.globals.gpu_vendor == 'amd':
-        if 'ROCMExecutionProvider' not in roop.globals.providers:
-            quit('You are using --gpu=amd flag but ROCM is not available or properly installed on your system.')
-    if roop.globals.gpu_vendor == 'nvidia':
-        if not torch.cuda.is_available():
-            quit('You are using --gpu=nvidia flag but CUDA is not available or properly installed on your system.')
-        if torch.version.cuda > '11.8':
-            quit(f'CUDA version {torch.version.cuda} is not supported - please downgrade to 11.8')
-        if torch.version.cuda < '11.4':
-            quit(f'CUDA version {torch.version.cuda} is not supported - please upgrade to 11.8')
-        if torch.backends.cudnn.version() < 8220:
-            quit(f'CUDNN version { torch.backends.cudnn.version()} is not supported - please upgrade to 8.9.1')
-        if torch.backends.cudnn.version() > 8910:
-            quit(f'CUDNN version { torch.backends.cudnn.version()} is not supported - please downgrade to 8.9.1')
+        quit('ffmpeg is not installed.')


 def conditional_process_video(source_path: str, temp_frame_paths: List[str], process_video) -> None:
    pool_amount = len(temp_frame_paths) // roop.globals.cpu_cores
-    if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.gpu_vendor is None:
+    if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.execution_providers == ['CPUExecutionProvider']:
        POOL = multiprocessing.Pool(roop.globals.cpu_cores, maxtasksperchild=1)
        pools = []
        for i in range(0, len(temp_frame_paths), pool_amount):
-            pool = POOL.apply_async(process_video, args=(source_path, temp_frame_paths[i:i + pool_amount], 'cpu'))
+            pool = POOL.apply_async(process_video, args=(source_path, temp_frame_paths[i:i + pool_amount], 'multi-processing'))
            pools.append(pool)
        for pool in pools:
            pool.get()
        POOL.close()
        POOL.join()
    else:
-         process_video(roop.globals.source_path, temp_frame_paths, 'gpu')
+         process_video(roop.globals.source_path, temp_frame_paths, 'multi-threading')


 def update_status(message: str) -> None:
@ -186,7 +162,7 @@ def start() -> None:
        if 'face-swapper' in roop.globals.frame_processors:
            update_status('Swapping in progress...')
            roop.swapper.process_image(roop.globals.source_path, roop.globals.target_path, roop.globals.output_path)
-        if roop.globals.gpu_vendor == 'nvidia' and 'face-enhancer' in roop.globals.frame_processors:
+        if 'CUDAExecutionProvider' in roop.globals.execution_providers and 'face-enhancer' in roop.globals.frame_processors:
            update_status('Enhancing in progress...')
            roop.enhancer.process_image(roop.globals.source_path, roop.globals.target_path, roop.globals.output_path)
        if is_image(roop.globals.target_path):
@ -207,9 +183,9 @@ def start() -> None:
        update_status('Swapping in progress...')
        conditional_process_video(roop.globals.source_path, temp_frame_paths, roop.swapper.process_video)
    release_resources()
-    # limit to one gpu thread
-    roop.globals.gpu_threads = 1
-    if roop.globals.gpu_vendor == 'nvidia' and 'face-enhancer' in roop.globals.frame_processors:
+    # limit to one execution thread
+    roop.globals.execution_threads = 1
+    if 'CUDAExecutionProvider' in roop.globals.execution_providers and 'face-enhancer' in roop.globals.frame_processors:
        update_status('Enhancing in progress...')
        conditional_process_video(roop.globals.source_path, temp_frame_paths, roop.enhancer.process_video)
    release_resources()
--- a/roop/enhancer.py
+++ b/roop/enhancer.py
@ -10,7 +10,7 @@ from codeformer.basicsr.utils import img2tensor, tensor2img
 import roop.globals
 from roop.utilities import conditional_download, resolve_relative_path

-if 'ROCMExecutionProvider' in roop.globals.providers:
+if 'ROCMExecutionProvider' in roop.globals.execution_providers:
    del torch

 CODE_FORMER = None
@ -137,11 +137,11 @@ def process_frames(source_path: str, frame_paths: list[str], progress=None) -> N

 def multi_process_frame(source_img, frame_paths, progress) -> None:
    threads = []
-    frames_per_thread = len(frame_paths) // roop.globals.gpu_threads
-    remaining_frames = len(frame_paths) % roop.globals.gpu_threads
+    frames_per_thread = len(frame_paths) // roop.globals.execution_threads
+    remaining_frames = len(frame_paths) % roop.globals.execution_threads
    start_index = 0
    # create threads by frames
-    for _ in range(roop.globals.gpu_threads):
+    for _ in range(roop.globals.execution_threads):
        end_index = start_index + frames_per_thread
        if remaining_frames > 0:
            end_index += 1
@ -160,9 +160,9 @@ def process_video(source_path: str, frame_paths: list[str], mode: str) -> None:
    progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
    total = len(frame_paths)
    with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        if mode == 'cpu':
+        if mode == 'multi-processing':
            progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores, 'memory': roop.globals.max_memory})
            process_frames(source_path, frame_paths, progress)
-        elif mode == 'gpu':
-            progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads, 'memory': roop.globals.max_memory})
+        elif mode == 'multi-threading':
+            progress.set_postfix({'mode': mode, 'threads': roop.globals.execution_threads, 'memory': roop.globals.max_memory})
            multi_process_frame(source_path, frame_paths, progress)
--- a/roop/globals.py
+++ b/roop/globals.py
@ -3,7 +3,7 @@ import onnxruntime
 source_path = None
 target_path = None
 output_path = None
-frame_processors = None
+frame_processors = []
 keep_fps = None
 keep_audio = None
 keep_frames = None
@ -12,11 +12,7 @@ video_encoder = None
 video_quality = None
 max_memory = None
 cpu_cores = None
-gpu_threads = None
-gpu_vendor = None
+execution_providers = []
+execution_threads = None
 headless = None
 log_level = 'error'
-providers = onnxruntime.get_available_providers()
-
-if 'TensorrtExecutionProvider' in providers:
-    providers.remove('TensorrtExecutionProvider')
--- a/roop/swapper.py
+++ b/roop/swapper.py
@ -23,7 +23,7 @@ def get_face_swapper() -> None:
    with THREAD_LOCK:
        if FACE_SWAPPER is None:
            model_path = resolve_relative_path('../models/inswapper_128.onnx')
-            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
+            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.execution_providers)
    return FACE_SWAPPER


@ -62,11 +62,11 @@ def process_frames(source_path: str, temp_frame_paths: List[str], progress=None)

 def multi_process_frame(source_path: str, temp_frame_paths: List[str], progress) -> None:
    threads = []
-    frames_per_thread = len(temp_frame_paths) // roop.globals.gpu_threads
-    remaining_frames = len(temp_frame_paths) % roop.globals.gpu_threads
+    frames_per_thread = len(temp_frame_paths) // roop.globals.execution_threads
+    remaining_frames = len(temp_frame_paths) % roop.globals.execution_threads
    start_index = 0
    # create threads by frames
-    for _ in range(roop.globals.gpu_threads):
+    for _ in range(roop.globals.execution_threads):
        end_index = start_index + frames_per_thread
        if remaining_frames > 0:
            end_index += 1
@ -92,9 +92,9 @@ def process_video(source_path: str, temp_frame_paths: List[str], mode: str) -> N
    progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
    total = len(temp_frame_paths)
    with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        if mode == 'cpu':
+        if mode == 'multi-processing':
            progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores, 'memory': roop.globals.max_memory})
            process_frames(source_path, temp_frame_paths, progress)
-        elif mode == 'gpu':
-            progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads, 'memory': roop.globals.max_memory})
+        elif mode == 'multi-threading':
+            progress.set_postfix({'mode': mode, 'threads': roop.globals.execution_threads, 'memory': roop.globals.max_memory})
            multi_process_frame(source_path, temp_frame_paths, progress)