Migrate gpu flags to execution flags

This commit is contained in:
henryruhs 2023-06-12 23:39:52 +02:00
parent d18eb796e7
commit 623cbaa77d
5 changed files with 42 additions and 70 deletions

View File

@ -9,7 +9,7 @@ def get_face_analyser() -> Any:
global FACE_ANALYSER
if FACE_ANALYSER is None:
FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.providers)
FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.execution_providers)
FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640))
return FACE_ANALYSER

View File

@ -3,7 +3,7 @@
import os
import sys
# single thread doubles performance of gpu-mode - needs to be set before torch import
if any(arg.startswith('--gpu-vendor') for arg in sys.argv):
if any(arg.startswith('--execution-provider') for arg in sys.argv):
os.environ['OMP_NUM_THREADS'] = '1'
# reduce tensorflow log level
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@ -15,6 +15,7 @@ import shutil
import argparse
import psutil
import torch
import onnxruntime
import tensorflow
import multiprocessing
from opennsfw2 import predict_video_frames, predict_image
@ -27,7 +28,7 @@ import roop.enhancer
from roop.utilities import has_image_extension, is_image, is_video, detect_fps, create_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clean_temp
from roop.analyser import get_one_face
if 'ROCMExecutionProvider' in roop.globals.providers:
if 'ROCMExecutionProvider' in roop.globals.execution_providers:
del torch
warnings.simplefilter(action='ignore', category=FutureWarning)
@ -48,9 +49,8 @@ def parse_args() -> None:
parser.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18)
parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int, default=suggest_max_memory())
parser.add_argument('--cpu-cores', help='number of CPU cores to use', dest='cpu_cores', type=int, default=suggest_cpu_cores())
parser.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default='cpu', choices=['cpu', 'directml'])
parser.add_argument('--gpu-threads', help='number of threads to be use for the GPU', dest='gpu_threads', type=int, default=suggest_gpu_threads())
parser.add_argument('--gpu-vendor', help='select your GPU vendor', dest='gpu_vendor', choices=['apple', 'amd', 'nvidia'])
parser.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['CPUExecutionProvider'], choices=onnxruntime.get_available_providers(), nargs='+')
parser.add_argument('--execution-threads', help='number of threads to be use for the GPU', dest='execution_threads', type=int, default=suggest_execution_threads())
args = parser.parse_known_args()[0]
@ -67,15 +67,8 @@ def parse_args() -> None:
roop.globals.video_quality = args.video_quality
roop.globals.max_memory = args.max_memory
roop.globals.cpu_cores = args.cpu_cores
roop.globals.gpu_threads = args.gpu_threads
if args.execution_provider == 'directml':
roop.globals.providers = ['DmlExecutionProvider']
roop.globals.gpu_vendor = 'other'
if args.gpu_vendor:
roop.globals.gpu_vendor = args.gpu_vendor
else:
roop.globals.providers = ['CPUExecutionProvider']
roop.globals.execution_providers = args.execution_provider
roop.globals.execution_threads = args.execution_threads
def suggest_max_memory() -> int:
@ -84,20 +77,20 @@ def suggest_max_memory() -> int:
return 16
def suggest_gpu_threads() -> int:
    """Suggest a default GPU thread count from ``roop.globals.providers``.

    Returns:
        1 when ``DmlExecutionProvider`` is listed, 2 when
        ``ROCMExecutionProvider`` is listed, otherwise 8. The DirectML check
        takes precedence when both are present.
    """
    available = roop.globals.providers
    # Ordered (provider, thread count) pairs; the first provider found wins.
    per_provider_threads = (
        ('DmlExecutionProvider', 1),
        ('ROCMExecutionProvider', 2),
    )
    for provider_name, thread_count in per_provider_threads:
        if provider_name in available:
            return thread_count
    return 8
def suggest_cpu_cores() -> int:
    """Suggest a default for the number of CPU cores to use.

    Returns:
        2 when ``platform.system()`` reports ``Darwin``; otherwise half of
        the count reported by ``psutil.cpu_count()``, never less than 1.
    """
    if platform.system().lower() == 'darwin':
        return 2
    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to a single core instead of raising TypeError on `None / 2`.
    detected_cores = psutil.cpu_count() or 1
    # Floor division matches the previous int(count / 2) for positive counts.
    return max(detected_cores // 2, 1)
def suggest_execution_threads() -> int:
    """Suggest a default execution thread count from the configured providers.

    Returns:
        1 when ``DmlExecutionProvider`` is in
        ``roop.globals.execution_providers``, 2 when ``ROCMExecutionProvider``
        is, otherwise 8. The DirectML check takes precedence when both are
        present.
    """
    # NOTE(review): this is used as the argparse default for
    # --execution-threads, which appears to be evaluated before
    # roop.globals.execution_providers is assigned from the parsed args, so
    # the provider-specific values may never be selected - confirm.
    configured = roop.globals.execution_providers
    # Ordered (provider, thread count) pairs; the first provider found wins.
    per_provider_threads = (
        ('DmlExecutionProvider', 1),
        ('ROCMExecutionProvider', 2),
    )
    for provider_name, thread_count in per_provider_threads:
        if provider_name in configured:
            return thread_count
    return 8
def limit_resources() -> None:
# prevent tensorflow memory leak
gpus = tensorflow.config.experimental.list_physical_devices('GPU')
@ -117,7 +110,7 @@ def limit_resources() -> None:
def release_resources() -> None:
if roop.globals.gpu_vendor == 'nvidia':
if 'CUDAExecutionProvider' in roop.globals.execution_providers:
torch.cuda.empty_cache()
@ -125,40 +118,23 @@ def pre_check() -> None:
if sys.version_info < (3, 9):
quit('Python version is not supported - please upgrade to 3.9 or higher.')
if not shutil.which('ffmpeg'):
quit('ffmpeg is not installed!')
if roop.globals.gpu_vendor == 'apple':
if 'CoreMLExecutionProvider' not in roop.globals.providers:
quit('You are using --gpu=apple flag but CoreML is not available or properly installed on your system.')
if roop.globals.gpu_vendor == 'amd':
if 'ROCMExecutionProvider' not in roop.globals.providers:
quit('You are using --gpu=amd flag but ROCM is not available or properly installed on your system.')
if roop.globals.gpu_vendor == 'nvidia':
if not torch.cuda.is_available():
quit('You are using --gpu=nvidia flag but CUDA is not available or properly installed on your system.')
if torch.version.cuda > '11.8':
quit(f'CUDA version {torch.version.cuda} is not supported - please downgrade to 11.8')
if torch.version.cuda < '11.4':
quit(f'CUDA version {torch.version.cuda} is not supported - please upgrade to 11.8')
if torch.backends.cudnn.version() < 8220:
quit(f'CUDNN version { torch.backends.cudnn.version()} is not supported - please upgrade to 8.9.1')
if torch.backends.cudnn.version() > 8910:
quit(f'CUDNN version { torch.backends.cudnn.version()} is not supported - please downgrade to 8.9.1')
quit('ffmpeg is not installed.')
def conditional_process_video(source_path: str, temp_frame_paths: List[str], process_video) -> None:
pool_amount = len(temp_frame_paths) // roop.globals.cpu_cores
if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.gpu_vendor is None:
if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.execution_providers == ['CPUExecutionProvider']:
POOL = multiprocessing.Pool(roop.globals.cpu_cores, maxtasksperchild=1)
pools = []
for i in range(0, len(temp_frame_paths), pool_amount):
pool = POOL.apply_async(process_video, args=(source_path, temp_frame_paths[i:i + pool_amount], 'cpu'))
pool = POOL.apply_async(process_video, args=(source_path, temp_frame_paths[i:i + pool_amount], 'multi-processing'))
pools.append(pool)
for pool in pools:
pool.get()
POOL.close()
POOL.join()
else:
process_video(roop.globals.source_path, temp_frame_paths, 'gpu')
process_video(roop.globals.source_path, temp_frame_paths, 'multi-threading')
def update_status(message: str) -> None:
@ -186,7 +162,7 @@ def start() -> None:
if 'face-swapper' in roop.globals.frame_processors:
update_status('Swapping in progress...')
roop.swapper.process_image(roop.globals.source_path, roop.globals.target_path, roop.globals.output_path)
if roop.globals.gpu_vendor == 'nvidia' and 'face-enhancer' in roop.globals.frame_processors:
if 'CUDAExecutionProvider' in roop.globals.execution_providers and 'face-enhancer' in roop.globals.frame_processors:
update_status('Enhancing in progress...')
roop.enhancer.process_image(roop.globals.source_path, roop.globals.target_path, roop.globals.output_path)
if is_image(roop.globals.target_path):
@ -207,9 +183,9 @@ def start() -> None:
update_status('Swapping in progress...')
conditional_process_video(roop.globals.source_path, temp_frame_paths, roop.swapper.process_video)
release_resources()
# limit to one gpu thread
roop.globals.gpu_threads = 1
if roop.globals.gpu_vendor == 'nvidia' and 'face-enhancer' in roop.globals.frame_processors:
# limit to one execution thread
roop.globals.execution_threads = 1
if 'CUDAExecutionProvider' in roop.globals.execution_providers and 'face-enhancer' in roop.globals.frame_processors:
update_status('Enhancing in progress...')
conditional_process_video(roop.globals.source_path, temp_frame_paths, roop.enhancer.process_video)
release_resources()

View File

@ -10,7 +10,7 @@ from codeformer.basicsr.utils import img2tensor, tensor2img
import roop.globals
from roop.utilities import conditional_download, resolve_relative_path
if 'ROCMExecutionProvider' in roop.globals.providers:
if 'ROCMExecutionProvider' in roop.globals.execution_providers:
del torch
CODE_FORMER = None
@ -137,11 +137,11 @@ def process_frames(source_path: str, frame_paths: list[str], progress=None) -> N
def multi_process_frame(source_img, frame_paths, progress) -> None:
threads = []
frames_per_thread = len(frame_paths) // roop.globals.gpu_threads
remaining_frames = len(frame_paths) % roop.globals.gpu_threads
frames_per_thread = len(frame_paths) // roop.globals.execution_threads
remaining_frames = len(frame_paths) % roop.globals.execution_threads
start_index = 0
# create threads by frames
for _ in range(roop.globals.gpu_threads):
for _ in range(roop.globals.execution_threads):
end_index = start_index + frames_per_thread
if remaining_frames > 0:
end_index += 1
@ -160,9 +160,9 @@ def process_video(source_path: str, frame_paths: list[str], mode: str) -> None:
progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
total = len(frame_paths)
with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
if mode == 'cpu':
if mode == 'multi-processing':
progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores, 'memory': roop.globals.max_memory})
process_frames(source_path, frame_paths, progress)
elif mode == 'gpu':
progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads, 'memory': roop.globals.max_memory})
elif mode == 'multi-threading':
progress.set_postfix({'mode': mode, 'threads': roop.globals.execution_threads, 'memory': roop.globals.max_memory})
multi_process_frame(source_path, frame_paths, progress)

View File

@ -3,7 +3,7 @@ import onnxruntime
source_path = None
target_path = None
output_path = None
frame_processors = None
frame_processors = []
keep_fps = None
keep_audio = None
keep_frames = None
@ -12,11 +12,7 @@ video_encoder = None
video_quality = None
max_memory = None
cpu_cores = None
gpu_threads = None
gpu_vendor = None
execution_providers = []
execution_threads = None
headless = None
log_level = 'error'
providers = onnxruntime.get_available_providers()
if 'TensorrtExecutionProvider' in providers:
providers.remove('TensorrtExecutionProvider')

View File

@ -23,7 +23,7 @@ def get_face_swapper() -> None:
with THREAD_LOCK:
if FACE_SWAPPER is None:
model_path = resolve_relative_path('../models/inswapper_128.onnx')
FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.execution_providers)
return FACE_SWAPPER
@ -62,11 +62,11 @@ def process_frames(source_path: str, temp_frame_paths: List[str], progress=None)
def multi_process_frame(source_path: str, temp_frame_paths: List[str], progress) -> None:
threads = []
frames_per_thread = len(temp_frame_paths) // roop.globals.gpu_threads
remaining_frames = len(temp_frame_paths) % roop.globals.gpu_threads
frames_per_thread = len(temp_frame_paths) // roop.globals.execution_threads
remaining_frames = len(temp_frame_paths) % roop.globals.execution_threads
start_index = 0
# create threads by frames
for _ in range(roop.globals.gpu_threads):
for _ in range(roop.globals.execution_threads):
end_index = start_index + frames_per_thread
if remaining_frames > 0:
end_index += 1
@ -92,9 +92,9 @@ def process_video(source_path: str, temp_frame_paths: List[str], mode: str) -> N
progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
total = len(temp_frame_paths)
with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
if mode == 'cpu':
if mode == 'multi-processing':
progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores, 'memory': roop.globals.max_memory})
process_frames(source_path, temp_frame_paths, progress)
elif mode == 'gpu':
progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads, 'memory': roop.globals.max_memory})
elif mode == 'multi-threading':
progress.set_postfix({'mode': mode, 'threads': roop.globals.execution_threads, 'memory': roop.globals.max_memory})
multi_process_frame(source_path, temp_frame_paths, progress)