Mirror of https://github.com/s0md3v/roop.git, synced 2025-12-06 18:08:29 +00:00
Migrate gpu flags to execution flags

commit 623cbaa77d (parent d18eb796e7)
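Read against the hunks below, the change replaces the --gpu-vendor and --gpu-threads flags (and the interim cpu/directml form of --execution-provider) with an --execution-provider flag that accepts one or more ONNX Runtime provider names, plus --execution-threads. The globals roop.globals.providers, gpu_vendor and gpu_threads become roop.globals.execution_providers and execution_threads, and NVIDIA-specific branches now test for 'CUDAExecutionProvider' directly. Roughly, an invocation such as --gpu-vendor nvidia --gpu-threads 8 becomes --execution-provider CUDAExecutionProvider --execution-threads 8 (this mapping is inferred from the diff, not stated in the commit).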
roop/analyser.py

@@ -9,7 +9,7 @@ def get_face_analyser() -> Any:
     global FACE_ANALYSER
 
     if FACE_ANALYSER is None:
-        FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.providers)
+        FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.execution_providers)
         FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640))
     return FACE_ANALYSER
 
roop/core.py (72 changed lines)
@@ -3,7 +3,7 @@
 import os
 import sys
 # single thread doubles performance of gpu-mode - needs to be set before torch import
-if any(arg.startswith('--gpu-vendor') for arg in sys.argv):
+if any(arg.startswith('--execution-provider') for arg in sys.argv):
     os.environ['OMP_NUM_THREADS'] = '1'
 # reduce tensorflow log level
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -15,6 +15,7 @@ import shutil
 import argparse
 import psutil
 import torch
+import onnxruntime
 import tensorflow
 import multiprocessing
 from opennsfw2 import predict_video_frames, predict_image
@@ -27,7 +28,7 @@ import roop.enhancer
 from roop.utilities import has_image_extension, is_image, is_video, detect_fps, create_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clean_temp
 from roop.analyser import get_one_face
 
-if 'ROCMExecutionProvider' in roop.globals.providers:
+if 'ROCMExecutionProvider' in roop.globals.execution_providers:
     del torch
 
 warnings.simplefilter(action='ignore', category=FutureWarning)
@@ -48,9 +49,8 @@ def parse_args() -> None:
     parser.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18)
     parser.add_argument('--max-memory', help='maximum amount of RAM in GB to be used', dest='max_memory', type=int, default=suggest_max_memory())
     parser.add_argument('--cpu-cores', help='number of CPU cores to use', dest='cpu_cores', type=int, default=suggest_cpu_cores())
-    parser.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default='cpu', choices=['cpu', 'directml'])
-    parser.add_argument('--gpu-threads', help='number of threads to be use for the GPU', dest='gpu_threads', type=int, default=suggest_gpu_threads())
-    parser.add_argument('--gpu-vendor', help='select your GPU vendor', dest='gpu_vendor', choices=['apple', 'amd', 'nvidia'])
+    parser.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['CPUExecutionProvider'], choices=onnxruntime.get_available_providers(), nargs='+')
+    parser.add_argument('--execution-threads', help='number of threads to be use for the GPU', dest='execution_threads', type=int, default=suggest_execution_threads())
 
     args = parser.parse_known_args()[0]
 
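A minimal, standalone sketch of how the rewritten flag behaves, assuming onnxruntime is installed (CPUExecutionProvider is always available); the parser lines are copied from the hunk above, while the explicit argv list and the fixed default of 8 threads are hypothetical simplifications:

# Standalone sketch of the new --execution-provider / --execution-threads flags.
import argparse
import onnxruntime

parser = argparse.ArgumentParser()
parser.add_argument('--execution-provider', dest='execution_provider',
                    default=['CPUExecutionProvider'],
                    choices=onnxruntime.get_available_providers(), nargs='+')
parser.add_argument('--execution-threads', dest='execution_threads', type=int, default=8)

# One or more provider names may be passed; the parsed value is always a list,
# which core.py then assigns to roop.globals.execution_providers.
args = parser.parse_known_args(['--execution-provider', 'CPUExecutionProvider'])[0]
print(args.execution_provider)  # ['CPUExecutionProvider']
print(args.execution_threads)   # 8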
@@ -67,15 +67,8 @@ def parse_args() -> None:
     roop.globals.video_quality = args.video_quality
     roop.globals.max_memory = args.max_memory
     roop.globals.cpu_cores = args.cpu_cores
-    roop.globals.gpu_threads = args.gpu_threads
-
-    if args.execution_provider == 'directml':
-        roop.globals.providers = ['DmlExecutionProvider']
-        roop.globals.gpu_vendor = 'other'
-    if args.gpu_vendor:
-        roop.globals.gpu_vendor = args.gpu_vendor
-    else:
-        roop.globals.providers = ['CPUExecutionProvider']
+    roop.globals.execution_providers = args.execution_provider
+    roop.globals.execution_threads = args.execution_threads
 
 
 def suggest_max_memory() -> int:
@@ -84,20 +77,20 @@ def suggest_max_memory() -> int:
     return 16
 
 
-def suggest_gpu_threads() -> int:
-    if 'DmlExecutionProvider' in roop.globals.providers:
-        return 1
-    if 'ROCMExecutionProvider' in roop.globals.providers:
-        return 2
-    return 8
-
-
 def suggest_cpu_cores() -> int:
     if platform.system().lower() == 'darwin':
         return 2
     return int(max(psutil.cpu_count() / 2, 1))
 
 
+def suggest_execution_threads() -> int:
+    if 'DmlExecutionProvider' in roop.globals.execution_providers:
+        return 1
+    if 'ROCMExecutionProvider' in roop.globals.execution_providers:
+        return 2
+    return 8
+
+
 def limit_resources() -> None:
     # prevent tensorflow memory leak
     gpus = tensorflow.config.experimental.list_physical_devices('GPU')
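For reference, a standalone illustration of the suggestion heuristic introduced above; the function body is copied from the hunk, while the module-level provider list is a hypothetical stand-in for roop.globals.execution_providers:

# Hypothetical stand-in for roop.globals.execution_providers.
execution_providers = ['DmlExecutionProvider']


def suggest_execution_threads() -> int:
    # Fewer threads for DirectML and ROCm, eight otherwise (values taken from the diff).
    if 'DmlExecutionProvider' in execution_providers:
        return 1
    if 'ROCMExecutionProvider' in execution_providers:
        return 2
    return 8


print(suggest_execution_threads())  # 1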
@@ -117,7 +110,7 @@ def limit_resources() -> None:
 
 
 def release_resources() -> None:
-    if roop.globals.gpu_vendor == 'nvidia':
+    if 'CUDAExecutionProvider' in roop.globals.execution_providers:
         torch.cuda.empty_cache()
 
 
@@ -125,40 +118,23 @@ def pre_check() -> None:
     if sys.version_info < (3, 9):
         quit('Python version is not supported - please upgrade to 3.9 or higher.')
     if not shutil.which('ffmpeg'):
-        quit('ffmpeg is not installed!')
-    if roop.globals.gpu_vendor == 'apple':
-        if 'CoreMLExecutionProvider' not in roop.globals.providers:
-            quit('You are using --gpu=apple flag but CoreML is not available or properly installed on your system.')
-    if roop.globals.gpu_vendor == 'amd':
-        if 'ROCMExecutionProvider' not in roop.globals.providers:
-            quit('You are using --gpu=amd flag but ROCM is not available or properly installed on your system.')
-    if roop.globals.gpu_vendor == 'nvidia':
-        if not torch.cuda.is_available():
-            quit('You are using --gpu=nvidia flag but CUDA is not available or properly installed on your system.')
-        if torch.version.cuda > '11.8':
-            quit(f'CUDA version {torch.version.cuda} is not supported - please downgrade to 11.8')
-        if torch.version.cuda < '11.4':
-            quit(f'CUDA version {torch.version.cuda} is not supported - please upgrade to 11.8')
-        if torch.backends.cudnn.version() < 8220:
-            quit(f'CUDNN version { torch.backends.cudnn.version()} is not supported - please upgrade to 8.9.1')
-        if torch.backends.cudnn.version() > 8910:
-            quit(f'CUDNN version { torch.backends.cudnn.version()} is not supported - please downgrade to 8.9.1')
+        quit('ffmpeg is not installed.')
 
 
 def conditional_process_video(source_path: str, temp_frame_paths: List[str], process_video) -> None:
     pool_amount = len(temp_frame_paths) // roop.globals.cpu_cores
-    if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.gpu_vendor is None:
+    if pool_amount > 2 and roop.globals.cpu_cores > 1 and roop.globals.execution_providers == ['CPUExecutionProvider']:
         POOL = multiprocessing.Pool(roop.globals.cpu_cores, maxtasksperchild=1)
         pools = []
         for i in range(0, len(temp_frame_paths), pool_amount):
-            pool = POOL.apply_async(process_video, args=(source_path, temp_frame_paths[i:i + pool_amount], 'cpu'))
+            pool = POOL.apply_async(process_video, args=(source_path, temp_frame_paths[i:i + pool_amount], 'multi-processing'))
             pools.append(pool)
         for pool in pools:
             pool.get()
         POOL.close()
         POOL.join()
     else:
-        process_video(roop.globals.source_path, temp_frame_paths, 'gpu')
+        process_video(roop.globals.source_path, temp_frame_paths, 'multi-threading')
 
 
 def update_status(message: str) -> None:
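To make the dispatch above concrete, here is a small standalone sketch of the chunking that feeds the multiprocessing pool when only CPUExecutionProvider is selected; the frame names and core count are made up, and only the arithmetic mirrors the hunk (any other provider list falls through to the 'multi-threading' branch instead):

# Hypothetical frame list and core count; the chunking mirrors conditional_process_video.
temp_frame_paths = [f'{index:04d}.png' for index in range(10)]
cpu_cores = 3

pool_amount = len(temp_frame_paths) // cpu_cores  # 3 frames per pool task
chunks = [temp_frame_paths[i:i + pool_amount] for i in range(0, len(temp_frame_paths), pool_amount)]
print([len(chunk) for chunk in chunks])  # [3, 3, 3, 1] -> one apply_async call per chunk, mode 'multi-processing'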
@@ -186,7 +162,7 @@ def start() -> None:
     if 'face-swapper' in roop.globals.frame_processors:
         update_status('Swapping in progress...')
         roop.swapper.process_image(roop.globals.source_path, roop.globals.target_path, roop.globals.output_path)
-    if roop.globals.gpu_vendor == 'nvidia' and 'face-enhancer' in roop.globals.frame_processors:
+    if 'CUDAExecutionProvider' in roop.globals.execution_providers and 'face-enhancer' in roop.globals.frame_processors:
         update_status('Enhancing in progress...')
         roop.enhancer.process_image(roop.globals.source_path, roop.globals.target_path, roop.globals.output_path)
     if is_image(roop.globals.target_path):
@@ -207,9 +183,9 @@ def start() -> None:
         update_status('Swapping in progress...')
         conditional_process_video(roop.globals.source_path, temp_frame_paths, roop.swapper.process_video)
         release_resources()
-    # limit to one gpu thread
-    roop.globals.gpu_threads = 1
-    if roop.globals.gpu_vendor == 'nvidia' and 'face-enhancer' in roop.globals.frame_processors:
+    # limit to one execution thread
+    roop.globals.execution_threads = 1
+    if 'CUDAExecutionProvider' in roop.globals.execution_providers and 'face-enhancer' in roop.globals.frame_processors:
         update_status('Enhancing in progress...')
         conditional_process_video(roop.globals.source_path, temp_frame_paths, roop.enhancer.process_video)
         release_resources()
roop/enhancer.py

@@ -10,7 +10,7 @@ from codeformer.basicsr.utils import img2tensor, tensor2img
 import roop.globals
 from roop.utilities import conditional_download, resolve_relative_path
 
-if 'ROCMExecutionProvider' in roop.globals.providers:
+if 'ROCMExecutionProvider' in roop.globals.execution_providers:
     del torch
 
 CODE_FORMER = None
@@ -137,11 +137,11 @@ def process_frames(source_path: str, frame_paths: list[str], progress=None) -> None:
 
 def multi_process_frame(source_img, frame_paths, progress) -> None:
     threads = []
-    frames_per_thread = len(frame_paths) // roop.globals.gpu_threads
-    remaining_frames = len(frame_paths) % roop.globals.gpu_threads
+    frames_per_thread = len(frame_paths) // roop.globals.execution_threads
+    remaining_frames = len(frame_paths) % roop.globals.execution_threads
     start_index = 0
     # create threads by frames
-    for _ in range(roop.globals.gpu_threads):
+    for _ in range(roop.globals.execution_threads):
         end_index = start_index + frames_per_thread
         if remaining_frames > 0:
             end_index += 1
@@ -160,9 +160,9 @@ def process_video(source_path: str, frame_paths: list[str], mode: str) -> None:
     progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
     total = len(frame_paths)
     with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        if mode == 'cpu':
+        if mode == 'multi-processing':
             progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores, 'memory': roop.globals.max_memory})
             process_frames(source_path, frame_paths, progress)
-        elif mode == 'gpu':
-            progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads, 'memory': roop.globals.max_memory})
+        elif mode == 'multi-threading':
+            progress.set_postfix({'mode': mode, 'threads': roop.globals.execution_threads, 'memory': roop.globals.max_memory})
             multi_process_frame(source_path, frame_paths, progress)
roop/globals.py

@@ -3,7 +3,7 @@ import onnxruntime
 source_path = None
 target_path = None
 output_path = None
-frame_processors = None
+frame_processors = []
 keep_fps = None
 keep_audio = None
 keep_frames = None
@@ -12,11 +12,7 @@ video_encoder = None
 video_quality = None
 max_memory = None
 cpu_cores = None
-gpu_threads = None
-gpu_vendor = None
+execution_providers = []
+execution_threads = None
 headless = None
 log_level = 'error'
-providers = onnxruntime.get_available_providers()
-
-if 'TensorrtExecutionProvider' in providers:
-    providers.remove('TensorrtExecutionProvider')
roop/swapper.py

@@ -23,7 +23,7 @@ def get_face_swapper() -> None:
     with THREAD_LOCK:
         if FACE_SWAPPER is None:
            model_path = resolve_relative_path('../models/inswapper_128.onnx')
-            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.providers)
+            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.execution_providers)
     return FACE_SWAPPER
 
 
@@ -62,11 +62,11 @@ def process_frames(source_path: str, temp_frame_paths: List[str], progress=None) -> None:
 
 def multi_process_frame(source_path: str, temp_frame_paths: List[str], progress) -> None:
     threads = []
-    frames_per_thread = len(temp_frame_paths) // roop.globals.gpu_threads
-    remaining_frames = len(temp_frame_paths) % roop.globals.gpu_threads
+    frames_per_thread = len(temp_frame_paths) // roop.globals.execution_threads
+    remaining_frames = len(temp_frame_paths) % roop.globals.execution_threads
     start_index = 0
     # create threads by frames
-    for _ in range(roop.globals.gpu_threads):
+    for _ in range(roop.globals.execution_threads):
         end_index = start_index + frames_per_thread
         if remaining_frames > 0:
             end_index += 1
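The split arithmetic above is easiest to see on a toy input. The following standalone sketch reproduces it with made-up numbers; the decrement of remaining_frames and the advance of start_index are assumed continuations of the loop, since the hunk is cut off after end_index += 1:

# Hypothetical inputs; only the splitting logic mirrors multi_process_frame.
temp_frame_paths = [f'{index:04d}.png' for index in range(10)]
execution_threads = 3

frames_per_thread = len(temp_frame_paths) // execution_threads  # 3
remaining_frames = len(temp_frame_paths) % execution_threads    # 1
start_index = 0
batches = []
for _ in range(execution_threads):
    end_index = start_index + frames_per_thread
    if remaining_frames > 0:
        end_index += 1            # spread leftover frames across the first threads
        remaining_frames -= 1     # assumed continuation, not shown in the hunk
    batches.append(temp_frame_paths[start_index:end_index])
    start_index = end_index       # assumed continuation, not shown in the hunk

print([len(batch) for batch in batches])  # [4, 3, 3]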
@@ -92,9 +92,9 @@ def process_video(source_path: str, temp_frame_paths: List[str], mode: str) -> None:
     progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
     total = len(temp_frame_paths)
     with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        if mode == 'cpu':
+        if mode == 'multi-processing':
             progress.set_postfix({'mode': mode, 'cores': roop.globals.cpu_cores, 'memory': roop.globals.max_memory})
             process_frames(source_path, temp_frame_paths, progress)
-        elif mode == 'gpu':
-            progress.set_postfix({'mode': mode, 'threads': roop.globals.gpu_threads, 'memory': roop.globals.max_memory})
+        elif mode == 'multi-threading':
+            progress.set_postfix({'mode': mode, 'threads': roop.globals.execution_threads, 'memory': roop.globals.max_memory})
             multi_process_frame(source_path, temp_frame_paths, progress)