--single-face-in-many-faces

more digits for frame filename
Jester69 2023-06-23 23:25:03 +03:00
parent f72216a0fb
commit 3b2da8f08d
6 changed files with 83 additions and 26 deletions

View File

@@ -37,6 +37,8 @@ def parse_args() -> None:
program.add_argument('-s', '--source', help='select an source image', dest='source_path')
program.add_argument('-t', '--target', help='select an target image or video', dest='target_path')
program.add_argument('-o', '--output', help='select output file or directory', dest='output_path')
program.add_argument('--single-face-in-many-faces', help='use with --target-face', dest='single_face_in_many_faces', action='store_true', default=False)
program.add_argument('--target-face', help='select an target face image (better with the face from target image/video)', dest='target_face_path')
program.add_argument('--frame-processor', help='frame processors (choices: face_swapper, face_enhancer, ...)', dest='frame_processor', default=['face_swapper'], nargs='+')
program.add_argument('--keep-fps', help='keep original fps', dest='keep_fps', action='store_true', default=False)
program.add_argument('--keep-audio', help='keep original audio', dest='keep_audio', action='store_true', default=True)
@@ -54,6 +56,8 @@ def parse_args() -> None:
roop.globals.source_path = args.source_path
roop.globals.target_path = args.target_path
roop.globals.output_path = normalize_output_path(roop.globals.source_path, roop.globals.target_path, args.output_path)
roop.globals.target_face_path = args.target_face_path
roop.globals.single_face_in_many_faces = args.single_face_in_many_faces
roop.globals.frame_processors = args.frame_processor
roop.globals.headless = args.source_path or args.target_path or args.output_path
roop.globals.keep_fps = args.keep_fps
@@ -165,7 +169,7 @@ def start() -> None:
temp_frame_paths = get_temp_frame_paths(roop.globals.target_path)
for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
update_status('Progressing...', frame_processor.NAME)
frame_processor.process_video(roop.globals.source_path, temp_frame_paths)
frame_processor.process_video(roop.globals.source_path, roop.globals.target_face_path, temp_frame_paths)
frame_processor.post_process()
release_resources()
# handles fps
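
Taken together, the two new options let a run replace just one chosen face inside a multi-face target: --target-face supplies a reference image of that face, and --single-face-in-many-faces enables the matching mode. A minimal invocation sketch (the run.py entry point and the file names are illustrative, not part of this commit):

    python run.py -s source_face.jpg -t group_video.mp4 -o swapped.mp4 \
        --target-face person_to_replace.jpg --single-face-in-many-faces \
        --frame-processor face_swapper --keep-fps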

View File

@@ -1,6 +1,9 @@
from typing import List
source_path = None
single_face_in_many_faces = None
threshold_value = 0.2
target_face_path = None
target_path = None
output_path = None
frame_processors: List[str] = []

View File

@@ -42,13 +42,13 @@ def get_frame_processors_modules(frame_processors: List[str]) -> List[ModuleType]:
return FRAME_PROCESSORS_MODULES
def multi_process_frame(source_path: str, temp_frame_paths: List[str], process_frames: Callable[[str, List[str], Any], None], update: Callable[[], None]) -> None:
def multi_process_frame(source_path: str, target_face_path: str, temp_frame_paths: List[str], process_frames: Callable[[str, List[str], Any], None], update: Callable[[], None]) -> None:
with ThreadPoolExecutor(max_workers=roop.globals.execution_threads) as executor:
futures = []
queue = create_queue(temp_frame_paths)
queue_per_future = len(temp_frame_paths) // roop.globals.execution_threads
while not queue.empty():
future = executor.submit(process_frames, source_path, pick_queue(queue, queue_per_future), update)
future = executor.submit(process_frames, source_path, target_face_path, pick_queue(queue, queue_per_future), update)
futures.append(future)
for future in as_completed(futures):
future.result()
@@ -69,11 +69,11 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
return queues
def process_video(source_path: str, frame_paths: list[str], process_frames: Callable[[str, List[str], Any], None]) -> None:
def process_video(source_path: str, target_face_path: str, frame_paths: list[str], process_frames: Callable[[str, List[str], Any], None]) -> None:
progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
total = len(frame_paths)
with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
multi_process_frame(source_path, frame_paths, process_frames, lambda: update_progress(progress))
multi_process_frame(source_path, target_face_path, frame_paths, process_frames, lambda: update_progress(progress))
def update_progress(progress: Any = None) -> None:
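
The new target_face_path parameter is simply threaded through the thread-pool fan-out to every worker. A simplified, self-contained sketch of that pattern, with an illustrative chunk size and no progress callback:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fan_out(source_path, target_face_path, frame_paths, process_frames, threads=4):
        # one submit per chunk of frames; each worker gets the same source and target face
        chunk = max(len(frame_paths) // threads, 1)
        with ThreadPoolExecutor(max_workers=threads) as executor:
            futures = [
                executor.submit(process_frames, source_path, target_face_path,
                                frame_paths[i:i + chunk], None)
                for i in range(0, len(frame_paths), chunk)
            ]
            for future in as_completed(futures):
                future.result()  # surface any exception raised in a worker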

View File

@@ -2,6 +2,8 @@ from typing import Any, List, Callable
import cv2
import insightface
import threading
import numpy as np
from numpy.linalg import norm
import roop.globals
import roop.processors.frame.core
@@ -54,24 +56,38 @@ def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)
def process_frame(source_face: Face, temp_frame: Frame) -> Frame:
if roop.globals.many_faces:
def is_similar(target_face: Face, found_face: Face) -> bool:
similarity = np.dot(target_face.embedding, found_face.embedding) / (norm(target_face.embedding) * norm(found_face.embedding))
if similarity > roop.globals.threshold_value:
return True
return False
def process_frame(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
if roop.globals.single_face_in_many_faces:
many_faces = get_many_faces(temp_frame)
if many_faces:
for target_face in many_faces:
temp_frame = swap_face(source_face, target_face, temp_frame)
for found_face in many_faces:
if is_similar(target_face, found_face):
temp_frame = swap_face(source_face, found_face, temp_frame)
elif roop.globals.many_faces:
many_faces = get_many_faces(temp_frame)
if many_faces:
for found_face in many_faces:
temp_frame = swap_face(source_face, found_face, temp_frame)
else:
target_face = get_one_face(temp_frame)
if target_face:
temp_frame = swap_face(source_face, target_face, temp_frame)
found_face = get_one_face(temp_frame)
if found_face:
temp_frame = swap_face(source_face, found_face, temp_frame)
return temp_frame
def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
def process_frames(source_path: str, target_face_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
source_face = get_one_face(cv2.imread(source_path))
target_face = get_one_face(cv2.imread(target_face_path))
for temp_frame_path in temp_frame_paths:
temp_frame = cv2.imread(temp_frame_path)
result = process_frame(source_face, temp_frame)
result = process_frame(source_face, target_face, temp_frame)
cv2.imwrite(temp_frame_path, result)
if update:
update()
@@ -84,5 +100,5 @@ def process_image(source_path: str, target_path: str, output_path: str) -> None:
cv2.imwrite(output_path, result)
def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
roop.processors.frame.core.process_video(source_path, temp_frame_paths, process_frames)
def process_video(source_path: str, target_face_path: str, temp_frame_paths: List[str]) -> None:
roop.processors.frame.core.process_video(source_path, target_face_path, temp_frame_paths, process_frames)
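
The swapper now only replaces faces whose embedding is close enough to the reference face: is_similar computes the cosine similarity of the two embeddings and compares it against roop.globals.threshold_value. A standalone illustration of that test (the vectors are made up; real embeddings come from insightface's Face.embedding):

    import numpy as np
    from numpy.linalg import norm

    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
        # dot product normalized by the vector lengths, in [-1, 1]
        return float(np.dot(a, b) / (norm(a) * norm(b)))

    target = np.array([0.10, 0.80, 0.20])
    candidate = np.array([0.12, 0.79, 0.18])
    print(cosine_similarity(target, candidate) > 0.2)  # True -> candidate would be swapped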

View File

@@ -28,6 +28,7 @@ RECENT_DIRECTORY_OUTPUT = None
preview_label = None
preview_slider = None
source_label = None
target_face_label = None
target_label = None
status_label = None
@@ -42,7 +43,7 @@ def init(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
global source_label, target_label, status_label
global source_label, target_face_label, target_label, status_label
ctk.deactivate_automatic_dpi_awareness()
ctk.set_appearance_mode('system')
@@ -55,16 +56,22 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
root.protocol('WM_DELETE_WINDOW', lambda: destroy())
source_label = ctk.CTkLabel(root, text=None)
source_label.place(relx=0.1, rely=0.1, relwidth=0.3, relheight=0.25)
source_label.place(relx=0.1, rely=0.1, relwidth=0.2, relheight=0.25)
target_face_label = ctk.CTkLabel(root, text=None)
target_face_label.place(relx=0.4, rely=0.1, relwidth=0.2, relheight=0.25)
target_label = ctk.CTkLabel(root, text=None)
target_label.place(relx=0.6, rely=0.1, relwidth=0.3, relheight=0.25)
target_label.place(relx=0.7, rely=0.1, relwidth=0.2, relheight=0.25)
source_button = ctk.CTkButton(root, text='Select a face', cursor='hand2', command=lambda: select_source_path())
source_button.place(relx=0.1, rely=0.4, relwidth=0.3, relheight=0.1)
source_button.place(relx=0.1, rely=0.4, relwidth=0.2, relheight=0.1)
target_face_button = ctk.CTkButton(root, text='Select a target face', cursor='hand2', command=lambda: select_target_face_path())
target_face_button.place(relx=0.4, rely=0.4, relwidth=0.2, relheight=0.1)
target_button = ctk.CTkButton(root, text='Select a target', cursor='hand2', command=lambda: select_target_path())
target_button.place(relx=0.6, rely=0.4, relwidth=0.3, relheight=0.1)
target_button.place(relx=0.7, rely=0.4, relwidth=0.2, relheight=0.1)
keep_fps_value = ctk.BooleanVar(value=roop.globals.keep_fps)
keep_fps_checkbox = ctk.CTkSwitch(root, text='Keep fps', variable=keep_fps_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_fps', not roop.globals.keep_fps))
@@ -76,11 +83,19 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
keep_audio_value = ctk.BooleanVar(value=roop.globals.keep_audio)
keep_audio_switch = ctk.CTkSwitch(root, text='Keep audio', variable=keep_audio_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_audio', keep_audio_value.get()))
keep_audio_switch.place(relx=0.6, rely=0.6)
keep_audio_switch.place(relx=0.4, rely=0.6)
many_faces_value = ctk.BooleanVar(value=roop.globals.many_faces)
many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(roop.globals, 'many_faces', many_faces_value.get()))
many_faces_switch.place(relx=0.6, rely=0.65)
many_faces_switch.place(relx=0.4, rely=0.65)
single_face_in_many_faces_value = ctk.BooleanVar(value=roop.globals.single_face_in_many_faces)
single_face_in_many_faces_switch = ctk.CTkSwitch(root, text='Single face in many faces', variable=single_face_in_many_faces_value, cursor='hand2', command=lambda: setattr(roop.globals, 'single_face_in_many_faces', single_face_in_many_faces_value.get()))
single_face_in_many_faces_switch.place(relx=0.6, rely=0.6)
threshold_slider = ctk.CTkSlider(root, from_=0, to=1, command=lambda threshold_value: update_threshold(threshold_value))
threshold_slider.place(relx=0.6, rely=0.65, relwidth=0.3, relheight=0.025)
threshold_slider.set(roop.globals.threshold_value)
start_button = ctk.CTkButton(root, text='Start', cursor='hand2', command=lambda: select_output_path(start))
start_button.place(relx=0.15, rely=0.75, relwidth=0.2, relheight=0.05)
@@ -160,6 +175,21 @@ def select_target_path() -> None:
target_label.configure(image=None)
def select_target_face_path() -> None:
global RECENT_DIRECTORY_SOURCE
PREVIEW.withdraw()
target_face_path = ctk.filedialog.askopenfilename(title='select an target face image', initialdir=RECENT_DIRECTORY_SOURCE)
if is_image(target_face_path):
roop.globals.target_face_path = target_face_path
RECENT_DIRECTORY_SOURCE = os.path.dirname(roop.globals.target_face_path)
image = render_image_preview(roop.globals.target_face_path, (200, 200))
target_face_label.configure(image=image)
else:
roop.globals.target_face_path = None
target_face_label.configure(image=None)
def select_output_path(start: Callable[[], None]) -> None:
global RECENT_DIRECTORY_OUTPUT
@@ -196,6 +226,9 @@ def render_video_preview(video_path: str, size: Tuple[int, int], frame_number: i
cv2.destroyAllWindows()
def update_threshold(threshold: float = 0.0) -> None:
roop.globals.threshold_value = threshold
def toggle_preview() -> None:
if PREVIEW.state() == 'normal':
PREVIEW.withdraw()
@@ -223,7 +256,8 @@ def update_preview(frame_number: int = 0) -> None:
for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
temp_frame = frame_processor.process_frame(
get_one_face(cv2.imread(roop.globals.source_path)),
temp_frame
get_one_face(cv2.imread(roop.globals.target_face_path)),
temp_frame,
)
image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))
image = ImageOps.contain(image, (PREVIEW_MAX_WIDTH, PREVIEW_MAX_HEIGHT), Image.LANCZOS)
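
The slider writes straight into roop.globals.threshold_value, which is the cut-off is_similar uses in the face swapper; CTkSlider passes its position to the command callback as a float between from_ and to. A small sketch of the effect (values are illustrative):

    import roop.globals

    def update_threshold(threshold: float = 0.0) -> None:
        roop.globals.threshold_value = threshold

    update_threshold(0.5)  # stricter: only close embedding matches are swapped
    update_threshold(0.1)  # looser: more detected faces count as the target person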

View File

@@ -44,13 +44,13 @@ def detect_fps(target_path: str) -> float:
def extract_frames(target_path: str) -> None:
temp_directory_path = get_temp_directory_path(target_path)
run_ffmpeg(['-i', target_path, '-pix_fmt', 'rgb24', os.path.join(temp_directory_path, '%04d.png')])
run_ffmpeg(['-i', target_path, '-pix_fmt', 'rgb24', os.path.join(temp_directory_path, '%08d.png')])
def create_video(target_path: str, fps: float = 30.0) -> None:
temp_output_path = get_temp_output_path(target_path)
temp_directory_path = get_temp_directory_path(target_path)
run_ffmpeg(['-r', str(fps), '-i', os.path.join(temp_directory_path, '%04d.png'), '-c:v', roop.globals.video_encoder, '-crf', str(roop.globals.video_quality), '-pix_fmt', 'yuv420p', '-vf', 'colorspace=bt709:iall=bt601-6-625:fast=1', '-y', temp_output_path])
run_ffmpeg(['-r', str(fps), '-i', os.path.join(temp_directory_path, '%08d.png'), '-c:v', roop.globals.video_encoder, '-crf', str(roop.globals.video_quality), '-pix_fmt', 'yuv420p', '-vf', 'colorspace=bt709:iall=bt601-6-625:fast=1', '-y', temp_output_path])
def restore_audio(target_path: str, output_path: str) -> None:
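
Padding the frame numbers to eight digits keeps lexicographic filename order identical to numeric frame order well past the 9,999-frame limit of the old %04d pattern, which matters wherever the temp frames are listed and sorted by name. A quick illustration:

    frames = [9999, 10000]
    print(sorted('%04d.png' % n for n in frames))  # ['10000.png', '9999.png']  -> wrong order
    print(sorted('%08d.png' % n for n in frames))  # ['00009999.png', '00010000.png'] -> numeric order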