From 3b2da8f08dadef7838534aa5a3bfc310a414b51a Mon Sep 17 00:00:00 2001
From: Jester69 <jestersmail@mail.ru>
Date: Fri, 23 Jun 2023 23:25:03 +0300
Subject: [PATCH] Add --single-face-in-many-faces and --target-face options; use more digits for frame filenames

---
 roop/core.py                          |  6 +++-
 roop/globals.py                       |  3 ++
 roop/processors/frame/core.py         |  8 ++---
 roop/processors/frame/face_swapper.py | 38 ++++++++++++++------
 roop/ui.py                            | 50 ++++++++++++++++++++++-----
 roop/utilities.py                     |  4 +--
 6 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/roop/core.py b/roop/core.py
index b70d854..6faa979 100755
--- a/roop/core.py
+++ b/roop/core.py
@@ -37,6 +37,8 @@ def parse_args() -> None:
     program.add_argument('-s', '--source', help='select an source image', dest='source_path')
     program.add_argument('-t', '--target', help='select an target image or video', dest='target_path')
     program.add_argument('-o', '--output', help='select output file or directory', dest='output_path')
+    program.add_argument('--single-face-in-many-faces', help='use with --target-face', dest='single_face_in_many_faces', action='store_true', default=False)
+    program.add_argument('--target-face', help='select an target face image (better with the face from target image/video)', dest='target_face_path')
     program.add_argument('--frame-processor', help='frame processors (choices: face_swapper, face_enhancer, ...)', dest='frame_processor', default=['face_swapper'], nargs='+')
     program.add_argument('--keep-fps', help='keep original fps', dest='keep_fps', action='store_true', default=False)
     program.add_argument('--keep-audio', help='keep original audio', dest='keep_audio', action='store_true', default=True)
@@ -54,6 +56,8 @@ def parse_args() -> None:
     roop.globals.source_path = args.source_path
     roop.globals.target_path = args.target_path
     roop.globals.output_path = normalize_output_path(roop.globals.source_path, roop.globals.target_path, args.output_path)
+    roop.globals.target_face_path = args.target_face_path
+    roop.globals.single_face_in_many_faces = args.single_face_in_many_faces
     roop.globals.frame_processors = args.frame_processor
     roop.globals.headless = args.source_path or args.target_path or args.output_path
     roop.globals.keep_fps = args.keep_fps
@@ -165,7 +169,7 @@ def start() -> None:
     temp_frame_paths = get_temp_frame_paths(roop.globals.target_path)
     for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
         update_status('Progressing...', frame_processor.NAME)
-        frame_processor.process_video(roop.globals.source_path, temp_frame_paths)
+        frame_processor.process_video(roop.globals.source_path, roop.globals.target_face_path, temp_frame_paths)
         frame_processor.post_process()
         release_resources()
     # handles fps
diff --git a/roop/globals.py b/roop/globals.py
index 77fd391..59651ff 100644
--- a/roop/globals.py
+++ b/roop/globals.py
@@ -1,6 +1,9 @@
 from typing import List
 
 source_path = None
+single_face_in_many_faces = None
+threshold_value = 0.2
+target_face_path = None
 target_path = None
 output_path = None
 frame_processors: List[str] = []
diff --git a/roop/processors/frame/core.py b/roop/processors/frame/core.py
index c225f9d..efb94f0 100644
--- a/roop/processors/frame/core.py
+++ b/roop/processors/frame/core.py
@@ -42,13 +42,13 @@ def get_frame_processors_modules(frame_processors: List[str]) -> List[ModuleType
     return FRAME_PROCESSORS_MODULES
 
 
-def multi_process_frame(source_path: str, temp_frame_paths: List[str], process_frames: Callable[[str, List[str], Any], None], update: Callable[[], None]) -> None:
+def multi_process_frame(source_path: str, target_face_path: str, temp_frame_paths: List[str], process_frames: Callable[[str, str, List[str], Any], None], update: Callable[[], None]) -> None:
     with ThreadPoolExecutor(max_workers=roop.globals.execution_threads) as executor:
         futures = []
         queue = create_queue(temp_frame_paths)
         queue_per_future = len(temp_frame_paths) // roop.globals.execution_threads
         while not queue.empty():
-            future = executor.submit(process_frames, source_path, pick_queue(queue, queue_per_future), update)
+            future = executor.submit(process_frames, source_path, target_face_path, pick_queue(queue, queue_per_future), update)
             futures.append(future)
         for future in as_completed(futures):
             future.result()
@@ -69,11 +69,11 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
     return queues
 
 
-def process_video(source_path: str, frame_paths: list[str], process_frames: Callable[[str, List[str], Any], None]) -> None:
+def process_video(source_path: str, target_face_path: str, frame_paths: list[str], process_frames: Callable[[str, str, List[str], Any], None]) -> None:
     progress_bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'
     total = len(frame_paths)
     with tqdm(total=total, desc='Processing', unit='frame', dynamic_ncols=True, bar_format=progress_bar_format) as progress:
-        multi_process_frame(source_path, frame_paths, process_frames, lambda: update_progress(progress))
+        multi_process_frame(source_path, target_face_path, frame_paths, process_frames, lambda: update_progress(progress))
 
 
 def update_progress(progress: Any = None) -> None:
diff --git a/roop/processors/frame/face_swapper.py b/roop/processors/frame/face_swapper.py
index c53b5b8..9f6375e 100644
--- a/roop/processors/frame/face_swapper.py
+++ b/roop/processors/frame/face_swapper.py
@@ -2,6 +2,8 @@ from typing import Any, List, Callable
 import cv2
 import insightface
 import threading
+import numpy as np
+from numpy.linalg import norm
 
 import roop.globals
 import roop.processors.frame.core
@@ -54,24 +56,38 @@ def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
     return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)
 
 
-def process_frame(source_face: Face, temp_frame: Frame) -> Frame:
-    if roop.globals.many_faces:
+def is_similar(target_face: Face, found_face: Face) -> bool:
+    """Return True when the cosine similarity of both face embeddings exceeds roop.globals.threshold_value; a missing reference face never matches."""
+    if target_face is None:
+        return False
+    return bool(np.dot(target_face.embedding, found_face.embedding) / (norm(target_face.embedding) * norm(found_face.embedding)) > roop.globals.threshold_value)
+
+
+def process_frame(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
+    if roop.globals.single_face_in_many_faces:
         many_faces = get_many_faces(temp_frame)
         if many_faces:
-            for target_face in many_faces:
-                temp_frame = swap_face(source_face, target_face, temp_frame)
+            for found_face in many_faces:
+                if is_similar(target_face, found_face):
+                    temp_frame = swap_face(source_face, found_face, temp_frame)
+    elif roop.globals.many_faces:
+        many_faces = get_many_faces(temp_frame)
+        if many_faces:
+            for found_face in many_faces:
+                temp_frame = swap_face(source_face, found_face, temp_frame)
     else:
-        target_face = get_one_face(temp_frame)
-        if target_face:
-            temp_frame = swap_face(source_face, target_face, temp_frame)
+        found_face = get_one_face(temp_frame)
+        if found_face:
+            temp_frame = swap_face(source_face, found_face, temp_frame)
     return temp_frame
 
 
-def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
+def process_frames(source_path: str, target_face_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
     source_face = get_one_face(cv2.imread(source_path))
+    target_face = get_one_face(cv2.imread(target_face_path)) if target_face_path else None  # --target-face is optional, so the path may be None
     for temp_frame_path in temp_frame_paths:
         temp_frame = cv2.imread(temp_frame_path)
-        result = process_frame(source_face, temp_frame)
+        result = process_frame(source_face, target_face, temp_frame)
         cv2.imwrite(temp_frame_path, result)
         if update:
             update()
@@ -84,5 +100,5 @@ def process_image(source_path: str, target_path: str, output_path: str) -> None:
     cv2.imwrite(output_path, result)
 
 
-def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
-    roop.processors.frame.core.process_video(source_path, temp_frame_paths, process_frames)
+def process_video(source_path: str, target_face_path: str, temp_frame_paths: List[str]) -> None:
+    roop.processors.frame.core.process_video(source_path, target_face_path, temp_frame_paths, process_frames)
diff --git a/roop/ui.py b/roop/ui.py
index ba693da..320fab4 100644
--- a/roop/ui.py
+++ b/roop/ui.py
@@ -28,6 +28,7 @@ RECENT_DIRECTORY_OUTPUT = None
 preview_label = None
 preview_slider = None
 source_label = None
+target_face_label = None
 target_label = None
 status_label = None
 
@@ -42,7 +43,7 @@ def init(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
 
 
 def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
-    global source_label, target_label, status_label
+    global source_label, target_face_label, target_label, status_label
 
     ctk.deactivate_automatic_dpi_awareness()
     ctk.set_appearance_mode('system')
@@ -55,16 +56,22 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
     root.protocol('WM_DELETE_WINDOW', lambda: destroy())
 
     source_label = ctk.CTkLabel(root, text=None)
-    source_label.place(relx=0.1, rely=0.1, relwidth=0.3, relheight=0.25)
+    source_label.place(relx=0.1, rely=0.1, relwidth=0.2, relheight=0.25)
+
+    target_face_label = ctk.CTkLabel(root, text=None)
+    target_face_label.place(relx=0.4, rely=0.1, relwidth=0.2, relheight=0.25)
 
     target_label = ctk.CTkLabel(root, text=None)
-    target_label.place(relx=0.6, rely=0.1, relwidth=0.3, relheight=0.25)
+    target_label.place(relx=0.7, rely=0.1, relwidth=0.2, relheight=0.25)
 
     source_button = ctk.CTkButton(root, text='Select a face', cursor='hand2', command=lambda: select_source_path())
-    source_button.place(relx=0.1, rely=0.4, relwidth=0.3, relheight=0.1)
+    source_button.place(relx=0.1, rely=0.4, relwidth=0.2, relheight=0.1)
+
+    target_face_button = ctk.CTkButton(root, text='Select a target face', cursor='hand2', command=lambda: select_target_face_path())
+    target_face_button.place(relx=0.4, rely=0.4, relwidth=0.2, relheight=0.1)
 
     target_button = ctk.CTkButton(root, text='Select a target', cursor='hand2', command=lambda: select_target_path())
-    target_button.place(relx=0.6, rely=0.4, relwidth=0.3, relheight=0.1)
+    target_button.place(relx=0.7, rely=0.4, relwidth=0.2, relheight=0.1)
 
     keep_fps_value = ctk.BooleanVar(value=roop.globals.keep_fps)
     keep_fps_checkbox = ctk.CTkSwitch(root, text='Keep fps', variable=keep_fps_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_fps', not roop.globals.keep_fps))
@@ -76,11 +83,19 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
 
     keep_audio_value = ctk.BooleanVar(value=roop.globals.keep_audio)
     keep_audio_switch = ctk.CTkSwitch(root, text='Keep audio', variable=keep_audio_value, cursor='hand2', command=lambda: setattr(roop.globals, 'keep_audio', keep_audio_value.get()))
-    keep_audio_switch.place(relx=0.6, rely=0.6)
+    keep_audio_switch.place(relx=0.4, rely=0.6)
 
     many_faces_value = ctk.BooleanVar(value=roop.globals.many_faces)
     many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(roop.globals, 'many_faces', many_faces_value.get()))
-    many_faces_switch.place(relx=0.6, rely=0.65)
+    many_faces_switch.place(relx=0.4, rely=0.65)
+
+    single_face_in_many_faces_value = ctk.BooleanVar(value=roop.globals.single_face_in_many_faces)
+    single_face_in_many_faces_switch = ctk.CTkSwitch(root, text='Single face in many faces', variable=single_face_in_many_faces_value, cursor='hand2', command=lambda: setattr(roop.globals, 'single_face_in_many_faces', single_face_in_many_faces_value.get()))
+    single_face_in_many_faces_switch.place(relx=0.6, rely=0.6)
+
+    threshold_slider = ctk.CTkSlider(root, from_=0, to=1, command=lambda threshold_value: update_threshold(threshold_value))
+    threshold_slider.place(relx=0.6, rely=0.65, relwidth=0.3, relheight=0.025)
+    threshold_slider.set(roop.globals.threshold_value)
 
     start_button = ctk.CTkButton(root, text='Start', cursor='hand2', command=lambda: select_output_path(start))
     start_button.place(relx=0.15, rely=0.75, relwidth=0.2, relheight=0.05)
@@ -160,6 +175,21 @@ def select_target_path() -> None:
         target_label.configure(image=None)
 
 
+def select_target_face_path() -> None:
+    global RECENT_DIRECTORY_SOURCE
+
+    PREVIEW.withdraw()
+    target_face_path = ctk.filedialog.askopenfilename(title='select an target face image', initialdir=RECENT_DIRECTORY_SOURCE)
+    if is_image(target_face_path):
+        roop.globals.target_face_path = target_face_path
+        RECENT_DIRECTORY_SOURCE = os.path.dirname(roop.globals.target_face_path)
+        image = render_image_preview(roop.globals.target_face_path, (200, 200))
+        target_face_label.configure(image=image)
+    else:
+        roop.globals.target_face_path = None
+        target_face_label.configure(image=None)
+
+
 def select_output_path(start: Callable[[], None]) -> None:
     global RECENT_DIRECTORY_OUTPUT
 
@@ -196,6 +226,9 @@ def render_video_preview(video_path: str, size: Tuple[int, int], frame_number: i
     cv2.destroyAllWindows()
 
 
+def update_threshold(threshold: float = 0) -> None:  # slider callback: stores the 0..1 similarity threshold
+    roop.globals.threshold_value = threshold
+
 def toggle_preview() -> None:
     if PREVIEW.state() == 'normal':
         PREVIEW.withdraw()
@@ -223,7 +256,8 @@ def update_preview(frame_number: int = 0) -> None:
         for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
             temp_frame = frame_processor.process_frame(
                 get_one_face(cv2.imread(roop.globals.source_path)),
-                temp_frame
+                get_one_face(cv2.imread(roop.globals.target_face_path)),
+                temp_frame,
             )
         image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))
         image = ImageOps.contain(image, (PREVIEW_MAX_WIDTH, PREVIEW_MAX_HEIGHT), Image.LANCZOS)
diff --git a/roop/utilities.py b/roop/utilities.py
index 90c8d98..9e7bf43 100644
--- a/roop/utilities.py
+++ b/roop/utilities.py
@@ -44,13 +44,13 @@ def detect_fps(target_path: str) -> float:
 
 def extract_frames(target_path: str) -> None:
     temp_directory_path = get_temp_directory_path(target_path)
-    run_ffmpeg(['-i', target_path, '-pix_fmt', 'rgb24', os.path.join(temp_directory_path, '%04d.png')])
+    run_ffmpeg(['-i', target_path, '-pix_fmt', 'rgb24', os.path.join(temp_directory_path, '%08d.png')])
 
 
 def create_video(target_path: str, fps: float = 30.0) -> None:
     temp_output_path = get_temp_output_path(target_path)
     temp_directory_path = get_temp_directory_path(target_path)
-    run_ffmpeg(['-r', str(fps), '-i', os.path.join(temp_directory_path, '%04d.png'), '-c:v', roop.globals.video_encoder, '-crf', str(roop.globals.video_quality), '-pix_fmt', 'yuv420p', '-vf', 'colorspace=bt709:iall=bt601-6-625:fast=1', '-y', temp_output_path])
+    run_ffmpeg(['-r', str(fps), '-i', os.path.join(temp_directory_path, '%08d.png'), '-c:v', roop.globals.video_encoder, '-crf', str(roop.globals.video_quality), '-pix_fmt', 'yuv420p', '-vf', 'colorspace=bt709:iall=bt601-6-625:fast=1', '-y', temp_output_path])
 
 
 def restore_audio(target_path: str, output_path: str) -> None: