Lock face by position of a face reference

2025-12-06 18:08:29 +00:00 · 2023-06-30 22:14:58 +02:00 · 2023-06-30 22:14:58 +02:00 · bc2093d4d6
commit bc2093d4d6
parent b104741e68
6 changed files with 48 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ options:
  --keep-frames                                            keep temporary frames
  --skip-audio                                             skip target audio
  --many-faces                                             process every face
+  --face-position FACE_POSITION                            position of the target face
  --video-encoder {libx264,libx265,libvpx-vp9}             adjust output video encoder
  --video-quality [0-51]                                   adjust output video quality
  --max-memory MAX_MEMORY                                  maximum amount of RAM in GB
--- a/roop/core.py
+++ b/roop/core.py
@@ -42,6 +42,7 @@ def parse_args() -> None:
    program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true')
    program.add_argument('--skip-audio', help='skip target audio', dest='skip_audio', action='store_true')
    program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true')
+    program.add_argument('--face-position', help='position of the target face', dest='face_position', type=int, default=0)
    program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx264', choices=['libx264', 'libx265', 'libvpx-vp9'])
    program.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=18, choices=range(52), metavar='[0-51]')
    program.add_argument('--max-memory', help='maximum amount of RAM in GB', dest='max_memory', type=int, default=suggest_max_memory())
@@ -60,6 +61,7 @@ def parse_args() -> None:
    roop.globals.keep_frames = args.keep_frames
    roop.globals.skip_audio = args.skip_audio
    roop.globals.many_faces = args.many_faces
+    roop.globals.face_position = args.face_position
    roop.globals.video_encoder = args.video_encoder
    roop.globals.video_quality = args.video_quality
    roop.globals.max_memory = args.max_memory
--- a/roop/face_analyser.py
+++ b/roop/face_analyser.py
@@ -1,6 +1,7 @@
 import threading
 from typing import Any, Optional, List
 import insightface
+import numpy

 import roop.globals
 from roop.typing import Frame, Face
@@ -8,6 +9,8 @@ from roop.typing import Frame, Face
 FACE_ANALYSER = None
 THREAD_LOCK = threading.Lock()

+MAX_DISTANCE = 0.85
+

 def get_face_analyser() -> Any:
    global FACE_ANALYSER
@@ -19,16 +22,24 @@ def get_face_analyser() -> Any:
    return FACE_ANALYSER


-def get_one_face(frame: Frame) -> Optional[Face]:
+def get_one_face(frame: Frame, position: int = 0) -> Optional[Face]:
    faces = get_many_faces(frame)
    try:
-        return faces[0]
-    except ValueError:
-        return None
+        return faces[position]
+    except IndexError:
+        return faces[-1]


 def get_many_faces(frame: Frame) -> Optional[List[Face]]:
-    try:
+    if frame:
        return get_face_analyser().get(frame)
-    except IndexError:
-        return None
+    return None
+
+
+def find_similar_face(frame: Frame, reference_face: Face) -> Optional[Face]:
+    faces = get_many_faces(frame)
+    for face in faces:
+        distance = numpy.sum(numpy.square(face.normed_embedding - reference_face.normed_embedding))
+        if distance < MAX_DISTANCE:
+            return face
+    return None
--- a/roop/globals.py
+++ b/roop/globals.py
@@ -8,6 +8,7 @@ keep_fps = None
 keep_frames = None
 skip_audio = None
 many_faces = None
+face_position = None
 video_encoder = None
 video_quality = None
 max_memory = None
--- a/roop/processors/frame/face_swapper.py
+++ b/roop/processors/frame/face_swapper.py
@@ -6,7 +6,7 @@ import threading
 import roop.globals
 import roop.processors.frame.core
 from roop.core import update_status
-from roop.face_analyser import get_one_face, get_many_faces
+from roop.face_analyser import get_one_face, get_many_faces, find_similar_face
 from roop.typing import Face, Frame
 from roop.utilities import conditional_download, resolve_relative_path, is_image, is_video

@@ -14,6 +14,8 @@ FACE_SWAPPER = None
 THREAD_LOCK = threading.Lock()
 NAME = 'ROOP.FACE-SWAPPER'

+reference_face = None
+

 def get_face_swapper() -> Any:
    global FACE_SWAPPER
@@ -45,42 +47,51 @@ def pre_start() -> bool:


 def post_process() -> None:
-    global FACE_SWAPPER
+    global FACE_SWAPPER, reference_face

    FACE_SWAPPER = None
+    reference_face = None


 def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
    return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)


-def process_frame(source_face: Face, temp_frame: Frame) -> Frame:
+def process_frame(source_face: Face, reference_face: Face, temp_frame: Frame) -> Frame:
    if roop.globals.many_faces:
        many_faces = get_many_faces(temp_frame)
        if many_faces:
            for target_face in many_faces:
                temp_frame = swap_face(source_face, target_face, temp_frame)
    else:
-        target_face = get_one_face(temp_frame)
+        target_face = find_similar_face(temp_frame, reference_face)
        if target_face:
            temp_frame = swap_face(source_face, target_face, temp_frame)
    return temp_frame


 def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
+    global reference_face
+
    source_face = get_one_face(cv2.imread(source_path))
+    if not reference_face:
+        reference_face = get_one_face(cv2.imread(temp_frame_paths[0]),  roop.globals.face_position)
    for temp_frame_path in temp_frame_paths:
        temp_frame = cv2.imread(temp_frame_path)
-        result = process_frame(source_face, temp_frame)
+        result = process_frame(source_face, reference_face, temp_frame)
        cv2.imwrite(temp_frame_path, result)
        if update:
            update()


 def process_image(source_path: str, target_path: str, output_path: str) -> None:
+    global reference_face
+
    source_face = get_one_face(cv2.imread(source_path))
    target_frame = cv2.imread(target_path)
-    result = process_frame(source_face, target_frame)
+    if not reference_face:
+        reference_face = get_one_face(cv2.imread(target_frame),  roop.globals.face_position)
+    result = process_frame(source_face, reference_face, target_frame)
    cv2.imwrite(output_path, result)


--- a/roop/ui.py
+++ b/roop/ui.py
@@ -31,6 +31,8 @@ source_label = None
 target_label = None
 status_label = None

+reference_face = None
+

 def init(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.CTk:
    global ROOT, PREVIEW
@@ -216,13 +218,19 @@ def init_preview() -> None:


 def update_preview(frame_number: int = 0) -> None:
+    global reference_face
+
    if roop.globals.source_path and roop.globals.target_path:
        temp_frame = get_video_frame(roop.globals.target_path, frame_number)
        if predict_frame(temp_frame):
            quit()
+        source_face = get_one_face(cv2.imread(roop.globals.source_path))
+        if not reference_face:
+            reference_face = get_one_face(temp_frame, roop.globals.face_position)
        for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
            temp_frame = frame_processor.process_frame(
-                get_one_face(cv2.imread(roop.globals.source_path)),
+                source_face,
+                reference_face,
                temp_frame
            )
        image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))