Source code for miop.rectification

# Original code from Stefan Toeberg (2023)
# Copyright (c) 2023, Stefan Toeberg.
#
# Modified by Maxime Paschoud (2025)
# Copyright (c) 2025, Maxime Paschoud.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# (http://opensource.org/licenses/BSD-3-Clause)
#
# __author__ = "Stefan Toeberg, LUH: IMR; Maxime Paschoud, ETHZ"
#  

from pipeline import DAGNode
import numpy as np
import cv2 as cv

[docs] class Rectification(DAGNode): """ Performs stereo image rectification based on affine fundamental matrices. This class computes rectifying transformations (similarity or rigid), applies them to stereo image pairs, and returns rectified image pairs along with transformation matrices. Attributes ---------- rectification_transforms : list of dict List of transformation matrices for each image pair. Each dict contains 'T1', 'T2', and 'Ht'. rectified_pairs : list of np.ndarray List of rectified image pairs, each as a 2-element NumPy array of images. """ def __init__(self): """ Initializes the Rectification node. """ super().__init__()
[docs] def eval(self, epipolar_sets, img_collection): """ Applies rectifying transformations to each stereo image pair in a dataset. Parameters ---------- epipolar_sets : list of dict Each dict contains at least a fundamental matrix under key `'F'`. img_collection : object Object that holds image pairs in the `pairs` attribute. Each pair is a 2-tuple of image objects with `.image` fields. Returns ------- tuple A tuple of: - rectification_transforms : list of dict Each dict contains matrices `'T1'`, `'T2'`, and `'Ht'` (post-translation). - rectified_pairs : list of np.ndarray Each element is an array of two rectified images. """ rectified_pairs = [] rectification_transforms = [] for j, pair in enumerate(img_collection.pairs): T1, T2 = self.rectifying_similarities(epipolar_sets[j]['F']) rectified_img1, rectified_img2, Ht = self.transform_images(T1, T2, pair[0].image, pair[1].image) # Rectify matches # H1 = Ht.dot(S1) # H2 = Ht.dot(S2) # rectified_pts1, rectified_pts2 = self.transform_matches(epipolar_sets[j]['pts1'], epipolar_sets[j]['pts2'], H1, H2) rectification_transforms.append({'T1': T1, 'T2': T2, 'Ht': Ht}) rectified_pairs.append(np.array([rectified_img1, rectified_img2])) self.rectification_transforms = rectification_transforms self.rectified_pairs = rectified_pairs return rectification_transforms, rectified_pairs
[docs] def transform_matches(self, pts1, pts2, H1, H2): """ Applies rectifying homographies to matched keypoints. Parameters ---------- pts1 : np.ndarray Matched keypoints from image 1, shape (n, 2). pts2 : np.ndarray Matched keypoints from image 2, shape (n, 2). H1 : np.ndarray Homography for image 1. H2 : np.ndarray Homography for image 2. Returns ------- tuple pts1r, pts2r : np.ndarray Rectified keypoints in both images, shape (n, 3). """ # transform matches into the rectified image pts1r = self.homogenize(self.transform_2D(self.homogenize(pts1).T, H1)).T pts2r = self.homogenize(self.transform_2D(self.homogenize(pts2).T, H2)).T return pts1r, pts2r
[docs] def rectifying_similarities(self, F): """ Computes rectifying similarity transforms from an affine fundamental matrix. Based on Shimshoni's geometric interpretation of weak-perspective motion. Parameters ---------- F : np.ndarray 3x3 affine fundamental matrix. Returns ------- tuple S1, S2 : np.ndarray Similarity transformations for image 1 and image 2. Raises ------ AssertionError If F is not affine or not rank 2 or not 3x3. """ # check that the input matrix is an affine fundamental matrix assert np.shape(F) == (3, 3) assert np.linalg.matrix_rank(F) == 2 assert F[0, 0] == 0 assert F[0, 1] == 0 assert F[1, 0] == 0 assert F[1, 1] == 0 # notations, THE SAME CONVENTION as in H&Z c = F[2, 0] d = F[2, 1] a = F[0, 2] b = F[1, 2] e = F[2, 2] # rotations r = np.sqrt(a * a + b * b) s = np.sqrt(c * c + d * d) R2 = (1.0 / r) * np.array([[-b, a], [-a, -b]]) R1 = (1.0 / s) * np.array([[d, -c], [c, d]]) # zoom and translation z = s / r # compute translation from e and only for the image that is the reference # img 2 gets scaled to match img 1 # (scaling is not divided onto both images and only applied to image 2) t = e / s # output similarities S1 = np.zeros((3, 3)) S1[0:2, 0:2] = R1 S1[1, 2] = 0 S1[2, 2] = 1 S2 = np.zeros((3, 3)) S2[0:2, 0:2] = (1.0 / z) * R2 S2[1, 2] = -t S2[2, 2] = 1 if S2[0, 0] < 0: mir = np.array(np.mat("-1 -1 -1; -1 -1 -1; 1 1 1")) S1 = S1 * mir S2 = S2 * mir return S1, S2
[docs] def homogenize(self, m): """ Converts 2D point coordinates to homogeneous coordinates. Parameters ---------- m : np.ndarray 2D coordinates, shape (n, 2) or (2, n) Returns ------- np.ndarray Homogeneous coordinates, shape (n, 3) or (3, n) to match input format. """ # transpose marker initialized as 0 T = 0 # transpose point array if of the form (nx2) if np.shape(m)[1] == 2 or np.shape(m)[1] == 3: m = np.transpose(m) T = 1 # add ones i = np.shape(m)[1] ones = np.ones((1, i)) mh = np.vstack((m, ones)) # if array was transposed retranspose if T == 1: mh = np.transpose(mh) return mh
[docs] def transform_2D(self, pts, H): """ Applies a homography to 2D homogeneous points. Parameters ---------- pts : np.ndarray Homogeneous 2D points (3, n). H : np.ndarray Homography matrix (3, 3). Returns ------- np.ndarray Transformed 2D points (2, n). """ pts_T_un = H.dot(pts) pts_T = pts_T_un[0:2, :] / np.array((pts_T_un[2, :], pts_T_un[2, :])) return pts_T
[docs] def rectifying_rigid_transformations(self, F, pts1, pts2): """ Computes rigid rectifying transformations from an affine F and matched points. Parameters ---------- F : np.ndarray 3x3 affine fundamental matrix. pts1 : np.ndarray 3xn homogeneous point array from image 1. pts2 : np.ndarray 3xn homogeneous point array from image 2. Returns ------- tuple S1, S2 : np.ndarray Rigid rectifying transformation matrices for the two images. Raises ------ AssertionError If F is not affine or not rank 2. Exception If rotation matrix signs cannot be resolved. """ # check that the input matrix is an affine fundamental matrix assert np.shape(F) == (3, 3) assert np.linalg.matrix_rank(F) == 2 assert F[0, 0] == 0 assert F[0, 1] == 0 assert F[1, 0] == 0 assert F[1, 1] == 0 # notations, same conventions as in H&Z c = F[2, 0] d = F[2, 1] a = F[0, 2] b = F[1, 2] # computing the translation by the transformed point correspondences, e # is actually not needed e = F[2, 2] if e < 0: F = -F # rotations s = np.sqrt(a * a + b * b) r = np.sqrt(c * c + d * d) if a < 0 and b < 0: R1 = (1.0 / r) * np.array([[d, -c], [c, d]]) R2 = (1.0 / s) * np.array([[-b, a], [-a, -b]]) elif c < 0 and d < 0: R1 = (1.0 / r) * np.array([[-d, c], [-c, -d]]) R2 = (1.0 / s) * np.array([[b, -a], [a, b]]) elif a < 0 and d < 0: R1 = (1.0 / r) * np.array([[-d, c], [-c, -d]]) R2 = (1.0 / s) * np.array([[b, -a], [a, b]]) elif b < 0 and c < 0: R1 = (1.0 / r) * np.array([[d, -c], [c, d]]) R2 = (1.0 / s) * np.array([[-b, a], [-a, -b]]) else: raise Exception("something is wrong with the rotation matrices sign") # rectifying transformation/rotation matrices without translation T1 = np.zeros((3, 3)) T1[0:2, 0:2] = R1 T1[1, 2] = 0 T1[2, 2] = 1 T2 = np.zeros((3, 3)) T2[0:2, 0:2] = R2 T2[1, 2] = 0 T2[2, 2] = 1 # determine the necessary translation to align the epipolar lines pts_t1 = T1.dot(pts1) pts_t2 = T2.dot(pts2) translation = pts_t1[1, :] - pts_t2[1, :] t = np.mean(translation) # rigid transformation matrices containing rotations and translations S1 = np.zeros((3, 3)) S1[0:2, 0:2] = R1 S1[1, 2] = 0 S1[2, 2] = 1 S2 = np.zeros((3, 3)) S2[0:2, 0:2] = R2 S2[1, 2] = t S2[2, 2] = 1 return S1, S2
[docs] def transform_images(self, H1, H2, img1, img2): """ Applies given homographies to images and aligns them using a shared bounding box. Parameters ---------- H1 : np.ndarray 3x3 homography for image 1. H2 : np.ndarray 3x3 homography for image 2. img1 : np.ndarray Original image 1. img2 : np.ndarray Original image 2. Returns ------- tuple rectified_img1, rectified_img2 : np.ndarray Warped images. Ht : np.ndarray Translation matrix used to shift both images into shared frame. """ h, w, c = img1.shape h2, w2, c2 = img2.shape corners = np.float32([[0, 0], [0, h], [w, 0], [w, h]]).reshape(-1, 1, 2) corners2 = np.float32( [[0, 0], [0, h2], [w2, 0], [w2, h2]]).reshape(-1, 1, 2) cornersTl = cv.perspectiveTransform(corners, H1) cornersTr = cv.perspectiveTransform(corners2, H2) # Spaltenanzahl - min_wl uMin = int(np.floor(min(cornersTl[0, :, 0], cornersTl[1, :, 0], cornersTr[0, :, 0], cornersTr[1, :, 0], ) ) ) # max_w uMax = int(np.ceil(max(cornersTl[2, :, 0], cornersTl[3, :, 0], cornersTr[2, :, 0], cornersTr[3, :, 0], ) ) ) # Zeilenanzahl - min_hl vMin = int(np.floor(min(cornersTl[0, :, 1], cornersTl[2, :, 1], cornersTr[0, :, 1], cornersTr[2, :, 1], ) ) ) # max_hl vMax = int(np.ceil(max(cornersTl[1, :, 1], cornersTl[3, :, 1], cornersTr[1, :, 1], cornersTr[3, :, 1], ) ) ) t = [-uMin, -vMin] Ht = np.array([[1, 0, t[0]], [0, 1, t[1]], [0, 0, 1]]) img_1T = cv.warpPerspective( img1, np.float32(Ht.dot(H1)), (uMax - uMin, vMax - vMin), flags=cv.INTER_LINEAR ) img_2T = cv.warpPerspective( img2, np.float32(Ht.dot(H2)), (uMax - uMin, vMax - vMin), flags=cv.INTER_LINEAR ) return img_1T, img_2T, np.float32(Ht)