Source code for miop.rectification
# Original code from Stefan Toeberg (2023)
# Copyright (c) 2023, Stefan Toeberg.
#
# Modified by Maxime Paschoud (2025)
# Copyright (c) 2025, Maxime Paschoud.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# (http://opensource.org/licenses/BSD-3-Clause)
#
# __author__ = "Stefan Toeberg, LUH: IMR; Maxime Paschoud, ETHZ"
#
from pipeline import DAGNode
import numpy as np
import cv2 as cv
[docs]
class Rectification(DAGNode):
"""
Performs stereo image rectification based on affine fundamental matrices.
This class computes rectifying transformations (similarity or rigid), applies them to stereo image pairs, and returns rectified image pairs along with transformation matrices.
Attributes
----------
rectification_transforms : list of dict
List of transformation matrices for each image pair. Each dict contains 'T1', 'T2', and 'Ht'.
rectified_pairs : list of np.ndarray
List of rectified image pairs, each as a 2-element NumPy array of images.
"""
def __init__(self):
"""
Initializes the Rectification node.
"""
super().__init__()
[docs]
def eval(self, epipolar_sets, img_collection):
"""
Applies rectifying transformations to each stereo image pair in a dataset.
Parameters
----------
epipolar_sets : list of dict
Each dict contains at least a fundamental matrix under key `'F'`.
img_collection : object
Object that holds image pairs in the `pairs` attribute. Each pair is a 2-tuple of image objects with `.image` fields.
Returns
-------
tuple
A tuple of:
- rectification_transforms : list of dict
Each dict contains matrices `'T1'`, `'T2'`, and `'Ht'` (post-translation).
- rectified_pairs : list of np.ndarray
Each element is an array of two rectified images.
"""
rectified_pairs = []
rectification_transforms = []
for j, pair in enumerate(img_collection.pairs):
T1, T2 = self.rectifying_similarities(epipolar_sets[j]['F'])
rectified_img1, rectified_img2, Ht = self.transform_images(T1, T2, pair[0].image, pair[1].image)
# Rectify matches
# H1 = Ht.dot(S1)
# H2 = Ht.dot(S2)
# rectified_pts1, rectified_pts2 = self.transform_matches(epipolar_sets[j]['pts1'], epipolar_sets[j]['pts2'], H1, H2)
rectification_transforms.append({'T1': T1, 'T2': T2, 'Ht': Ht})
rectified_pairs.append(np.array([rectified_img1, rectified_img2]))
self.rectification_transforms = rectification_transforms
self.rectified_pairs = rectified_pairs
return rectification_transforms, rectified_pairs
[docs]
def transform_matches(self, pts1, pts2, H1, H2):
"""
Applies rectifying homographies to matched keypoints.
Parameters
----------
pts1 : np.ndarray
Matched keypoints from image 1, shape (n, 2).
pts2 : np.ndarray
Matched keypoints from image 2, shape (n, 2).
H1 : np.ndarray
Homography for image 1.
H2 : np.ndarray
Homography for image 2.
Returns
-------
tuple
pts1r, pts2r : np.ndarray
Rectified keypoints in both images, shape (n, 3).
"""
# transform matches into the rectified image
pts1r = self.homogenize(self.transform_2D(self.homogenize(pts1).T, H1)).T
pts2r = self.homogenize(self.transform_2D(self.homogenize(pts2).T, H2)).T
return pts1r, pts2r
[docs]
def rectifying_similarities(self, F):
"""
Computes rectifying similarity transforms from an affine fundamental matrix.
Based on Shimshoni's geometric interpretation of weak-perspective motion.
Parameters
----------
F : np.ndarray
3x3 affine fundamental matrix.
Returns
-------
tuple
S1, S2 : np.ndarray
Similarity transformations for image 1 and image 2.
Raises
------
AssertionError
If F is not affine or not rank 2 or not 3x3.
"""
# check that the input matrix is an affine fundamental matrix
assert np.shape(F) == (3, 3)
assert np.linalg.matrix_rank(F) == 2
assert F[0, 0] == 0
assert F[0, 1] == 0
assert F[1, 0] == 0
assert F[1, 1] == 0
# notations, THE SAME CONVENTION as in H&Z
c = F[2, 0]
d = F[2, 1]
a = F[0, 2]
b = F[1, 2]
e = F[2, 2]
# rotations
r = np.sqrt(a * a + b * b)
s = np.sqrt(c * c + d * d)
R2 = (1.0 / r) * np.array([[-b, a], [-a, -b]])
R1 = (1.0 / s) * np.array([[d, -c], [c, d]])
# zoom and translation
z = s / r
# compute translation from e and only for the image that is the reference
# img 2 gets scaled to match img 1
# (scaling is not divided onto both images and only applied to image 2)
t = e / s
# output similarities
S1 = np.zeros((3, 3))
S1[0:2, 0:2] = R1
S1[1, 2] = 0
S1[2, 2] = 1
S2 = np.zeros((3, 3))
S2[0:2, 0:2] = (1.0 / z) * R2
S2[1, 2] = -t
S2[2, 2] = 1
if S2[0, 0] < 0:
mir = np.array(np.mat("-1 -1 -1; -1 -1 -1; 1 1 1"))
S1 = S1 * mir
S2 = S2 * mir
return S1, S2
[docs]
def homogenize(self, m):
"""
Converts 2D point coordinates to homogeneous coordinates.
Parameters
----------
m : np.ndarray
2D coordinates, shape (n, 2) or (2, n)
Returns
-------
np.ndarray
Homogeneous coordinates, shape (n, 3) or (3, n) to match input format.
"""
# transpose marker initialized as 0
T = 0
# transpose point array if of the form (nx2)
if np.shape(m)[1] == 2 or np.shape(m)[1] == 3:
m = np.transpose(m)
T = 1
# add ones
i = np.shape(m)[1]
ones = np.ones((1, i))
mh = np.vstack((m, ones))
# if array was transposed retranspose
if T == 1:
mh = np.transpose(mh)
return mh
[docs]
def transform_2D(self, pts, H):
"""
Applies a homography to 2D homogeneous points.
Parameters
----------
pts : np.ndarray
Homogeneous 2D points (3, n).
H : np.ndarray
Homography matrix (3, 3).
Returns
-------
np.ndarray
Transformed 2D points (2, n).
"""
pts_T_un = H.dot(pts)
pts_T = pts_T_un[0:2, :] / np.array((pts_T_un[2, :], pts_T_un[2, :]))
return pts_T
[docs]
def rectifying_rigid_transformations(self, F, pts1, pts2):
"""
Computes rigid rectifying transformations from an affine F and matched points.
Parameters
----------
F : np.ndarray
3x3 affine fundamental matrix.
pts1 : np.ndarray
3xn homogeneous point array from image 1.
pts2 : np.ndarray
3xn homogeneous point array from image 2.
Returns
-------
tuple
S1, S2 : np.ndarray
Rigid rectifying transformation matrices for the two images.
Raises
------
AssertionError
If F is not affine or not rank 2.
Exception
If rotation matrix signs cannot be resolved.
"""
# check that the input matrix is an affine fundamental matrix
assert np.shape(F) == (3, 3)
assert np.linalg.matrix_rank(F) == 2
assert F[0, 0] == 0
assert F[0, 1] == 0
assert F[1, 0] == 0
assert F[1, 1] == 0
# notations, same conventions as in H&Z
c = F[2, 0]
d = F[2, 1]
a = F[0, 2]
b = F[1, 2]
# computing the translation by the transformed point correspondences, e
# is actually not needed
e = F[2, 2]
if e < 0:
F = -F
# rotations
s = np.sqrt(a * a + b * b)
r = np.sqrt(c * c + d * d)
if a < 0 and b < 0:
R1 = (1.0 / r) * np.array([[d, -c], [c, d]])
R2 = (1.0 / s) * np.array([[-b, a], [-a, -b]])
elif c < 0 and d < 0:
R1 = (1.0 / r) * np.array([[-d, c], [-c, -d]])
R2 = (1.0 / s) * np.array([[b, -a], [a, b]])
elif a < 0 and d < 0:
R1 = (1.0 / r) * np.array([[-d, c], [-c, -d]])
R2 = (1.0 / s) * np.array([[b, -a], [a, b]])
elif b < 0 and c < 0:
R1 = (1.0 / r) * np.array([[d, -c], [c, d]])
R2 = (1.0 / s) * np.array([[-b, a], [-a, -b]])
else:
raise Exception("something is wrong with the rotation matrices sign")
# rectifying transformation/rotation matrices without translation
T1 = np.zeros((3, 3))
T1[0:2, 0:2] = R1
T1[1, 2] = 0
T1[2, 2] = 1
T2 = np.zeros((3, 3))
T2[0:2, 0:2] = R2
T2[1, 2] = 0
T2[2, 2] = 1
# determine the necessary translation to align the epipolar lines
pts_t1 = T1.dot(pts1)
pts_t2 = T2.dot(pts2)
translation = pts_t1[1, :] - pts_t2[1, :]
t = np.mean(translation)
# rigid transformation matrices containing rotations and translations
S1 = np.zeros((3, 3))
S1[0:2, 0:2] = R1
S1[1, 2] = 0
S1[2, 2] = 1
S2 = np.zeros((3, 3))
S2[0:2, 0:2] = R2
S2[1, 2] = t
S2[2, 2] = 1
return S1, S2
[docs]
def transform_images(self, H1, H2, img1, img2):
"""
Applies given homographies to images and aligns them using a shared bounding box.
Parameters
----------
H1 : np.ndarray
3x3 homography for image 1.
H2 : np.ndarray
3x3 homography for image 2.
img1 : np.ndarray
Original image 1.
img2 : np.ndarray
Original image 2.
Returns
-------
tuple
rectified_img1, rectified_img2 : np.ndarray
Warped images.
Ht : np.ndarray
Translation matrix used to shift both images into shared frame.
"""
h, w, c = img1.shape
h2, w2, c2 = img2.shape
corners = np.float32([[0, 0], [0, h], [w, 0], [w, h]]).reshape(-1, 1, 2)
corners2 = np.float32(
[[0, 0], [0, h2], [w2, 0], [w2, h2]]).reshape(-1, 1, 2)
cornersTl = cv.perspectiveTransform(corners, H1)
cornersTr = cv.perspectiveTransform(corners2, H2)
# Spaltenanzahl - min_wl
uMin = int(np.floor(min(cornersTl[0, :, 0],
cornersTl[1, :, 0],
cornersTr[0, :, 0],
cornersTr[1, :, 0],
)
)
)
# max_w
uMax = int(np.ceil(max(cornersTl[2, :, 0],
cornersTl[3, :, 0],
cornersTr[2, :, 0],
cornersTr[3, :, 0],
)
)
)
# Zeilenanzahl - min_hl
vMin = int(np.floor(min(cornersTl[0, :, 1],
cornersTl[2, :, 1],
cornersTr[0, :, 1],
cornersTr[2, :, 1],
)
)
)
# max_hl
vMax = int(np.ceil(max(cornersTl[1, :, 1],
cornersTl[3, :, 1],
cornersTr[1, :, 1],
cornersTr[3, :, 1],
)
)
)
t = [-uMin, -vMin]
Ht = np.array([[1, 0, t[0]], [0, 1, t[1]], [0, 0, 1]])
img_1T = cv.warpPerspective(
img1, np.float32(Ht.dot(H1)), (uMax - uMin, vMax - vMin), flags=cv.INTER_LINEAR
)
img_2T = cv.warpPerspective(
img2, np.float32(Ht.dot(H2)), (uMax - uMin, vMax - vMin), flags=cv.INTER_LINEAR
)
return img_1T, img_2T, np.float32(Ht)