Source code for miop.rectification

# Original code from Stefan Toeberg (2023)
# Copyright (c) 2023, Stefan Toeberg.
#
# Modified by Maxime Paschoud (2025)
# Copyright (c) 2025, Maxime Paschoud.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# (http://opensource.org/licenses/BSD-3-Clause)
#
# __author__ = "Stefan Toeberg, LUH: IMR; Maxime Paschoud, ETHZ"
#  

from pipeline import DAGNode
import numpy as np
import cv2 as cv


[docs]
class Rectification(DAGNode):

    """
    Performs stereo image rectification based on affine fundamental matrices.

    This class computes rectifying transformations (similarity or rigid), applies them to stereo image pairs, and returns rectified image pairs along with transformation matrices.

    Attributes
    ----------
    rectification_transforms : list of dict
        List of transformation matrices for each image pair. Each dict contains 'T1', 'T2', and 'Ht'.
    rectified_pairs : list of np.ndarray
        List of rectified image pairs, each as a 2-element NumPy array of images.
    """

    def __init__(self):

        """
        Initializes the Rectification node.
        """

        super().__init__()


[docs]
    def eval(self, epipolar_sets, img_collection):
        """
        Applies rectifying transformations to each stereo image pair in a dataset.

        Parameters
        ----------
        epipolar_sets : list of dict
            Each dict contains at least a fundamental matrix under key `'F'`.
        img_collection : object
            Object that holds image pairs in the `pairs` attribute. Each pair is a 2-tuple of image objects with `.image` fields.

        Returns
        -------
        tuple
            A tuple of:
            
            - rectification_transforms : list of dict
                Each dict contains matrices `'T1'`, `'T2'`, and `'Ht'` (post-translation).
            - rectified_pairs : list of np.ndarray
                Each element is an array of two rectified images.
        """

        rectified_pairs = []
        rectification_transforms = []
        for j, pair in enumerate(img_collection.pairs):
            T1, T2 = self.rectifying_similarities(epipolar_sets[j]['F'])
            rectified_img1, rectified_img2, Ht = self.transform_images(T1, T2, pair[0].image, pair[1].image)

            # Rectify matches
            # H1 = Ht.dot(S1)
            # H2 = Ht.dot(S2)
            # rectified_pts1, rectified_pts2 = self.transform_matches(epipolar_sets[j]['pts1'], epipolar_sets[j]['pts2'], H1, H2)

            rectification_transforms.append({'T1': T1, 'T2': T2, 'Ht': Ht})
            rectified_pairs.append(np.array([rectified_img1, rectified_img2]))

        self.rectification_transforms = rectification_transforms
        self.rectified_pairs = rectified_pairs
        
        return rectification_transforms, rectified_pairs


        

[docs]
    def transform_matches(self, pts1, pts2, H1, H2):

        """
        Applies rectifying homographies to matched keypoints.

        Parameters
        ----------
        pts1 : np.ndarray
            Matched keypoints from image 1, shape (n, 2).
        pts2 : np.ndarray
            Matched keypoints from image 2, shape (n, 2).
        H1 : np.ndarray
            Homography for image 1.
        H2 : np.ndarray
            Homography for image 2.

        Returns
        -------
        tuple
            pts1r, pts2r : np.ndarray
                Rectified keypoints in both images, shape (n, 3).
        """

        # transform matches into the rectified image
        pts1r = self.homogenize(self.transform_2D(self.homogenize(pts1).T, H1)).T
        pts2r = self.homogenize(self.transform_2D(self.homogenize(pts2).T, H2)).T

        return pts1r, pts2r



[docs]
    def rectifying_similarities(self, F):
        """
        Computes rectifying similarity transforms from an affine fundamental matrix.

        Based on Shimshoni's geometric interpretation of weak-perspective motion.

        Parameters
        ----------
        F : np.ndarray
            3x3 affine fundamental matrix.

        Returns
        -------
        tuple
            S1, S2 : np.ndarray
                Similarity transformations for image 1 and image 2.

        Raises
        ------
        AssertionError
            If F is not affine or not rank 2 or not 3x3.
        """

        # check that the input matrix is an affine fundamental matrix
        assert np.shape(F) == (3, 3)
        assert np.linalg.matrix_rank(F) == 2
        assert F[0, 0] == 0
        assert F[0, 1] == 0
        assert F[1, 0] == 0
        assert F[1, 1] == 0

        # notations, THE SAME CONVENTION as in H&Z
        c = F[2, 0]
        d = F[2, 1]
        a = F[0, 2]
        b = F[1, 2]
        e = F[2, 2]

        # rotations
        r = np.sqrt(a * a + b * b)
        s = np.sqrt(c * c + d * d)
        R2 = (1.0 / r) * np.array([[-b, a], [-a, -b]])
        R1 = (1.0 / s) * np.array([[d, -c], [c, d]])

        # zoom and translation
        z = s / r

        # compute translation from e and only for the image that is the reference
        # img 2 gets scaled to match img 1
        # (scaling is not divided onto both images and only applied to image 2)
        t = e / s

        # output similarities
        S1 = np.zeros((3, 3))
        S1[0:2, 0:2] = R1
        S1[1, 2] = 0
        S1[2, 2] = 1

        S2 = np.zeros((3, 3))
        S2[0:2, 0:2] = (1.0 / z) * R2
        S2[1, 2] = -t
        S2[2, 2] = 1

        if S2[0, 0] < 0:
            mir = np.array(np.mat("-1 -1 -1; -1 -1 -1; 1 1 1"))
            S1 = S1 * mir
            S2 = S2 * mir

        return S1, S2



[docs]
    def homogenize(self, m):
        """
        Converts 2D point coordinates to homogeneous coordinates.

        Parameters
        ----------
        m : np.ndarray
            2D coordinates, shape (n, 2) or (2, n)

        Returns
        -------
        np.ndarray
            Homogeneous coordinates, shape (n, 3) or (3, n) to match input format.
        """

        # transpose marker initialized as 0
        T = 0
        # transpose point array if of the form (nx2)
        if np.shape(m)[1] == 2 or np.shape(m)[1] == 3:
            m = np.transpose(m)
            T = 1
        # add ones
        i = np.shape(m)[1]
        ones = np.ones((1, i))
        mh = np.vstack((m, ones))
        # if array was transposed retranspose
        if T == 1:
            mh = np.transpose(mh)
        return mh

        

[docs]
    def transform_2D(self, pts, H):
        
        """
        Applies a homography to 2D homogeneous points.

        Parameters
        ----------
        pts : np.ndarray
            Homogeneous 2D points (3, n).
        H : np.ndarray
            Homography matrix (3, 3).

        Returns
        -------
        np.ndarray
            Transformed 2D points (2, n).
        """

        pts_T_un = H.dot(pts)
        pts_T = pts_T_un[0:2, :] / np.array((pts_T_un[2, :], pts_T_un[2, :]))
        return pts_T




[docs]
    def rectifying_rigid_transformations(self, F, pts1, pts2):
        """
        Computes rigid rectifying transformations from an affine F and matched points.

        Parameters
        ----------
        F : np.ndarray
            3x3 affine fundamental matrix.
        pts1 : np.ndarray
            3xn homogeneous point array from image 1.
        pts2 : np.ndarray
            3xn homogeneous point array from image 2.

        Returns
        -------
        tuple
            S1, S2 : np.ndarray
                Rigid rectifying transformation matrices for the two images.

        Raises
        ------
        AssertionError
            If F is not affine or not rank 2.
        Exception
            If rotation matrix signs cannot be resolved.
        """

        # check that the input matrix is an affine fundamental matrix
        assert np.shape(F) == (3, 3)
        assert np.linalg.matrix_rank(F) == 2
        assert F[0, 0] == 0
        assert F[0, 1] == 0
        assert F[1, 0] == 0
        assert F[1, 1] == 0

        # notations, same conventions as in H&Z
        c = F[2, 0]
        d = F[2, 1]
        a = F[0, 2]
        b = F[1, 2]
        # computing the translation by the transformed point correspondences, e
        # is actually not needed
        e = F[2, 2]
        if e < 0:
            F = -F

        # rotations
        s = np.sqrt(a * a + b * b)
        r = np.sqrt(c * c + d * d)
        if a < 0 and b < 0:
            R1 = (1.0 / r) * np.array([[d, -c], [c, d]])
            R2 = (1.0 / s) * np.array([[-b, a], [-a, -b]])
        elif c < 0 and d < 0:
            R1 = (1.0 / r) * np.array([[-d, c], [-c, -d]])
            R2 = (1.0 / s) * np.array([[b, -a], [a, b]])
        elif a < 0 and d < 0:
            R1 = (1.0 / r) * np.array([[-d, c], [-c, -d]])
            R2 = (1.0 / s) * np.array([[b, -a], [a, b]])
        elif b < 0 and c < 0:
            R1 = (1.0 / r) * np.array([[d, -c], [c, d]])
            R2 = (1.0 / s) * np.array([[-b, a], [-a, -b]])
        else:
            raise Exception("something is wrong with the rotation matrices sign")

        # rectifying transformation/rotation matrices without translation
        T1 = np.zeros((3, 3))
        T1[0:2, 0:2] = R1
        T1[1, 2] = 0
        T1[2, 2] = 1

        T2 = np.zeros((3, 3))
        T2[0:2, 0:2] = R2
        T2[1, 2] = 0
        T2[2, 2] = 1

        # determine the necessary translation to align the epipolar lines
        pts_t1 = T1.dot(pts1)
        pts_t2 = T2.dot(pts2)
        translation = pts_t1[1, :] - pts_t2[1, :]
        t = np.mean(translation)

        # rigid transformation matrices containing rotations and translations
        S1 = np.zeros((3, 3))
        S1[0:2, 0:2] = R1
        S1[1, 2] = 0
        S1[2, 2] = 1

        S2 = np.zeros((3, 3))
        S2[0:2, 0:2] = R2
        S2[1, 2] = t
        S2[2, 2] = 1

        return S1, S2



[docs]
    def transform_images(self, H1, H2, img1, img2):
        """
        Applies given homographies to images and aligns them using a shared bounding box.

        Parameters
        ----------
        H1 : np.ndarray
            3x3 homography for image 1.
        H2 : np.ndarray
            3x3 homography for image 2.
        img1 : np.ndarray
            Original image 1.
        img2 : np.ndarray
            Original image 2.

        Returns
        -------
        tuple
            rectified_img1, rectified_img2 : np.ndarray
                Warped images.
            Ht : np.ndarray
                Translation matrix used to shift both images into shared frame.
        """

        h, w, c = img1.shape
        h2, w2, c2 = img2.shape

        corners = np.float32([[0, 0], [0, h], [w, 0], [w, h]]).reshape(-1, 1, 2)
        corners2 = np.float32(
            [[0, 0], [0, h2], [w2, 0], [w2, h2]]).reshape(-1, 1, 2)

        cornersTl = cv.perspectiveTransform(corners, H1)
        cornersTr = cv.perspectiveTransform(corners2, H2)

        # Spaltenanzahl - min_wl
        uMin = int(np.floor(min(cornersTl[0, :, 0],
                                cornersTl[1, :, 0],
                                cornersTr[0, :, 0],
                                cornersTr[1, :, 0],
                                )
                            )
                )

        # max_w
        uMax = int(np.ceil(max(cornersTl[2, :, 0],
                            cornersTl[3, :, 0],
                            cornersTr[2, :, 0],
                            cornersTr[3, :, 0],
                            )
                        )
                )

        # Zeilenanzahl - min_hl
        vMin = int(np.floor(min(cornersTl[0, :, 1],
                                cornersTl[2, :, 1],
                                cornersTr[0, :, 1],
                                cornersTr[2, :, 1],
                                )
                            )
                )

        # max_hl
        vMax = int(np.ceil(max(cornersTl[1, :, 1],
                            cornersTl[3, :, 1],
                            cornersTr[1, :, 1],
                            cornersTr[3, :, 1],
                            )
                        )
                )

        t = [-uMin, -vMin]
        Ht = np.array([[1, 0, t[0]], [0, 1, t[1]], [0, 0, 1]])

        img_1T = cv.warpPerspective(
            img1, np.float32(Ht.dot(H1)), (uMax - uMin, vMax - vMin), flags=cv.INTER_LINEAR
        )

        img_2T = cv.warpPerspective(
            img2, np.float32(Ht.dot(H2)), (uMax - uMin, vMax - vMin), flags=cv.INTER_LINEAR
        )

        return img_1T, img_2T, np.float32(Ht)