From 9a9d172d59e922b074956f32cc7f6ebea0819fc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Sch=C3=B6nberger?= <joschonb@microsoft.com>
Date: Wed, 12 Jul 2023 22:55:37 +0300
Subject: [PATCH] Refactor sim3

---
 src/colmap/base/reconstruction.cc             | 124 ++++++++++-
 src/colmap/base/reconstruction.h              | 115 ----------
 src/colmap/base/synthetic.cc                  |   1 +
 src/colmap/estimators/absolute_pose_test.cc   |   8 +-
 .../estimators/coordinate_frame_test.cc       |   6 +-
 .../generalized_absolute_pose_test.cc         |   5 +-
 .../generalized_relative_pose_test.cc         |   5 +-
 src/colmap/exe/model.cc                       |  39 ++--
 src/colmap/geometry/similarity_transform.cc   | 196 +++++++++---------
 src/colmap/geometry/similarity_transform.h    |  54 +++--
 .../geometry/similarity_transform_test.cc     |  99 ++++-----
 src/colmap/optim/loransac_test.cc             |   3 +-
 src/colmap/optim/ransac_test.cc               |   3 +-
 13 files changed, 306 insertions(+), 352 deletions(-)

diff --git a/src/colmap/base/reconstruction.cc b/src/colmap/base/reconstruction.cc
index 7004a3e6de..c773a8ac2d 100644
--- a/src/colmap/base/reconstruction.cc
+++ b/src/colmap/base/reconstruction.cc
@@ -32,11 +32,13 @@
 #include "colmap/base/reconstruction.h"
 
 #include "colmap/base/database_cache.h"
+#include "colmap/estimators/similarity_transform.h"
 #include "colmap/geometry/gps.h"
 #include "colmap/geometry/pose.h"
 #include "colmap/geometry/projection.h"
 #include "colmap/geometry/triangulation.h"
 #include "colmap/image/bitmap.h"
+#include "colmap/optim/loransac.h"
 #include "colmap/util/misc.h"
 #include "colmap/util/ply.h"
 
@@ -432,7 +434,7 @@ void Reconstruction::Transform(const SimilarityTransform3& tform) {
     tform.TransformPose(&image.second.Qvec(), &image.second.Tvec());
   }
   for (auto& point3D : points3D_) {
-    tform.TransformPoint(&point3D.second.XYZ());
+    point3D.second.XYZ() = tform * point3D.second.XYZ();
   }
 }
 
@@ -539,8 +541,7 @@ bool Reconstruction::Merge(const Reconstruction& reconstruction,
         (new_track.Length() + old_track.Length()) >= 2 &&
         old_point3D_ids.size() == 1;
     if (create_new_point || merge_new_and_old_point) {
-      Eigen::Vector3d xyz = point3D.second.XYZ();
-      alignment.TransformPoint(&xyz);
+      const Eigen::Vector3d xyz = alignment * point3D.second.XYZ();
       const auto point3D_id =
           AddPoint3D(xyz, new_track, point3D.second.Color());
       if (old_point3D_ids.size() == 1) {
@@ -1265,13 +1266,11 @@ void Reconstruction::ExportVRML(const std::string& images_path,
     images_file << " coord Coordinate {\n";
     images_file << " point [\n";
 
-    Eigen::Transform<double, 3, Eigen::Affine> transform;
-    transform.matrix().topLeftCorner<3, 4>() =
-        image.second.InverseProjectionMatrix();
-
     // Move camera base model to camera pose.
+    const Eigen::Matrix3x4d worldFromCam =
+        image.second.InverseProjectionMatrix();
     for (size_t i = 0; i < points.size(); i++) {
-      const Eigen::Vector3d point = transform * points[i];
+      const Eigen::Vector3d point = worldFromCam * points[i].homogeneous();
       images_file << point(0) << " " << point(1) << " " << point(2) << "\n";
     }
 
@@ -2174,4 +2173,113 @@ void Reconstruction::ResetTriObservations(const image_t image_id,
   }
 }
 
+bool Reconstruction::Align(const std::vector<std::string>& image_names,
+                           const std::vector<Eigen::Vector3d>& locations,
+                           const int min_common_images,
+                           SimilarityTransform3* tform) {
+  CHECK_GE(min_common_images, 3);
+  CHECK_EQ(image_names.size(), locations.size());
+
+  // Find out which images are contained in the reconstruction and get the
+  // positions of their camera centers.
+  std::unordered_set<image_t> common_image_ids;
+  std::vector<Eigen::Vector3d> src;
+  std::vector<Eigen::Vector3d> dst;
+  for (size_t i = 0; i < image_names.size(); ++i) {
+    const class Image* image = FindImageWithName(image_names[i]);
+    if (image == nullptr) {
+      continue;
+    }
+
+    if (!IsImageRegistered(image->ImageId())) {
+      continue;
+    }
+
+    // Ignore duplicate images.
+    if (common_image_ids.count(image->ImageId()) > 0) {
+      continue;
+    }
+
+    common_image_ids.insert(image->ImageId());
+    src.push_back(image->ProjectionCenter());
+    dst.push_back(locations[i]);
+  }
+
+  // Only compute the alignment if there are enough correspondences.
+  if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
+    return false;
+  }
+
+  SimilarityTransform3 transform;
+  if (!transform.Estimate(src, dst)) {
+    return false;
+  }
+
+  Transform(transform);
+
+  if (tform != nullptr) {
+    *tform = transform;
+  }
+
+  return true;
+}
+
+bool Reconstruction::AlignRobust(const std::vector<std::string>& image_names,
+                                 const std::vector<Eigen::Vector3d>& locations,
+                                 const int min_common_images,
+                                 const RANSACOptions& ransac_options,
+                                 SimilarityTransform3* tform) {
+  CHECK_GE(min_common_images, 3);
+  CHECK_EQ(image_names.size(), locations.size());
+
+  // Find out which images are contained in the reconstruction and get the
+  // positions of their camera centers.
+  std::unordered_set<image_t> common_image_ids;
+  std::vector<Eigen::Vector3d> src;
+  std::vector<Eigen::Vector3d> dst;
+  for (size_t i = 0; i < image_names.size(); ++i) {
+    const class Image* image = FindImageWithName(image_names[i]);
+    if (image == nullptr) {
+      continue;
+    }
+
+    if (!IsImageRegistered(image->ImageId())) {
+      continue;
+    }
+
+    // Ignore duplicate images.
+    if (common_image_ids.count(image->ImageId()) > 0) {
+      continue;
+    }
+
+    common_image_ids.insert(image->ImageId());
+    src.push_back(image->ProjectionCenter());
+    dst.push_back(locations[i]);
+  }
+
+  // Only compute the alignment if there are enough correspondences.
+  if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
+    return false;
+  }
+
+  LORANSAC<SimilarityTransformEstimator<3, true>,
+           SimilarityTransformEstimator<3, true>>
+      ransac(ransac_options);
+
+  const auto report = ransac.Estimate(src, dst);
+
+  if (report.support.num_inliers < static_cast<size_t>(min_common_images)) {
+    return false;
+  }
+
+  SimilarityTransform3 transform = SimilarityTransform3(report.model);
+  Transform(transform);
+
+  if (tform != nullptr) {
+    *tform = transform;
+  }
+
+  return true;
+}
+
 }  // namespace colmap
diff --git a/src/colmap/base/reconstruction.h b/src/colmap/base/reconstruction.h
index b54cb54649..f57bce8688 100644
--- a/src/colmap/base/reconstruction.h
+++ b/src/colmap/base/reconstruction.h
@@ -37,9 +37,7 @@
 #include "colmap/base/point2d.h"
 #include "colmap/base/point3d.h"
 #include "colmap/base/track.h"
-#include "colmap/estimators/similarity_transform.h"
 #include "colmap/geometry/similarity_transform.h"
-#include "colmap/optim/loransac.h"
 #include "colmap/util/types.h"
 
 #include <memory>
@@ -208,14 +206,12 @@ class Reconstruction {
   // Align the given reconstruction with a set of pre-defined camera positions.
   // Assuming that locations[i] gives the 3D coordinates of the center
   // of projection of the image with name image_names[i].
-  template <bool kEstimateScale = true>
   bool Align(const std::vector<std::string>& image_names,
              const std::vector<Eigen::Vector3d>& locations,
              int min_common_images,
              SimilarityTransform3* tform = nullptr);
 
   // Robust alignment using RANSAC.
-  template <bool kEstimateScale = true>
   bool AlignRobust(const std::vector<std::string>& image_names,
                    const std::vector<Eigen::Vector3d>& locations,
                    int min_common_images,
@@ -536,115 +532,4 @@ bool Reconstruction::IsImageRegistered(const image_t image_id) const {
   return Image(image_id).IsRegistered();
 }
 
-template <bool kEstimateScale>
-bool Reconstruction::Align(const std::vector<std::string>& image_names,
-                           const std::vector<Eigen::Vector3d>& locations,
-                           const int min_common_images,
-                           SimilarityTransform3* tform) {
-  CHECK_GE(min_common_images, 3);
-  CHECK_EQ(image_names.size(), locations.size());
-
-  // Find out which images are contained in the reconstruction and get the
-  // positions of their camera centers.
-  std::unordered_set<image_t> common_image_ids;
-  std::vector<Eigen::Vector3d> src;
-  std::vector<Eigen::Vector3d> dst;
-  for (size_t i = 0; i < image_names.size(); ++i) {
-    const class Image* image = FindImageWithName(image_names[i]);
-    if (image == nullptr) {
-      continue;
-    }
-
-    if (!IsImageRegistered(image->ImageId())) {
-      continue;
-    }
-
-    // Ignore duplicate images.
-    if (common_image_ids.count(image->ImageId()) > 0) {
-      continue;
-    }
-
-    common_image_ids.insert(image->ImageId());
-    src.push_back(image->ProjectionCenter());
-    dst.push_back(locations[i]);
-  }
-
-  // Only compute the alignment if there are enough correspondences.
-  if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
-    return false;
-  }
-
-  SimilarityTransform3 transform;
-  if (!transform.Estimate<kEstimateScale>(src, dst)) {
-    return false;
-  }
-
-  Transform(transform);
-
-  if (tform != nullptr) {
-    *tform = transform;
-  }
-
-  return true;
-}
-
-template <bool kEstimateScale>
-bool Reconstruction::AlignRobust(const std::vector<std::string>& image_names,
-                                 const std::vector<Eigen::Vector3d>& locations,
-                                 const int min_common_images,
-                                 const RANSACOptions& ransac_options,
-                                 SimilarityTransform3* tform) {
-  CHECK_GE(min_common_images, 3);
-  CHECK_EQ(image_names.size(), locations.size());
-
-  // Find out which images are contained in the reconstruction and get the
-  // positions of their camera centers.
-  std::unordered_set<image_t> common_image_ids;
-  std::vector<Eigen::Vector3d> src;
-  std::vector<Eigen::Vector3d> dst;
-  for (size_t i = 0; i < image_names.size(); ++i) {
-    const class Image* image = FindImageWithName(image_names[i]);
-    if (image == nullptr) {
-      continue;
-    }
-
-    if (!IsImageRegistered(image->ImageId())) {
-      continue;
-    }
-
-    // Ignore duplicate images.
-    if (common_image_ids.count(image->ImageId()) > 0) {
-      continue;
-    }
-
-    common_image_ids.insert(image->ImageId());
-    src.push_back(image->ProjectionCenter());
-    dst.push_back(locations[i]);
-  }
-
-  // Only compute the alignment if there are enough correspondences.
-  if (common_image_ids.size() < static_cast<size_t>(min_common_images)) {
-    return false;
-  }
-
-  LORANSAC<SimilarityTransformEstimator<3, kEstimateScale>,
-           SimilarityTransformEstimator<3, kEstimateScale>>
-      ransac(ransac_options);
-
-  const auto report = ransac.Estimate(src, dst);
-
-  if (report.support.num_inliers < static_cast<size_t>(min_common_images)) {
-    return false;
-  }
-
-  SimilarityTransform3 transform = SimilarityTransform3(report.model);
-  Transform(transform);
-
-  if (tform != nullptr) {
-    *tform = transform;
-  }
-
-  return true;
-}
-
 }  // namespace colmap
diff --git a/src/colmap/base/synthetic.cc b/src/colmap/base/synthetic.cc
index 454ab9640a..7f2cb85fae 100644
--- a/src/colmap/base/synthetic.cc
+++ b/src/colmap/base/synthetic.cc
@@ -33,6 +33,7 @@
 
 #include "colmap/geometry/essential_matrix.h"
 #include "colmap/geometry/pose.h"
+#include "colmap/geometry/projection.h"
 #include "colmap/util/random.h"
 
 #include <Eigen/Geometry>
diff --git a/src/colmap/estimators/absolute_pose_test.cc b/src/colmap/estimators/absolute_pose_test.cc
index 782b03add6..00e479e732 100644
--- a/src/colmap/estimators/absolute_pose_test.cc
+++ b/src/colmap/estimators/absolute_pose_test.cc
@@ -70,9 +70,7 @@ TEST(AbsolutePose, P3P) {
       // Project points to camera coordinate system.
       std::vector<Eigen::Vector2d> points2D;
       for (size_t i = 0; i < points3D.size(); ++i) {
-        Eigen::Vector3d point3D_camera = points3D[i];
-        orig_tform.TransformPoint(&point3D_camera);
-        points2D.push_back(point3D_camera.hnormalized());
+        points2D.push_back((orig_tform * points3D[i]).hnormalized());
       }
 
       RANSACOptions options;
@@ -132,9 +130,7 @@ TEST(AbsolutePose, EPNP) {
       // Project points to camera coordinate system.
       std::vector<Eigen::Vector2d> points2D;
       for (size_t i = 0; i < points3D.size(); ++i) {
-        Eigen::Vector3d point3D_camera = points3D[i];
-        orig_tform.TransformPoint(&point3D_camera);
-        points2D.push_back(point3D_camera.hnormalized());
+        points2D.push_back((orig_tform * points3D[i]).hnormalized());
       }
 
       RANSACOptions options;
diff --git a/src/colmap/estimators/coordinate_frame_test.cc b/src/colmap/estimators/coordinate_frame_test.cc
index f0e4925a20..8d77e1c5ca 100644
--- a/src/colmap/estimators/coordinate_frame_test.cc
+++ b/src/colmap/estimators/coordinate_frame_test.cc
@@ -102,11 +102,11 @@ TEST(CoordinateFrame, AlignToPrincipalPlane) {
                 .norm(),
             1e-6);
   // Verify that transform matrix does shuffling of axes
-  Eigen::Matrix4d mat;
+  Eigen::Matrix3x4d mat;
   if (inverted) {
-    mat << 0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 1;
+    mat << 0, -1, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0;
   } else {
-    mat << 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1;
+    mat << 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0;
   }
   std::cout << tform.Matrix() << std::endl;
   EXPECT_LE((tform.Matrix() - mat).norm(), 1e-6);
diff --git a/src/colmap/estimators/generalized_absolute_pose_test.cc b/src/colmap/estimators/generalized_absolute_pose_test.cc
index d82e210dfe..71f04138ba 100644
--- a/src/colmap/estimators/generalized_absolute_pose_test.cc
+++ b/src/colmap/estimators/generalized_absolute_pose_test.cc
@@ -94,11 +94,10 @@ TEST(GeneralizedAbsolutePose, Estimate) {
       // Project points to camera coordinate system.
       std::vector<GP3PEstimator::X_t> points2D;
       for (size_t i = 0; i < points3D.size(); ++i) {
-        Eigen::Vector3d point3D_camera = points3D[i];
-        orig_tforms[i % kNumTforms].TransformPoint(&point3D_camera);
         points2D.emplace_back();
         points2D.back().rel_tform = rel_tforms[i % kNumTforms];
-        points2D.back().xy = point3D_camera.hnormalized();
+        points2D.back().xy =
+            (orig_tforms[i % kNumTforms] * points3D[i]).hnormalized();
       }
 
       RANSACOptions options;
diff --git a/src/colmap/estimators/generalized_relative_pose_test.cc b/src/colmap/estimators/generalized_relative_pose_test.cc
index 26f349e3d0..5284a71a98 100644
--- a/src/colmap/estimators/generalized_relative_pose_test.cc
+++ b/src/colmap/estimators/generalized_relative_pose_test.cc
@@ -92,9 +92,8 @@ TEST(GeneralizedRelativePose, Estimate) {
       for (size_t i = 0; i < points3D.size(); ++i) {
         const Eigen::Vector3d point3D_camera1 =
             rel_tforms[i % kNumTforms] * points3D[i].homogeneous();
-        Eigen::Vector3d point3D_camera2 = points3D[i];
-        orig_tforms[(i + 1) % kNumTforms].TransformPoint(&point3D_camera2);
-
+        const Eigen::Vector3d point3D_camera2 =
+            orig_tforms[(i + 1) % kNumTforms] * points3D[i];
         if (point3D_camera1.z() < 0 || point3D_camera2.z() < 0) {
           continue;
         }
diff --git a/src/colmap/exe/model.cc b/src/colmap/exe/model.cc
index cfdc39be9d..c6f023c0c4 100644
--- a/src/colmap/exe/model.cc
+++ b/src/colmap/exe/model.cc
@@ -72,9 +72,9 @@ Eigen::Vector3d TransformLatLonAltToModelCoords(
   // set the altitude to 0 when converting from LLA to ECEF and then we use the
   // altitude at the end, after scaling, to set it as the z coordinate in the
   // ENU frame.
-  Eigen::Vector3d xyz = GPSTransform(GPSTransform::WGS84)
-                            .EllToXYZ({Eigen::Vector3d(lat, lon, 0.0)})[0];
-  tform.TransformPoint(&xyz);
+  Eigen::Vector3d xyz =
+      tform * GPSTransform(GPSTransform::WGS84)
+                  .EllToXYZ({Eigen::Vector3d(lat, lon, 0.0)})[0];
   xyz(2) = tform.Scale() * alt;
   return xyz;
 }
@@ -270,7 +270,6 @@ int RunModelAligner(int argc, char** argv) {
   std::string alignment_type = "custom";
   int min_common_images = 3;
   bool robust_alignment = true;
-  bool estimate_scale = true;
   RANSACOptions ransac_options;
 
   OptionManager options;
@@ -286,7 +285,6 @@ int RunModelAligner(int argc, char** argv) {
       &alignment_type,
       "{plane, ecef, enu, enu-plane, enu-plane-unscaled, custom}");
   options.AddDefaultOption("min_common_images", &min_common_images);
-  options.AddDefaultOption("estimate_scale", &estimate_scale);
   options.AddDefaultOption("robust_alignment", &robust_alignment);
   options.AddDefaultOption("robust_alignment_max_error",
                            &ransac_options.max_error);
@@ -357,28 +355,15 @@ int RunModelAligner(int argc, char** argv) {
                               ref_image_names.size())
               << std::endl;
 
-    if (estimate_scale) {
-      if (robust_alignment) {
-        alignment_success = reconstruction.AlignRobust(ref_image_names,
-                                                       ref_locations,
-                                                       min_common_images,
-                                                       ransac_options,
-                                                       &tform);
-      } else {
-        alignment_success = reconstruction.Align(
-            ref_image_names, ref_locations, min_common_images, &tform);
-      }
+    if (robust_alignment) {
+      alignment_success = reconstruction.AlignRobust(ref_image_names,
+                                                     ref_locations,
+                                                     min_common_images,
+                                                     ransac_options,
+                                                     &tform);
     } else {
-      if (robust_alignment) {
-        alignment_success = reconstruction.AlignRobust<false>(ref_image_names,
-                                                              ref_locations,
-                                                              min_common_images,
-                                                              ransac_options,
-                                                              &tform);
-      } else {
-        alignment_success = reconstruction.Align<false>(
-            ref_image_names, ref_locations, min_common_images, &tform);
-      }
+      alignment_success = reconstruction.Align(
+          ref_image_names, ref_locations, min_common_images, &tform);
     }
 
     std::vector<double> errors;
@@ -434,7 +419,7 @@ int RunModelAligner(int argc, char** argv) {
     std::cout << "=> Alignment succeeded" << std::endl;
     reconstruction.Write(output_path);
     if (!transform_path.empty()) {
-      tform.Write(transform_path);
+      tform.ToFile(transform_path);
     }
     return EXIT_SUCCESS;
   } else {
diff --git a/src/colmap/geometry/similarity_transform.cc b/src/colmap/geometry/similarity_transform.cc
index 45db597900..d799715f7b 100644
--- a/src/colmap/geometry/similarity_transform.cc
+++ b/src/colmap/geometry/similarity_transform.cc
@@ -53,12 +53,12 @@ struct ReconstructionAlignmentEstimator {
     max_squared_reproj_error_ = max_reproj_error * max_reproj_error;
   }
 
-  void SetReconstructions(const Reconstruction* reconstruction1,
-                          const Reconstruction* reconstruction2) {
-    CHECK_NOTNULL(reconstruction1);
-    CHECK_NOTNULL(reconstruction2);
-    reconstruction1_ = reconstruction1;
-    reconstruction2_ = reconstruction2;
+  void SetReconstructions(const Reconstruction* src_reconstruction,
+                          const Reconstruction* tgt_reconstruction) {
+    CHECK_NOTNULL(src_reconstruction);
+    CHECK_NOTNULL(tgt_reconstruction);
+    src_reconstruction_ = src_reconstruction;
+    tgt_reconstruction_ = tgt_reconstruction;
   }
 
   // Estimate 3D similarity transform from corresponding projection centers.
@@ -76,10 +76,12 @@ struct ReconstructionAlignmentEstimator {
       proj_centers2[i] = tgt_images[i]->ProjectionCenter();
     }
 
-    SimilarityTransform3 tform12;
-    tform12.Estimate(proj_centers1, proj_centers2);
+    SimilarityTransform3 tgtFromSrc;
+    if (tgtFromSrc.Estimate(proj_centers1, proj_centers2)) {
+      return {tgtFromSrc.Matrix()};
+    }
 
-    return {tform12.Matrix().topRows<3>()};
+    return {};
   }
 
   // For each image, determine the ratio of 3D points that correctly project
@@ -92,63 +94,69 @@ struct ReconstructionAlignmentEstimator {
                  const M_t& tgtFromSrc,
                  std::vector<double>* residuals) const {
     CHECK_EQ(src_images.size(), tgt_images.size());
-    CHECK_NOTNULL(reconstruction1_);
-    CHECK_NOTNULL(reconstruction2_);
+    CHECK_NOTNULL(src_reconstruction_);
+    CHECK_NOTNULL(tgt_reconstruction_);
 
     const Eigen::Matrix3x4d srcFromTgt =
-        SimilarityTransform3(tgtFromSrc).Inverse().Matrix().topRows<3>();
+        SimilarityTransform3(tgtFromSrc).Inverse().Matrix();
 
     residuals->resize(src_images.size());
 
     for (size_t i = 0; i < src_images.size(); ++i) {
-      const auto& image1 = *src_images[i];
-      const auto& image2 = *tgt_images[i];
+      const auto& src_image = *src_images[i];
+      const auto& tgt_image = *tgt_images[i];
 
-      CHECK_EQ(image1.ImageId(), image2.ImageId());
+      CHECK_EQ(src_image.ImageId(), tgt_image.ImageId());
 
-      const auto& camera1 = reconstruction1_->Camera(image1.CameraId());
-      const auto& camera2 = reconstruction2_->Camera(image2.CameraId());
+      const auto& src_camera =
+          src_reconstruction_->Camera(src_image.CameraId());
+      const auto& tgt_camera =
+          tgt_reconstruction_->Camera(tgt_image.CameraId());
 
-      const Eigen::Matrix3x4d proj_matrix1 = image1.ProjectionMatrix();
-      const Eigen::Matrix3x4d proj_matrix2 = image2.ProjectionMatrix();
+      const Eigen::Matrix3x4d tgt_proj_matrix = src_image.ProjectionMatrix();
+      const Eigen::Matrix3x4d src_proj_matrix = tgt_image.ProjectionMatrix();
 
-      CHECK_EQ(image1.NumPoints2D(), image2.NumPoints2D());
+      CHECK_EQ(src_image.NumPoints2D(), tgt_image.NumPoints2D());
 
       size_t num_inliers = 0;
       size_t num_common_points = 0;
 
-      for (point2D_t point2D_idx = 0; point2D_idx < image1.NumPoints2D();
+      for (point2D_t point2D_idx = 0; point2D_idx < src_image.NumPoints2D();
            ++point2D_idx) {
         // Check if both images have a 3D point.
 
-        const auto& point2D1 = image1.Point2D(point2D_idx);
-        if (!point2D1.HasPoint3D()) {
+        const auto& src_point2D = src_image.Point2D(point2D_idx);
+        if (!src_point2D.HasPoint3D()) {
           continue;
         }
 
-        const auto& point2D2 = image2.Point2D(point2D_idx);
-        if (!point2D2.HasPoint3D()) {
+        const auto& tgt_point2D = tgt_image.Point2D(point2D_idx);
+        if (!tgt_point2D.HasPoint3D()) {
           continue;
         }
 
         num_common_points += 1;
 
-        // Reproject 3D point in image 1 to image 2.
-        const Eigen::Vector3d xyz12 =
-            tgtFromSrc *
-            reconstruction1_->Point3D(point2D1.Point3DId()).XYZ().homogeneous();
-        if (CalculateSquaredReprojectionError(
-                point2D2.XY(), xyz12, proj_matrix2, camera2) >
+        const Eigen::Vector3d src_point_in_tgt =
+            tgtFromSrc * src_reconstruction_->Point3D(src_point2D.Point3DId())
+                             .XYZ()
+                             .homogeneous();
+        if (CalculateSquaredReprojectionError(tgt_point2D.XY(),
+                                              src_point_in_tgt,
+                                              src_proj_matrix,
+                                              tgt_camera) >
             max_squared_reproj_error_) {
           continue;
         }
 
-        // Reproject 3D point in image 2 to image 1.
-        const Eigen::Vector3d xyz21 =
-            srcFromTgt *
-            reconstruction2_->Point3D(point2D2.Point3DId()).XYZ().homogeneous();
-        if (CalculateSquaredReprojectionError(
-                point2D1.XY(), xyz21, proj_matrix1, camera1) >
+        const Eigen::Vector3d tgt_point_in_src =
+            srcFromTgt * tgt_reconstruction_->Point3D(tgt_point2D.Point3DId())
+                             .XYZ()
+                             .homogeneous();
+        if (CalculateSquaredReprojectionError(src_point2D.XY(),
+                                              tgt_point_in_src,
+                                              tgt_proj_matrix,
+                                              src_camera) >
             max_squared_reproj_error_) {
           continue;
         }
@@ -169,104 +177,92 @@ struct ReconstructionAlignmentEstimator {
 
  private:
   double max_squared_reproj_error_ = 0.0;
-  const Reconstruction* reconstruction1_ = nullptr;
-  const Reconstruction* reconstruction2_ = nullptr;
+  const Reconstruction* src_reconstruction_ = nullptr;
+  const Reconstruction* tgt_reconstruction_ = nullptr;
 };
 
 }  // namespace
 
 SimilarityTransform3::SimilarityTransform3()
-    : SimilarityTransform3(
-          1, ComposeIdentityQuaternion(), Eigen::Vector3d(0, 0, 0)) {}
-
-SimilarityTransform3::SimilarityTransform3(const Eigen::Matrix3x4d& matrix) {
-  transform_.matrix().topLeftCorner<3, 4>() = matrix;
-}
+    : matrix_(Eigen::Matrix3x4d::Identity()) {}
 
-SimilarityTransform3::SimilarityTransform3(
-    const Eigen::Transform<double, 3, Eigen::Affine>& transform)
-    : transform_(transform) {}
+SimilarityTransform3::SimilarityTransform3(const Eigen::Matrix3x4d& matrix)
+    : matrix_(matrix) {}
 
 SimilarityTransform3::SimilarityTransform3(const double scale,
                                            const Eigen::Vector4d& qvec,
                                            const Eigen::Vector3d& tvec) {
-  Eigen::Matrix4d matrix = Eigen::MatrixXd::Identity(4, 4);
-  matrix.topLeftCorner<3, 4>() = ComposeProjectionMatrix(qvec, tvec);
-  matrix.block<3, 3>(0, 0) *= scale;
-  transform_.matrix() = matrix;
-}
-
-void SimilarityTransform3::Write(const std::string& path) {
-  std::ofstream file(path, std::ios::trunc);
-  CHECK(file.is_open()) << path;
-  // Ensure that we don't loose any precision by storing in text.
-  file.precision(17);
-  file << transform_.matrix() << std::endl;
+  matrix_ = ComposeProjectionMatrix(qvec, tvec);
+  matrix_.leftCols<3>() *= scale;
 }
 
 SimilarityTransform3 SimilarityTransform3::Inverse() const {
-  return SimilarityTransform3(transform_.inverse());
+  const double scale = Scale();
+  Eigen::Matrix3x4d inverse;
+  inverse.leftCols<3>() = matrix_.leftCols<3>().transpose() / (scale * scale);
+  inverse.col(3) = inverse.leftCols<3>() * -matrix_.col(3);
+  return SimilarityTransform3(inverse);
 }
 
-void SimilarityTransform3::TransformPoint(Eigen::Vector3d* xyz) const {
-  *xyz = transform_ * *xyz;
+const Eigen::Matrix3x4d& SimilarityTransform3::Matrix() const {
+  return matrix_;
 }
 
-void SimilarityTransform3::TransformPose(Eigen::Vector4d* qvec,
-                                         Eigen::Vector3d* tvec) const {
-  // Projection matrix P1 projects 3D object points to image plane and thus to
-  // 2D image points in the source coordinate system:
-  //    x' = P1 * X1
-  // 3D object points can be transformed to the destination system by applying
-  // the similarity transformation S:
-  //    X2 = S * X1
-  // To obtain the projection matrix P2 that transforms the object point in the
-  // destination system to the 2D image points, which do not change:
-  //    x' = P2 * X2 = P2 * S * X1 = P1 * S^-1 * S * X1 = P1 * I * X1
-  // and thus:
-  //    P2' = P1 * S^-1
-  // Finally, undo the inverse scaling of the rotation matrix:
-  //    P2 = s * P2'
-
-  Eigen::Matrix4d src_matrix = Eigen::MatrixXd::Identity(4, 4);
-  src_matrix.topLeftCorner<3, 4>() = ComposeProjectionMatrix(*qvec, *tvec);
-  Eigen::Matrix4d dst_matrix =
-      src_matrix.matrix() * transform_.inverse().matrix();
-  dst_matrix *= Scale();
-
-  *qvec = RotationMatrixToQuaternion(dst_matrix.block<3, 3>(0, 0));
-  *tvec = dst_matrix.block<3, 1>(0, 3);
+double SimilarityTransform3::Scale() const { return matrix_.col(0).norm(); }
+
+Eigen::Vector4d SimilarityTransform3::Rotation() const {
+  return RotationMatrixToQuaternion(matrix_.leftCols<3>() / Scale());
 }
 
-Eigen::Matrix4d SimilarityTransform3::Matrix() const {
-  return transform_.matrix();
+Eigen::Vector3d SimilarityTransform3::Translation() const {
+  return matrix_.col(3);
 }
 
-double SimilarityTransform3::Scale() const {
-  return Matrix().block<1, 3>(0, 0).norm();
+bool SimilarityTransform3::Estimate(const std::vector<Eigen::Vector3d>& src,
+                                    const std::vector<Eigen::Vector3d>& tgt) {
+  const auto results =
+      SimilarityTransformEstimator<3, true>().Estimate(src, tgt);
+  if (results.empty()) {
+    return false;
+  }
+  CHECK_EQ(results.size(), 1);
+  matrix_ = results[0];
+  return true;
 }
 
-Eigen::Vector4d SimilarityTransform3::Rotation() const {
-  return RotationMatrixToQuaternion(Matrix().block<3, 3>(0, 0) / Scale());
+void SimilarityTransform3::TransformPose(Eigen::Vector4d* qvec,
+                                         Eigen::Vector3d* tvec) const {
+  Eigen::Matrix4d inverse;
+  inverse.topRows<3>() = Inverse().Matrix();
+  inverse.row(3) = Eigen::Vector4d(0, 0, 0, 1);
+  const Eigen::Matrix3x4d transformed =
+      ComposeProjectionMatrix(*qvec, *tvec) * inverse;
+  const double transformed_scale = transformed.col(0).norm();
+  *qvec =
+      RotationMatrixToQuaternion(transformed.leftCols<3>() / transformed_scale);
+  *tvec = transformed.col(3) / transformed_scale;
 }
 
-Eigen::Vector3d SimilarityTransform3::Translation() const {
-  return Matrix().block<3, 1>(0, 3);
+void SimilarityTransform3::ToFile(const std::string& path) const {
+  std::ofstream file(path, std::ios::trunc);
+  CHECK(file.good()) << path;
+  // Ensure that we don't loose any precision by storing in text.
+  file.precision(17);
+  file << matrix_ << std::endl;
 }
 
 SimilarityTransform3 SimilarityTransform3::FromFile(const std::string& path) {
   std::ifstream file(path);
-  CHECK(file.is_open()) << path;
+  CHECK(file.good()) << path;
 
-  Eigen::Matrix4d matrix = Eigen::MatrixXd::Identity(4, 4);
+  Eigen::Matrix3x4d matrix;
   for (int i = 0; i < matrix.rows(); ++i) {
     for (int j = 0; j < matrix.cols(); ++j) {
       file >> matrix(i, j);
     }
   }
-  SimilarityTransform3 tform;
-  tform.transform_.matrix() = matrix;
-  return tform;
+
+  return SimilarityTransform3(matrix);
 }
 
 bool ComputeAlignmentBetweenReconstructions(
diff --git a/src/colmap/geometry/similarity_transform.h b/src/colmap/geometry/similarity_transform.h
index bbedde965f..3abc21ed60 100644
--- a/src/colmap/geometry/similarity_transform.h
+++ b/src/colmap/geometry/similarity_transform.h
@@ -31,7 +31,6 @@
 
 #pragma once
 
-#include "colmap/estimators/similarity_transform.h"
 #include "colmap/util/types.h"
 
 #include <vector>
@@ -41,43 +40,48 @@
 
 namespace colmap {
 
-struct RANSACOptions;
 class Reconstruction;
 
-// 3D similarity transformation with 7 degrees of freedom.
+// 3D similarity transform with 7 degrees of freedom.
+// Transforms point x from a to b as: x_in_b = scale * R * x_in_a + t.
 class SimilarityTransform3 {
  public:
+  // Default construct identity transform.
   SimilarityTransform3();
 
+  // Construct from existing transform.
   explicit SimilarityTransform3(const Eigen::Matrix3x4d& matrix);
-
-  explicit SimilarityTransform3(
-      const Eigen::Transform<double, 3, Eigen::Affine>& transform);
-
   SimilarityTransform3(double scale,
                        const Eigen::Vector4d& qvec,
                        const Eigen::Vector3d& tvec);
 
-  void Write(const std::string& path);
-
-  template <bool kEstimateScale = true>
-  bool Estimate(const std::vector<Eigen::Vector3d>& src,
-                const std::vector<Eigen::Vector3d>& dst);
-
   SimilarityTransform3 Inverse() const;
 
-  void TransformPoint(Eigen::Vector3d* xyz) const;
-  void TransformPose(Eigen::Vector4d* qvec, Eigen::Vector3d* tvec) const;
+  // Matrix that transforms points as x_in_b = matrix * x_in_a.homogeneous().
+  const Eigen::Matrix3x4d& Matrix() const;
 
-  Eigen::Matrix4d Matrix() const;
+  // Transformation parameters.
   double Scale() const;
   Eigen::Vector4d Rotation() const;
   Eigen::Vector3d Translation() const;
 
+  // Estimate tgtFromSrc transform. Return true if successful.
+  bool Estimate(const std::vector<Eigen::Vector3d>& src,
+                const std::vector<Eigen::Vector3d>& tgt);
+
+  // Apply transform to point.
+  inline Eigen::Vector3d operator*(const Eigen::Vector3d& x) const;
+
+  // Transform world for camFromWorld pose.
+  // TODO(jsch): Rename and refactor with future RigidTransform class.
+  void TransformPose(Eigen::Vector4d* qvec, Eigen::Vector3d* tvec) const;
+
+  // Read from or write to text file without loss of precision.
+  void ToFile(const std::string& path) const;
   static SimilarityTransform3 FromFile(const std::string& path);
 
  private:
-  Eigen::Transform<double, 3, Eigen::Affine> transform_;
+  Eigen::Matrix3x4d matrix_;
 };
 
 // Robustly compute alignment between reconstructions by finding images that
@@ -119,19 +123,9 @@ std::vector<ImageAlignmentError> ComputeImageAlignmentError(
 // Implementation
 ////////////////////////////////////////////////////////////////////////////////
 
-template <bool kEstimateScale>
-bool SimilarityTransform3::Estimate(const std::vector<Eigen::Vector3d>& src,
-                                    const std::vector<Eigen::Vector3d>& dst) {
-  const auto results =
-      SimilarityTransformEstimator<3, kEstimateScale>().Estimate(src, dst);
-  if (results.empty()) {
-    return false;
-  }
-
-  CHECK_EQ(results.size(), 1);
-  transform_.matrix().topLeftCorner<3, 4>() = results[0];
-
-  return true;
+Eigen::Vector3d SimilarityTransform3::operator*(
+    const Eigen::Vector3d& x) const {
+  return matrix_ * x.homogeneous();
 }
 
 }  // namespace colmap
diff --git a/src/colmap/geometry/similarity_transform_test.cc b/src/colmap/geometry/similarity_transform_test.cc
index bb32ea2b92..dfab1880e1 100644
--- a/src/colmap/geometry/similarity_transform_test.cc
+++ b/src/colmap/geometry/similarity_transform_test.cc
@@ -32,6 +32,7 @@
 #include "colmap/geometry/similarity_transform.h"
 
 #include "colmap/geometry/pose.h"
+#include "colmap/util/testing.h"
 
 #include <fstream>
 
@@ -40,80 +41,72 @@
 
 namespace colmap {
 
-TEST(SimilarityTransform, Default) {
-  const SimilarityTransform3 tform;
+SimilarityTransform3 TestSimilarityTransform3() {
+  return SimilarityTransform3(1.23,
+                              NormalizeQuaternion(Eigen::Vector4d(1, 2, 3, 4)),
+                              Eigen::Vector3d(1, 2, 3));
+}
 
+TEST(SimilarityTransform3, Default) {
+  const SimilarityTransform3 tform;
   EXPECT_EQ(tform.Scale(), 1);
-
-  EXPECT_EQ(tform.Rotation()[0], 1);
-  EXPECT_EQ(tform.Rotation()[1], 0);
-  EXPECT_EQ(tform.Rotation()[2], 0);
-  EXPECT_EQ(tform.Rotation()[3], 0);
-
-  EXPECT_EQ(tform.Translation()[0], 0);
-  EXPECT_EQ(tform.Translation()[1], 0);
-  EXPECT_EQ(tform.Translation()[2], 0);
+  EXPECT_EQ(tform.Rotation(), ComposeIdentityQuaternion());
+  EXPECT_EQ(tform.Translation(), Eigen::Vector3d::Zero());
 }
 
-TEST(SimilarityTransform, Initialization) {
-  const Eigen::Vector4d qvec =
-      NormalizeQuaternion(Eigen::Vector4d(0.1, 0.3, 0.2, 0.4));
-
-  const SimilarityTransform3 tform(2, qvec, Eigen::Vector3d(100, 10, 0.5));
-
-  EXPECT_NEAR(tform.Scale(), 2, 1e-10);
-
-  EXPECT_NEAR(tform.Rotation()[0], qvec(0), 1e-10);
-  EXPECT_NEAR(tform.Rotation()[1], qvec(1), 1e-10);
-  EXPECT_NEAR(tform.Rotation()[2], qvec(2), 1e-10);
-  EXPECT_NEAR(tform.Rotation()[3], qvec(3), 1e-10);
-
-  EXPECT_NEAR(tform.Translation()[0], 100, 1e-10);
-  EXPECT_NEAR(tform.Translation()[1], 10, 1e-10);
-  EXPECT_NEAR(tform.Translation()[2], 0.5, 1e-10);
+TEST(SimilarityTransform3, Initialization) {
+  const SimilarityTransform3 tform = TestSimilarityTransform3();
+  const SimilarityTransform3 tform2(tform.Matrix());
+  EXPECT_EQ(tform.Scale(), tform2.Scale());
+  EXPECT_EQ(tform.Rotation(), tform2.Rotation());
+  EXPECT_EQ(tform.Translation(), tform2.Translation());
 }
 
 void TestEstimationWithNumCoords(const size_t num_coords) {
-  const SimilarityTransform3 orig_tform(
-      2, Eigen::Vector4d(0.1, 0.3, 0.2, 0.4), Eigen::Vector3d(100, 10, 0.5));
+  const SimilarityTransform3 original = TestSimilarityTransform3();
 
   std::vector<Eigen::Vector3d> src;
   std::vector<Eigen::Vector3d> dst;
-
   for (size_t i = 0; i < num_coords; ++i) {
     src.emplace_back(i, i + 2, i * i);
-    dst.push_back(src.back());
-    orig_tform.TransformPoint(&dst.back());
+    dst.push_back(original * src.back());
   }
 
-  SimilarityTransform3 est_tform;
-  EXPECT_TRUE(est_tform.Estimate(src, dst));
+  SimilarityTransform3 estimated;
+  EXPECT_TRUE(estimated.Estimate(src, dst));
+  EXPECT_TRUE((original.Matrix() - estimated.Matrix()).norm() < 1e-6);
+}
 
-  EXPECT_TRUE((orig_tform.Matrix() - est_tform.Matrix()).norm() < 1e-6);
+TEST(SimilarityTransform3, EstimateMinimal) { TestEstimationWithNumCoords(3); }
 
-  std::vector<Eigen::Vector3d> invalid_src_dst(3, Eigen::Vector3d::Zero());
-  EXPECT_FALSE(est_tform.Estimate(invalid_src_dst, invalid_src_dst));
+TEST(SimilarityTransform3, EstimateOverDetermined) {
+  TestEstimationWithNumCoords(100);
 }
 
-TEST(SimilarityTransform, Estimation) {
-  TestEstimationWithNumCoords(3);
-  TestEstimationWithNumCoords(100);
+TEST(SimilarityTransform3, EstimateDegenerate) {
+  std::vector<Eigen::Vector3d> invalid_src_dst(3, Eigen::Vector3d::Zero());
+  EXPECT_FALSE(
+      SimilarityTransform3().Estimate(invalid_src_dst, invalid_src_dst));
 }
 
-TEST(SimilarityTransform, FromFile) {
-  // Create transform file
-  const std::string path = "test_from_file_transform.txt";
-  {
-    std::ofstream out(path);
-    out << "0.0 2.0 0.0 3.0 0.0 0.0 2.0 4.0 2.0 0.0 0.0 5.0 0.0 0.0 0.0 1.0"
-        << std::endl;
+TEST(SimilarityTransform3, Inverse) {
+  const SimilarityTransform3 bFromA = TestSimilarityTransform3();
+  const SimilarityTransform3 aFromB = bFromA.Inverse();
+  for (int i = 0; i < 100; ++i) {
+    const Eigen::Vector3d x_in_a = Eigen::Vector3d::Random();
+    const Eigen::Vector3d x_in_b = bFromA * x_in_a;
+    EXPECT_LT((aFromB * x_in_b - x_in_a).norm(), 1e-6);
   }
-  SimilarityTransform3 tform = SimilarityTransform3::FromFile(path);
-  EXPECT_NEAR(tform.Scale(), 2.0, 1e-10);
-  EXPECT_LE((tform.Translation() - Eigen::Vector3d(3.0, 4.0, 5.0)).norm(),
-            1e-6);
-  EXPECT_LE((tform.Rotation() - Eigen::Vector4d(-0.5, 0.5, 0.5, 0.5)).norm(),
-            1e-6);
+}
+
+TEST(SimilarityTransform3, ToFromFile) {
+  const std::string path = CreateTestDir() + "/file.txt";
+  const SimilarityTransform3 written = TestSimilarityTransform3();
+  written.ToFile(path);
+  const SimilarityTransform3 read = SimilarityTransform3::FromFile(path);
+  EXPECT_EQ(written.Scale(), read.Scale());
+  EXPECT_EQ(written.Rotation(), read.Rotation());
+  EXPECT_EQ(written.Translation(), read.Translation());
 }
 
 }  // namespace colmap
diff --git a/src/colmap/optim/loransac_test.cc b/src/colmap/optim/loransac_test.cc
index ac53693545..411294ab94 100644
--- a/src/colmap/optim/loransac_test.cc
+++ b/src/colmap/optim/loransac_test.cc
@@ -67,8 +67,7 @@ TEST(LORANSAC, SimilarityTransform) {
   std::vector<Eigen::Vector3d> dst;
   for (size_t i = 0; i < num_samples; ++i) {
     src.emplace_back(i, std::sqrt(i) + 2, std::sqrt(2 * i + 2));
-    dst.push_back(src.back());
-    orig_tform.TransformPoint(&dst.back());
+    dst.push_back(orig_tform * src.back());
   }
 
   // Add some faulty data.
diff --git a/src/colmap/optim/ransac_test.cc b/src/colmap/optim/ransac_test.cc
index 33d15bbb6d..d22f96c68d 100644
--- a/src/colmap/optim/ransac_test.cc
+++ b/src/colmap/optim/ransac_test.cc
@@ -99,8 +99,7 @@ TEST(RANSAC, SimilarityTransform) {
   std::vector<Eigen::Vector3d> dst;
   for (size_t i = 0; i < num_samples; ++i) {
     src.emplace_back(i, std::sqrt(i) + 2, std::sqrt(2 * i + 2));
-    dst.push_back(src.back());
-    orig_tform.TransformPoint(&dst.back());
+    dst.push_back(orig_tform * src.back());
   }
 
   // Add some faulty data.