cvl-robot's diary

Research notes and memos. https://github.com/dotchang/

Porting Open3D's Reconstruction system to C++ (Part 1-2: make_fragment)

Continuing the C++ port of make_fragment.

opencv_pose_estimation.h

#pragma once

// Open3D: www.open3d.org
// The MIT License (MIT)
// See license file or visit www.open3d.org for details

#include <Core/Core.h>
#include <IO/IO.h>
#include <Visualization/Visualization.h>

#include <opencv2/core.hpp>

std::tuple<bool, Eigen::Matrix4d> pose_estimation(three::RGBDImage& source_rgbd_image, three::RGBDImage& target_rgbd_image,
	three::PinholeCameraIntrinsic intrinsic, bool debug_draw_correspondences);

// the inlier index vector is returned by value; returning a reference to a
// local vector would dangle
std::tuple<bool, Eigen::Matrix4d, std::vector<int>> estimate_3D_transform_RANSAC(std::vector<Eigen::Vector3d>& pts_xyz_s, std::vector<Eigen::Vector3d>& pts_xyz_t);

std::tuple<Eigen::Matrix3d, Eigen::Vector3d> estimate_3D_transform(std::vector<Eigen::Vector3d>& input_xyz_s, std::vector<Eigen::Vector3d>& input_xyz_t);

Eigen::Vector3d get_xyz_from_pts(cv::Point2d pts_row, cv::Mat& depth, double px, double py, double focal);

Eigen::Vector3d get_xyz_from_uv(double u, double v, double d, double px, double py, double focal);

opencv_pose_estimation.cpp

// The original Python code was tested with OpenCV 3.2.0 and Python 2.7.

#include "opencv_pose_estimation.h"

#include <random>
#include <Eigen/SVD>
#include <Eigen/LU>

#include <opencv2/core.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/calib3d.hpp>


std::tuple<bool, Eigen::Matrix4d> pose_estimation(three::RGBDImage& source_rgbd_image, three::RGBDImage& target_rgbd_image,
	three::PinholeCameraIntrinsic pinhole_camera_intrinsic, bool debug_draw_correspondences)
{
	bool success = false;
	Eigen::Matrix4d trans = Eigen::Matrix4d::Identity();

	// convert the float intensity images (CV_32FC1) to uint8 for ORB;
	// assumes the RGBD images were created with convert_rgb_to_intensity = true
	cv::Mat color_cv_s_f(source_rgbd_image.color_.height_, source_rgbd_image.color_.width_, CV_32FC1);
	cv::Mat color_cv_t_f(target_rgbd_image.color_.height_, target_rgbd_image.color_.width_, CV_32FC1);
	memcpy((void*)(color_cv_s_f.data), (void*)(&source_rgbd_image.color_.data_[0]), source_rgbd_image.color_.width_ * source_rgbd_image.color_.height_ * source_rgbd_image.color_.num_of_channels_ * source_rgbd_image.color_.bytes_per_channel_);
	memcpy((void*)(color_cv_t_f.data), (void*)(&target_rgbd_image.color_.data_[0]), target_rgbd_image.color_.width_ * target_rgbd_image.color_.height_ * target_rgbd_image.color_.num_of_channels_ * target_rgbd_image.color_.bytes_per_channel_);

	cv::Mat color_cv_s;
	cv::Mat color_cv_t;
	color_cv_s_f.convertTo(color_cv_s, CV_8UC1, 255);
	color_cv_t_f.convertTo(color_cv_t, CV_8UC1, 255);

	int nfeatures = 100;
	float scaleFactor = 1.2f;
	int nlevels = 8;
	int edgeThreshold = 31;
	int firstLevel = 0;
	int WTA_K = 2;
	int scoreType = cv::ORB::HARRIS_SCORE;
	int patchSize = 31;
	cv::Ptr<cv::ORB> orb = cv::ORB::create(nfeatures, scaleFactor, nlevels, edgeThreshold,
		firstLevel, WTA_K, scoreType, patchSize); // to save time

	cv::Mat des_s, des_t;
	std::vector<cv::KeyPoint> kp_s, kp_t;
	orb->detect(color_cv_s, kp_s);
	orb->detect(color_cv_t, kp_t);
	orb->compute(color_cv_s, kp_s, des_s);
	orb->compute(color_cv_t, kp_t, des_t);
	if ((kp_s.size() == 0) || (kp_t.size() == 0)) {
		return std::tuple<bool, Eigen::Matrix4d>(success, trans);
	}

	cv::Ptr<cv::DescriptorMatcher> matcher = cv::DescriptorMatcher::create("BruteForce-Hamming");
	std::vector<cv::DMatch> dmatch;
	dmatch.clear();
	std::vector<cv::DMatch> dmatch12, dmatch21;

	matcher->match(des_s, des_t, dmatch12);
	matcher->match(des_t, des_s, dmatch21);
	// CrossCheck = True
	for (size_t i = 0; i < dmatch12.size(); ++i)
	{
		cv::DMatch m12 = dmatch12[i];
		cv::DMatch m21 = dmatch21[m12.trainIdx];

		if (m21.trainIdx == m12.queryIdx)
			dmatch.push_back(m12);
	}
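	// note: this mutual-consistency loop reproduces Python's
	// cv2.BFMatcher(..., crossCheck=True); OpenCV's C++ BFMatcher also takes a
	// crossCheck flag, so the two match() calls plus this loop could be
	// collapsed into a single matcher if preferred.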

	int count = static_cast<int>(dmatch.size());

	std::vector<cv::Point2d> pts_s;
	std::vector<cv::Point2d> pts_t;
	pts_s.clear();
	pts_t.clear();
	for (int i = 0; i < dmatch.size(); i++) {
		pts_t.push_back(kp_t[dmatch[i].trainIdx].pt);
		pts_s.push_back(kp_s[dmatch[i].queryIdx].pt);
	}

	// inlier points after initial BF matching
	if (debug_draw_correspondences) {
		cv::Mat result;
		drawMatches(color_cv_s, kp_s, color_cv_t, kp_t, dmatch, result);
		cv::imshow("Initial BF matching", result);
		cv::waitKey(1);
		// draw_correspondences();
	}

	double focal_input = (pinhole_camera_intrinsic.intrinsic_matrix_(0, 0) +
		pinhole_camera_intrinsic.intrinsic_matrix_(1, 1)) / 2.0;
	double pp_x = pinhole_camera_intrinsic.intrinsic_matrix_(0, 2);
	double pp_y = pinhole_camera_intrinsic.intrinsic_matrix_(1, 2);

	// Essential matrix is made for masking inliers
	std::vector<cv::Point2i> pts_s_int;
	std::vector<cv::Point2i> pts_t_int;
	for (int i = 0; i < pts_s.size(); i++) {
		pts_s_int.push_back(cv::Point2i(pts_s[i].x + 0.5, pts_s[i].y + 0.5));
	}
	for (int i = 0; i < pts_t.size(); i++) {
		pts_t_int.push_back(cv::Point2i(pts_t[i].x + 0.5, pts_t[i].y + 0.5));
	}
	cv::Mat E, mask;
	double focal = focal_input;
	cv::Point2d pp(pp_x, pp_y);
	int method = cv::RANSAC;
	double prob = 0.999;
	double threshold = 1.0;
	E = cv::findEssentialMat(pts_s_int, pts_t_int, focal,
		pp, method, prob, threshold, mask);
	if (mask.empty()) {
		return std::tuple<bool, Eigen::Matrix4d>(success, trans);
	}

	// inlier points after 5pt algorithm
	if (debug_draw_correspondences) {
		// (debug drawing of the 5-point inliers is omitted in this port)
	}

	// make 3D correspondences
	cv::Mat depth_s(source_rgbd_image.depth_.height_, source_rgbd_image.depth_.width_, CV_32FC1);
	cv::Mat depth_t(target_rgbd_image.depth_.height_, target_rgbd_image.depth_.width_, CV_32FC1);
	memcpy((void*)(depth_s.data), (void*)(&source_rgbd_image.depth_.data_[0]), source_rgbd_image.depth_.width_ * source_rgbd_image.depth_.height_ * source_rgbd_image.depth_.num_of_channels_ * source_rgbd_image.depth_.bytes_per_channel_);
	memcpy((void*)(depth_t.data), (void*)(&target_rgbd_image.depth_.data_[0]), target_rgbd_image.depth_.width_ * target_rgbd_image.depth_.height_ * target_rgbd_image.depth_.num_of_channels_ * target_rgbd_image.depth_.bytes_per_channel_);
	std::vector<Eigen::Vector3d> pts_xyz_s;
	std::vector<Eigen::Vector3d> pts_xyz_t;
	pts_xyz_s.clear();
	pts_xyz_t.clear();
	int cnt = 0;
	for (int i = 0; i < pts_s.size(); i++) {
		// the mask from findEssentialMat is CV_8U, so read it as uchar
		if (mask.at<unsigned char>(i)) {
			pts_xyz_s.push_back(get_xyz_from_pts(pts_s[i], depth_s,
				pp_x, pp_y, focal_input));
			pts_xyz_t.push_back(get_xyz_from_pts(pts_t[i], depth_t,
				pp_x, pp_y, focal_input));
			cnt = cnt + 1;
		}
	}

	auto ret = estimate_3D_transform_RANSAC(pts_xyz_s, pts_xyz_t);
	success = std::get<0>(ret);
	trans = std::get<1>(ret);
	std::vector<int> inlier_id_vec = std::get<2>(ret);

	if (debug_draw_correspondences)
	{
		// (debug drawing of the RANSAC inliers is omitted in this port)
	}

	return std::tuple<bool, Eigen::Matrix4d>(success, trans);
}

std::tuple<bool, Eigen::Matrix4d, std::vector<int>> estimate_3D_transform_RANSAC(std::vector<Eigen::Vector3d>& pts_xyz_s, std::vector<Eigen::Vector3d>& pts_xyz_t)
{
	int max_iter = 1000;
	double max_distance = 0.05;
	int n_sample = 5;
	int n_points = static_cast<int>(pts_xyz_s.size());
	Eigen::Matrix4d Transform_good = Eigen::Matrix4d::Identity();
	int max_inlier = n_sample;
	std::vector<int> inlier_vec_good;
	inlier_vec_good.clear();
	bool success = false;

	if (n_points < n_sample) {
		return std::tuple<bool, Eigen::Matrix4d, std::vector<int>>(success, Transform_good, inlier_vec_good);
	}

	// the RNG is created once, outside the RANSAC loop; note that
	// std::uniform_int_distribution bounds are inclusive, so the upper
	// bound must be n_points - 1 to avoid out-of-range indices
	std::random_device rnd;
	std::mt19937 mt(rnd());
	std::uniform_int_distribution<> randn(0, n_points - 1);

	for (int i = 0; i < max_iter; i++) {
		std::vector<Eigen::Vector3d> sample_xyz_s;
		std::vector<Eigen::Vector3d> sample_xyz_t;

		// sampling
		for (int j = 0; j < n_sample; j++) {
			int rand_idx = randn(mt);
			sample_xyz_s.push_back(pts_xyz_s[rand_idx]);
			sample_xyz_t.push_back(pts_xyz_t[rand_idx]);
		}
		auto ret = estimate_3D_transform(sample_xyz_s, sample_xyz_t);
		Eigen::Matrix3d R_approx = std::get<0>(ret);
		Eigen::Vector3d t_approx = std::get<1>(ret);

		// evaluation
		std::vector<Eigen::Vector3d> diff_mat;
		for (int j = 0; j < n_points; j++) {
			diff_mat.push_back(pts_xyz_t[j] - (R_approx * pts_xyz_s[j] + t_approx));
		}
		std::vector<double> diff;
		for (int j = 0; j < n_points; j++) {
			diff.push_back(diff_mat[j].norm());
		}
		int n_inlier = 0;
		for (int j = 0; j < diff.size(); j++) {
			double diff_iter = diff[j];
			if (diff_iter < max_distance) {
				n_inlier += 1;
			}
		}

		// note: diag(R_approx) > 0 prevents awkward transformations between
		// RGBD pairs with a relatively small baseline.
		if ((n_inlier > max_inlier) && (R_approx.determinant() != 0.) &&
			(R_approx(0, 0) > 0) && (R_approx(1, 1) > 0) && (R_approx(2, 2) > 0))
		{
			for (int j = 0; j < 3; j++) {
				for (int k = 0; k < 3; k++) {
					Transform_good(j, k) = R_approx(j, k);
				}
				Transform_good(j, 3) = t_approx(j);
			}
			// std::cout << "Transform_good = \n" << Transform_good << std::endl;
			max_inlier = n_inlier;

			inlier_vec_good.clear();
			for (int id_iter = 0; id_iter < n_points; id_iter++) {
				double diff_iter = diff[id_iter];
				if (diff_iter < max_distance) {
					inlier_vec_good.push_back(id_iter);
				}
			}
			success = true;
		}
	}

	return std::tuple<bool, Eigen::Matrix4d, std::vector<int>>(success, Transform_good, inlier_vec_good);
}

// singular value decomposition approach
// based on the description in Sec. 3.1.2 of
// http://graphics.stanford.edu/~smr/ICP/comparison/eggert_comparison_mva97.pdf
std::tuple<Eigen::Matrix3d, Eigen::Vector3d> estimate_3D_transform(std::vector<Eigen::Vector3d>& input_xyz_s, std::vector<Eigen::Vector3d>& input_xyz_t)
{
	// compute H
	int n_points = static_cast<int>(input_xyz_s.size());
	Eigen::Vector3d mean_s(0., 0., 0.);
	for (int i = 0; i < input_xyz_s.size(); i++) {
		mean_s += input_xyz_s[i];
	}
	mean_s /= n_points;
	Eigen::Vector3d mean_t(0., 0., 0.);
	for (int i = 0; i < input_xyz_t.size(); i++) {
		mean_t += input_xyz_t[i];
	}
	mean_t /= n_points;

	Eigen::MatrixXd xyz_diff_s(3, input_xyz_s.size());
	for (int i = 0; i < input_xyz_s.size(); i++) {
		for (int j = 0; j < 3; j++) {
			xyz_diff_s(j, i) = input_xyz_s[i](j) - mean_s(j);
		}
	}
	Eigen::MatrixXd xyz_diff_t(3, input_xyz_t.size());
	for (int i = 0; i < input_xyz_t.size(); i++) {
		for (int j = 0; j < 3; j++) {
			xyz_diff_t(j, i) = input_xyz_t[i](j) - mean_t(j);
		}
	}

	Eigen::Matrix3d H = xyz_diff_s * xyz_diff_t.transpose();

	// solve with SVD (Kabsch): with H = U * S * V^T, the rotation is R = V * U^T.
	// (numpy's svd returns V^T while Eigen's JacobiSVD returns V itself,
	// which is why the transposes differ from the Python original)
	Eigen::JacobiSVD<Eigen::MatrixXd> svd(H, Eigen::ComputeFullU | Eigen::ComputeFullV);
	Eigen::Matrix3d R_approx = svd.matrixV() * svd.matrixU().transpose();
	if (R_approx.determinant() < 0.0) {
		// reflection case: flip the sign of the last singular direction
		Eigen::Matrix3d D = Eigen::Matrix3d::Identity();
		D(2, 2) = -1.0;
		R_approx = svd.matrixV() * D * svd.matrixU().transpose();
	}
	Eigen::Vector3d t_approx = mean_t - (R_approx * mean_s);
	return std::tuple<Eigen::Matrix3d, Eigen::Vector3d>(R_approx, t_approx);
}

Eigen::Vector3d get_xyz_from_pts(cv::Point2d pts_row, cv::Mat& depth, double px, double py, double focal)
{
	double u = pts_row.x;
	double v = pts_row.y;
	int u0 = int(u);
	int v0 = int(v);
	int height = depth.rows;
	int width = depth.cols;
	// bilinear depth interpolation; the depth Mats are CV_32FC1, so read as float
	if ((u0 > 0) && (u0 < width - 1) && (v0 > 0) && (v0 < height - 1)) {
		double up = pts_row.x - u0;
		double vp = pts_row.y - v0;
		double d0 = depth.at<float>(v0, u0);
		double d1 = depth.at<float>(v0, u0 + 1);
		double d2 = depth.at<float>(v0 + 1, u0);
		double d3 = depth.at<float>(v0 + 1, u0 + 1);
		double d = (1 - vp) * (d1 * up + d0 * (1 - up)) + vp * (d3 * up + d2 * (1 - up));
		return get_xyz_from_uv(u, v, d, px, py, focal);
	}
	return Eigen::Vector3d(0., 0., 0.);
}

Eigen::Vector3d get_xyz_from_uv(double u, double v, double d, double px, double py, double focal)
{
	double x, y;
	if (focal != 0.) {
		// back-projection: x = (u - cx) * d / f, y = (v - cy) * d / f
		x = (u - px) * d / focal;
		y = (v - py) * d / focal;
	}
	else {
		x = 0.;
		y = 0.;
	}
	return Eigen::Vector3d(x, y, d);
}
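
For reference, here is a minimal sketch of how pose_estimation might be driven. The file names, depth scale, and intrinsic values are placeholders, and the three:: calls assume the same old Open3D 0.x API used in the code above:

#include <iostream>
#include <tuple>
#include "opencv_pose_estimation.h"

int main()
{
	// load one source/target RGBD pair (file names are placeholders)
	three::Image color_s, depth_s, color_t, depth_t;
	three::ReadImage("color_000000.jpg", color_s);
	three::ReadImage("depth_000000.png", depth_s);
	three::ReadImage("color_000005.jpg", color_t);
	three::ReadImage("depth_000005.png", depth_t);

	// convert_rgb_to_intensity = true yields the CV_32FC1 color layout
	// that pose_estimation's memcpy assumes
	auto source = three::CreateRGBDImageFromColorAndDepth(color_s, depth_s, 1000.0, 3.0, true);
	auto target = three::CreateRGBDImageFromColorAndDepth(color_t, depth_t, 1000.0, 3.0, true);

	// PrimeSense-style default intrinsics
	three::PinholeCameraIntrinsic intrinsic(640, 480, 525.0, 525.0, 319.5, 239.5);

	bool success;
	Eigen::Matrix4d trans;
	std::tie(success, trans) = pose_estimation(*source, *target, intrinsic, false);
	if (success) {
		std::cout << "trans =\n" << trans << std::endl;
	}
	return 0;
}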

optimize_posegraph.h

#pragma once

// Open3D: www.open3d.org
// The MIT License (MIT)
// See license file or visit www.open3d.org for details

#include <Core/Core.h>
#include <IO/IO.h>
#include <Visualization/Visualization.h>

void optimize_posegraph_for_fragment(std::string path_dataset, int fragment_id);
void optimize_posegraph_for_scene(std::string path_dataset);

optimize_posegraph.cpp

#include "optimize_posegraph.h"

#include <Core/Registration/PoseGraph.h>
#include <Core/Registration/GlobalOptimizationMethod.h>
#include <Core/Registration/GlobalOptimizationConvergenceCriteria.h>
#include <Core/Registration/GlobalOptimization.h>

#include <IO/ClassIO/PoseGraphIO.h>

extern std::string template_fragment_posegraph;
extern std::string template_fragment_posegraph_optimized;
extern std::string template_global_posegraph;
extern std::string template_global_posegraph_optimized;

void run_posegraph_optimization(std::string pose_graph_name, std::string pose_graph_optimized_name,
	double max_correspondence_distance, double preference_loop_closure)
{
	// to display messages from global optimization
	three::SetVerbosityLevel(three::VerbosityLevel::VerboseDebug);
	three::GlobalOptimizationLevenbergMarquardt method;
	three::GlobalOptimizationConvergenceCriteria criteria;
	three::GlobalOptimizationOption option(max_correspondence_distance,
		0.25,   // edge_prune_threshold
		preference_loop_closure,
		0);     // reference_node
	three::PoseGraph pose_graph;
	//three::ReadPoseGraph(pose_graph_name, pose_graph); // Read PoseGraph failed: unknown file extension.
	three::ReadIJsonConvertibleFromJSON(pose_graph_name, pose_graph);
	three::GlobalOptimization(pose_graph, method, criteria, option);
	//three::WritePoseGraph(pose_graph_optimized_name, pose_graph); // Write PoseGraph failed: unknown file extension.
	three::WriteIJsonConvertibleToJSON(pose_graph_optimized_name, pose_graph);
	three::SetVerbosityLevel(three::VerbosityLevel::VerboseError);
}

void optimize_posegraph_for_fragment(std::string path_dataset, int fragment_id)
{
	// format the %d placeholder into a local buffer; sprintf-ing into
	// std::string::data() is undefined behavior and leaves the length stale
	char buf[1024];
	snprintf(buf, sizeof(buf), (path_dataset + template_fragment_posegraph).c_str(), fragment_id);
	std::string pose_graph_name(buf);
	snprintf(buf, sizeof(buf), (path_dataset + template_fragment_posegraph_optimized).c_str(), fragment_id);
	std::string pose_graph_optimized_name(buf);

	run_posegraph_optimization(pose_graph_name, pose_graph_optimized_name,
		0.07,   // max_correspondence_distance
		0.1);   // preference_loop_closure
}

void optimize_posegraph_for_scene(std::string path_dataset)
{
	std::string pose_graph_name(path_dataset + template_global_posegraph);
	std::string pose_graph_optimized_name(path_dataset +
		template_global_posegraph_optimized);
	double max_correspondence_distance = 0.07;
	double preference_loop_closure = 2.0;
	run_posegraph_optimization(pose_graph_name, pose_graph_optimized_name,
		max_correspondence_distance,
		preference_loop_closure);
}
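
The four path templates are only declared extern above, so they must be defined somewhere in the project. A minimal sketch of matching definitions, assuming the fragments/ and scene/ file layout of the original Python reconstruction system (the exact names are placeholders; adjust them to your dataset):

// definitions for the extern path templates declared in optimize_posegraph.cpp
std::string template_fragment_posegraph = "fragments/fragment_%03d.json";
std::string template_fragment_posegraph_optimized = "fragments/fragment_optimized_%03d.json";
std::string template_global_posegraph = "scene/global_registration.json";
std::string template_global_posegraph_optimized = "scene/global_registration_optimized.json";

// usage:
// optimize_posegraph_for_fragment("dataset/", 0);   // fragment #0
// optimize_posegraph_for_scene("dataset/");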
Notes
  • Only the file I/O uses openFrameworks functions such as ofDir. If you have an alternative library, feel free to rewrite those parts with it.
  • In the file I/O utility functions such as ReadPoseGraph and WritePoseGraph, the file-extension matching fails and they do not work correctly. Instead, the extension is assumed up front and ReadIJsonConvertibleFromJSON / WriteIJsonConvertibleToJSON are called directly.
  • The debug display functions are omitted, which means this code may not have been fully debugged.
  • Since I am not used to Eigen, I needlessly mix it with std::vector and the code is messy; please bear with it, or rewrite it more cleanly.

Today's sōmen

Sōmen noodles are said to get tastier as they age, so the larger packs are also recommended.
store.shopping.yahoo.co.jp