1. 原理介绍

对于图片1与图片2(连续的两帧), 我们首先对图片1提取GFTT角点,然后希望在图片2中追踪这些角点。设两张图分别为$I_1$, $I_2$, 第一张图中提取的角点为$P = {p_i}$, 其中$p_i = [x_i, y_i]^T$为像素坐标值。考虑到第$i$个点,我们希望计算$\Delta x_i, \Delta y_i$, 满足:

其中$W$ 为窗口大小,在本次实验中我们采取$8 \times 8$的窗口。上述最小二乘问题可以通过Gauss-Newton 求解。我们这里使用反向法,即使用$I_1(x_i, y_i)$处的梯度来代替$I_2(x_i + \Delta x_i, y_i + \Delta y_i)$处的梯度。

这样做的好处有:

  • $I_1(x_i, y_i)$是角点,梯度总会有意义。

  • $I_1(x_i, y_i)$处的梯度不随迭代改变,所以只需计算一次,就可以在后续的迭代中一直使用,节省了计算时间。

同时我们可以将上述过程推广至金字塔,使用较低分辨率层得到的结果来初始化高分辨率层。我们使用缩放倍率为2,共四层的图像金字塔。

2. 代码实现

使用的c++库有:Eigen,opencv. 代码实现如下:

#include <opencv2/opencv.hpp>
#include <string>
#include <Eigen/Core>
#include <Eigen/Dense>

using namespace std;
using namespace cv;

// this program shows how to use optical flow

string file_1 = "../1.png";  // first image
string file_2 = "../2.png";  // second image
/**
 * single level optical flow
 * @param [in] img1 the first image
 * @param [in] img2 the second image
 * @param [in] kp1 keypoints in img1
 * @param [in|out] kp2 keypoints in img2, if empty, use initial guess in kp1
 * @param [out] success true if a keypoint is tracked successfully
 * @param [in] inverse use inverse formulation?
 */
void OpticalFlowSingleLevel(
        const Mat &img1,
        const Mat &img2,
        const vector<KeyPoint> &kp1,
        vector<KeyPoint> &kp2,
        vector<bool> &success,
        bool inverse = false
);
/**
 * multi level optical flow, scale of pyramid is set to 2 by default
 * the image pyramid will be create inside the function
 * @param [in] img1 the first pyramid
 * @param [in] img2 the second pyramid
 * @param [in] kp1 keypoints in img1
 * @param [out] kp2 keypoints in img2
 * @param [out] success true if a keypoint is tracked successfully
 * @param [in] inverse set true to enable inverse formulation
 */
void OpticalFlowMultiLevel(
        const Mat &img1,
        const Mat &img2,
        const vector<KeyPoint> &kp1,
        vector<KeyPoint> &kp2,
        vector<bool> &success,
        bool inverse = false
);

/**
 * get a gray scale value from reference image (bi-linear interpolated)
 * @param img
 * @param x
 * @param y
 * @return
 */
inline float GetPixelValue(const cv::Mat &img, float x, float y) {
    uchar *data = &img.data[int(y) * img.step + int(x)];
    float xx = x - floor(x);
    float yy = y - floor(y);
    return float(
            (1 - xx) * (1 - yy) * data[0] +
            xx * (1 - yy) * data[1] +
            (1 - xx) * yy * data[img.step] +
            xx * yy * data[img.step + 1]
    );
}


int main(int argc, char **argv) {

    // images, note they are CV_8UC1, not CV_8UC3
    Mat img1 = imread(file_1, 0);
    Mat img2 = imread(file_2, 0);

    // key points, using GFTT here.
    vector<KeyPoint> kp1;
    Ptr<GFTTDetector> detector = GFTTDetector::create(500, 0.01, 20); // maximum 500 keypoints
    detector->detect(img1, kp1);

    // now lets track these key points in the second image
    // first use single level LK in the validation picture
    vector<KeyPoint> kp2_single;
    vector<bool> success_single;
    OpticalFlowSingleLevel(img1, img2, kp1, kp2_single, success_single);

    // then test multi-level LK
    vector<KeyPoint> kp2_multi;
    vector<bool> success_multi;
    OpticalFlowMultiLevel(img1, img2, kp1, kp2_multi, success_multi);

    // use opencv's flow for validation
    vector<Point2f> pt1, pt2;
    for (auto &kp: kp1) pt1.push_back(kp.pt);
    vector<uchar> status;
    vector<float> error;
    cv::calcOpticalFlowPyrLK(img1, img2, pt1, pt2, status, error, cv::Size(8, 8));

    // plot the differences of those functions
    Mat img2_single;
    cv::cvtColor(img2, img2_single, CV_GRAY2BGR);
    for (int i = 0; i < kp2_single.size(); i++) {
        if (success_single[i]) {
            cv::circle(img2_single, kp2_single[i].pt, 2, cv::Scalar(0, 250, 0), 2);
            cv::line(img2_single, kp1[i].pt, kp2_single[i].pt, cv::Scalar(0, 250, 0));
        }
    }

    Mat img2_multi;
    cv::cvtColor(img2, img2_multi, CV_GRAY2BGR);
    for (int i = 0; i < kp2_multi.size(); i++) {
        if (success_multi[i]) {
            cv::circle(img2_multi, kp2_multi[i].pt, 2, cv::Scalar(0, 250, 0), 2);
            cv::line(img2_multi, kp1[i].pt, kp2_multi[i].pt, cv::Scalar(0, 250, 0));
        }
    }

    Mat img2_CV;
    cv::cvtColor(img2, img2_CV, CV_GRAY2BGR);
    for (int i = 0; i < pt2.size(); i++) {
        if (status[i]) {
            cv::circle(img2_CV, pt2[i], 2, cv::Scalar(0, 250, 0), 2);
            cv::line(img2_CV, pt1[i], pt2[i], cv::Scalar(0, 250, 0));
        }
    }

    cv::imshow("tracked single level", img2_single);
    cv::imshow("tracked multi level", img2_multi);
    cv::imshow("tracked by opencv", img2_CV);
    cv::waitKey(0);

    return 0;
}

void OpticalFlowSingleLevel(
        const Mat &img1,
        const Mat &img2,
        const vector<KeyPoint> &kp1,
        vector<KeyPoint> &kp2,
        vector<bool> &success,
        bool inverse
) {

    // parameters
    int half_patch_size = 4;
    int iterations = 10;
    bool have_initial = !kp2.empty();

    for (size_t i = 0; i < kp1.size(); i++) {
        auto kp = kp1[i];
        double dx = 0, dy = 0; // dx,dy need to be estimated
        if (have_initial) {
            dx = kp2[i].pt.x - kp.pt.x;
            dy = kp2[i].pt.y - kp.pt.y;
        }

        double cost = 0, lastCost = 0;
        bool succ = true; // indicate if this point succeeded

        // Gauss-Newton iterations
        for (int iter = 0; iter < iterations; iter++) {
            Eigen::Matrix2d H = Eigen::Matrix2d::Zero();
            Eigen::Vector2d b = Eigen::Vector2d::Zero();
            cost = 0;
            if (kp.pt.x + dx <= half_patch_size || kp.pt.x + dx >= img1.cols - half_patch_size ||kp.pt.y + dy <= half_patch_size || kp.pt.y + dy >= img1.rows - half_patch_size) {   // go outside
                succ = false;
                break;
            }

            // compute cost and jacobian
            for (int x = -half_patch_size; x < half_patch_size; x++)
                for (int y = -half_patch_size; y < half_patch_size; y++) {

                    // TODO START YOUR CODE HERE (~8 lines)
                    float p_x = kp.pt.x;
                    float p_y = kp.pt.y;
                    int k = x + y + 2 * half_patch_size;
                    double error = 0;
                    Eigen::Vector2d J;  // Jacobian
                    if (inverse == false) {
                        // Forward Jacobian
                        J[0] = GetPixelValue(img2, p_x + x + dx, p_y + y + dy) - GetPixelValue(img2, p_x + x + dx - 1, p_y + y + dy);
                        J[1] = GetPixelValue(img2, p_x + x + dx, p_y + y + dy) - GetPixelValue(img2, p_x + x + dx, p_y + y + dy - 1);
                    } else {
                        // Inverse Jacobian
                        // NOTE this J does not change when dx, dy is updated, so we can store it and only compute error
                        J[0] = GetPixelValue(img1, p_x + x, p_y + y) - GetPixelValue(img1, p_x + x - 1, p_y + y);
                        J[1] = GetPixelValue(img1, p_x + x, p_y + y) - GetPixelValue(img1, p_x + x, p_y + y - 1);
                    }

                    // compute H, b and set cost;
                    error = - GetPixelValue(img1, p_x + x, p_y + y) + GetPixelValue(img2, p_x + x + dx, p_y + y + dy);
                    H += J * J.transpose();
                    b += -error * J;
                    cost += error * error / 2.0;
                }

            // compute update
            Eigen::Vector2d update;
            update = H.ldlt().solve(b);

            // TODO END YOUR CODE HERE

            if (isnan(update[0])) {
                // sometimes occurred when we have a black or white patch and H is irreversible
                cout << "update is nan" << endl;
                succ = false;
                break;
            }
            if (iter > 0 && cost > lastCost) {
                cout << "cost increased: " << cost << ", " << lastCost << endl;
                break;
            }

            // update dx, dy
            dx += update[0];
            dy += update[1];
            lastCost = cost;
            succ = true;
        }

        success.push_back(succ);

        // set kp2
        if (have_initial) {
            kp2[i].pt = kp.pt + Point2f(dx, dy);
        } else {
            KeyPoint tracked = kp;
            tracked.pt += cv::Point2f(dx, dy);
            kp2.push_back(tracked);
        }
    }
}

void OpticalFlowMultiLevel(
        const Mat &img1,
        const Mat &img2,
        const vector<KeyPoint> &kp1,
        vector<KeyPoint> &kp2,
        vector<bool> &success,
        bool inverse) {

    // parameters
    int pyramids = 4;
    double pyramid_scale = 0.5;
    double scales[] = {1.0, 0.5, 0.25, 0.125};

    // create pyramids
    vector<Mat> pyr1, pyr2; // image pyramids
    // TODO START YOUR CODE HERE (~8 lines)
    cout << "image1.size = "<<img1.size << "image2.size = " << img2.size << endl;
    pyr1.resize(pyramids);
    pyr2.resize(pyramids);

    vector<vector<KeyPoint>> vec_kp1(pyramids,vector<KeyPoint>(kp1.size()));
    vector<vector<KeyPoint>> vec_kp2(pyramids,vector<KeyPoint>(kp1.size()));

    pyr1[0] = img1;
    pyr2[0] = img2;

    for(int j = 0; j < kp1.size(); j++)
    {
        vec_kp1[0][j].pt = kp1[j].pt*scales[0];
    }

    for (int i = 0; i < pyramids-1; i++) 
    {
        pyrDown(pyr1[i], pyr1[i+1], Size(pyr1[i].cols * pyramid_scale, pyr1[i].rows * pyramid_scale));
        pyrDown(pyr2[i], pyr2[i+1], Size(pyr2[i].cols * pyramid_scale, pyr2[i].rows * pyramid_scale));

        for (int j = 0; j < kp1.size(); j++)
        {
            vec_kp1[i+1][j].pt = kp1[j].pt*scales[i+1];
        }
    }
    // coarse-to-fine LK tracking in pyramids
    vector<vector<bool>> vec_success_single(4);
    vector<KeyPoint> kp2_now;
    vector<KeyPoint> kp2_last(kp1.size());

    for (int k = pyramids-1; k >= 0; k--)
    {
        OpticalFlowSingleLevel(pyr1[k], pyr2[k], vec_kp1[k], kp2_now, vec_success_single[k], inverse);
        for (int j = 0; j < kp1.size(); j++)
        {
            kp2_last[j].pt = kp2_now[j].pt;
            kp2_now[j].pt = kp2_last[j].pt / pyramid_scale;
        }
    }
    kp2.resize(kp1.size());
    success.resize(kp1.size());

    for(int i = 0; i < kp1.size(); i++) {
        kp2[i].pt = kp2_last[i].pt;
        success[i] = vec_success_single[0][i];
    }
}