Conjugate gradient implementation converging on wrong values

Question

I'm trying to write a simple conjugate gradient method function in C++ - pretty simple, right? But it literally can't solve any system of equations: it converges, but onto completely wrong values.

I've written the function out below, together with the main function where I generate the positive definite matrix A from a symmetric matrix M. All of the code is included so that anyone can spot what's wrong - it's a well-understood algorithm, and there's no point withholding any of it. Please assume that the ApplyLHS and innerProduct functions work properly.

template <typename TData>
void DoConjGrad(
    std::vector<TData> &in,
    std::vector<TData> &out,
    std::vector<TData> &A
)
{
    TData tol = 10e-8;

    size_t N = in.size();

    std::vector<TData> r(N);
    std::vector<TData> p(N);
    std::vector<TData> w(N);
    size_t k = 0;

    TData rTr;
    TData rTr_prev;
    TData pTw;

    TData alpha;
    TData beta;

    // initial guess for x0
    std::fill(out.begin(), out.end(), 0);

    // r0 = b - Ax0
    ApplyLHS(out, r, A);
    std::transform(in.cbegin(), in.cend(), r.cbegin(), r.begin(),
        [](const TData &bElem, const TData &rElem) { return bElem - rElem; });

    // p0 := r0
    std::copy(r.cbegin(), r.cend(), p.begin());

    // calculate (rTr)0
    rTr = innerProduct(r, r);

    for (int i = 0; i < 100; ++i)
    {
        ApplyLHS(p, w, A);
        pTw = innerProduct(p, w);

        alpha = rTr / pTw;

        std::transform(out.cbegin(), out.cend(), p.cbegin(), out.begin(),
            [alpha](const TData &xElem, const TData &pElem)
            {
                return xElem + alpha * pElem;
            });

        std::transform(r.cbegin(), r.cend(), w.cbegin(), r.begin(),
            [alpha](const TData &rElem, const TData &wElem)
            {
                return rElem - alpha * wElem;
            });

        if (rTr < tol)
            break;

        rTr_prev = rTr;
        rTr = innerProduct(r, r);

        beta = rTr / rTr_prev;

        std::transform(r.cbegin(), r.cend(), p.cbegin(), p.begin(),
            [beta](const TData &rElem, const TData &pElem)
            {
                return rElem + beta * pElem;
            });
    }
}

Main:

int main()
{
    srand(100);
    size_t N = 2;

    std::vector<double> A(N * N);
    std::vector<double> M(N * N);
    std::vector<double> x_true(N);
    std::vector<double> x_calc(N);
    std::vector<double> b(N);

    for (int i = 0; i < N; ++i)
        for (int j = 0; j <= i; ++j)
        {
            double r = 2 * ((double)rand()) / ((double)RAND_MAX) - 1;
            M[i * N + j] = r;
            M[j * N + i] = r;
        }

    // get positive semi-definite matrix = M.T @ M
    for (int i = 0; i < N; ++i)
        for (int j = 0; j < N; ++j)
            for (int k = 0; k < N; ++k)
                A[i * N + j] = M[k * N + i] * M[k * N + j];

    // generate random x
    for (int i = 0; i < N; ++i)
        x_true[i] = 2 * ((double)rand()) / ((double)RAND_MAX) - 1;

    // calculate b
    ApplyLHS(x_true, b, A);

    // solve for x_calc
    DoConjGrad<double>(b, x_calc, A);
}

Helper functions:

template <typename TData>
void printMat(
    size_t nr,
    size_t nc,
    std::vector<TData> &mat
)
{
    for (int i = 0; i < nr; ++i)
    {
        for (int j = 0; j < nc; ++j)
        {
            std::cout.precision(5);
            std::cout.width(10);
            std::cout << mat[i * nc + j] << "\t";
        }
        std::cout << "\n";
    }
}

template <typename TData>
void ApplyLHS(
    std::vector<TData> &in,
    std::vector<TData> &out,
    std::vector<TData> &A
)
{
    size_t N = in.size();
    std::fill(out.begin(), out.end(), 0);

    for (int i = 0; i < N; ++i)
        for (int j = 0; j < N; ++j)
            out[i] += in[j] * A[i * N + j];
}

template <typename TData>
TData innerProduct(
    std::vector<TData> &a,
    std::vector<TData> &b
)
{
    TData result = 0;
    for (int i = 0; i < a.size(); ++i)
    {
        result += a[i] * b[i];
    }
    return result;
}

Answer 1

Score: 0

So, the good news first: there appears to be NOTHING WRONG with your conjugate-gradient algorithm! (Well, nothing causing it to fail anyway. Personally, I would make the tolerance smaller, and move the loop exit condition after the point where you calculate rTr.)
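
A minimal sketch of that reordering, using the same variable names as in DoConjGrad above (the 1e-12 tolerance is only an example value, not taken from the original code):

// inside the iteration loop, after x and r have been updated:
rTr_prev = rTr;
rTr = innerProduct(r, r);      // squared norm of the *updated* residual

if (rTr < tol)                 // e.g. TData tol = 1e-12; for double
    break;                     // stop only once the new residual is small

beta = rTr / rTr_prev;
// ... update p as before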

If you calculate the determinant of A from your code ... you will find that det(A) = 0 every time, regardless of the value of N. (So A is singular; there is some non-zero x such that Ax = 0 and hence x^T.A.x = 0: thus A isn't strictly positive definite either, which is a condition for the CG algorithm).

But a major clue is that the determinant of M is not (usually) zero ... and yet det(A) should be det(M) squared.
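
For the N = 2 case used in main, both determinants are easy to check directly. A quick diagnostic sketch (not part of the original post), to be placed in main after M and A have been filled:

// closed-form 2x2 determinants of the row-major matrices M and A
double detM = M[0] * M[3] - M[1] * M[2];
double detA = A[0] * A[3] - A[1] * A[2];
std::cout << "det(M)^2 = " << detM * detM << ", det(A) = " << detA << "\n";
// with the original loop in main, det(A) comes out as (numerically) zero
// instead of det(M)^2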

So, look at your matrix multiplication for A = M^T M. It is not correct: each element should be initialised to zero and then formed as a sum. As written, the innermost loop overwrites A[i*N + j] on every k, so only the k = N-1 term survives and A collapses to a rank-1 outer product, hence det(A) = 0. Change this part to the following and you are good to go.

// get positive semi-definite matrix = M^T * M
for (int i = 0; i < N; ++i)
{
    for (int j = 0; j < N; ++j)
    {
        A[i * N + j] = 0;                                   // CAREFUL!!
        for (int k = 0; k < N; ++k)                         // ""
            A[i * N + j] += M[k * N + i] * M[k * N + j];    // ""
    }
}
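
As a quick sanity check after the fix (a sketch that reuses the printMat helper already defined in the question), main can print both vectors once the solve has finished; they should now agree to within the solver tolerance:

// after DoConjGrad<double>(b, x_calc, A) in main:
std::cout << "x_true:\n";
printMat(1, N, x_true);    // print as a 1 x N row with the existing helper
std::cout << "x_calc:\n";
printMat(1, N, x_calc);    // should now match x_true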

huangapple
  • Posted on 2023-06-06 05:12:02
  • Please keep this link when reposting: https://go.coder-hub.com/76410044.html