问题

import numpy as np
from scipy.stats import expon, kstest
from matplotlib import pyplot as plt

def custom_exponential_cdf(x, lamb):
    x = x.copy()
    x[x < 0] = 0.0
    pdf = lamb * np.exp(-lamb * x)
    cdf = np.cumsum(pdf * np.diff(np.concatenate(([0], x))))
    return cdf

unique_values = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.85, 0.87, 0.89])
counts = np.array([1597, 1525, 1438, 1471, 1311, 1303, 1202, 1147, 1002, 918, 893, 801, 713, 680, 599, 578, 478, 430, 409, 353, 350, 292, 245, 211, 224, 182, 171, 151, 125, 111, 94, 85, 72, 73, 57, 36, 53, 35, 35, 27, 19, 20, 15, 10, 20, 12, 10, 13, 11, 10, 17, 15, 8, 3, 3, 3, 5, 6, 6, 2, 3, 3, 4, 6, 1, 1, 3, 1, 2, 1, 3, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2])
x = np.repeat(unique_values, counts)
lamb = 9.23

fig, ax = plt.subplots()
ax.plot(x, expon.cdf(x, 0.0, 1.0 / lamb))
ax.plot(x, custom_exponential_cdf(x, lamb))

print(kstest(x, custom_exponential_cdf, (lamb,)))
print(kstest(x, "expon", (0.0, 1.0 / lamb)))

英文:

For context, I'm trying to understand how to use np.cumsum to replicate scipy.stats.expon.cdf because I need to write a function compatible with scipy.stats.kstest which is not one of the distributions already available in scipy.stats.

I am having issues with finding resources online which give good guides to numerically computing the CDF, because the majority of resources just point you to in-built methods. I've gotten as far as the custom_exponential_cdf function defined below, which works well in some cases but breaks down in others (for example, the one below).

Does anyone know what I am doing wrong, and how I can modify custom_exponential_cdf to match the output from scipy.stats.expon.cdf?

import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import kstest, expon

def custom_exponential_cdf(x, lamb):
    x = x.copy()
    x[x &lt; 0] = 0.0
    pdf = lamb * np.exp(-lamb * x)
    cdf = np.cumsum(pdf * np.diff(np.concatenate(([0], x))))
    return cdf

unique_values = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.85, 0.87, 0.89])
counts = np.array([1597, 1525, 1438, 1471, 1311, 1303, 1202, 1147, 1002, 918, 893, 801, 713, 680, 599, 578, 478, 430, 409, 353, 350, 292, 245, 211, 224, 182, 171, 151, 125, 111, 94, 85, 72, 73, 57, 36, 53, 35, 35, 27, 19, 20, 15, 10, 20, 12, 10, 13, 11, 10, 17, 15, 8, 3, 3, 3, 5, 6, 6, 2, 3, 3, 4, 6, 1, 1, 3, 1, 2, 1, 3, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2])
x = np.repeat(unique_values, counts)
lamb = 9.23

fig, ax = plt.subplots()
ax.plot(x, expon.cdf(x, 0.0, 1.0 / lamb))
ax.plot(x, custom_exponential_cdf(x, lamb))

print(kstest(x, custom_exponential_cdf, (lamb,)))
print(kstest(x, &quot;expon&quot;, (0.0, 1.0 / lamb)))

The output from the print statements is

KstestResult(statistic=0.08740741955472273, pvalue=6.857709296861777e-145, statistic_location=0.02, statistic_sign=-1)
KstestResult(statistic=0.0988550670723162, pvalue=2.7098163860110364e-185, statistic_location=0.04, statistic_sign=-1)

The output from the plot is:

答案1

得分: 1

指数分布的概率密度函数在 x < 0 时为 0，自定义函数需要反映这一点。以下是修复的一种方法：

import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import kstest, expon

def custom_exponential_cdf(x, lamb):
    x = x.copy()
    x[x < 0] = 0
    pdf = lamb * np.exp(-lamb * x)
    cdf = np.cumsum(pdf * np.diff(np.concatenate(([0], x)))
    return cdf

x = np.sort(np.random.normal(loc=0.1, scale=0.09, size=21729))
lamb = 9.23

cdf = custom_exponential_cdf(x, lamb)
fig, ax = plt.subplots()
ax.plot(x, expon.cdf(x, 0.0, 1.0 / lamb), lw=7, alpha=0.7, label="scipy")
ax.plot(x, cdf, 'r', label="custom")
plt.legend()

它生成的结果如下所示：

英文:

The pdf of the exponential distribution is 0 for x < 0 and the custom function needs to reflect this. Here is one way to fix it:

import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import kstest, expon

def custom_exponential_cdf(x, lamb):
    x = x.copy()
    x[x &lt; 0] = 0
    pdf = lamb * np.exp(-lamb * x)
    cdf = np.cumsum(pdf * np.diff(np.concatenate(([0], x))))
    return cdf

x = np.sort(np.random.normal(loc=0.1, scale=0.09, size=21729))
lamb = 9.23

cdf = custom_exponential_cdf(x, lamb)
fig, ax = plt.subplots()
ax.plot(x, expon.cdf(x, 0.0, 1.0 / lamb), lw=7, alpha = 0.7, label=&quot;scipy&quot;)
ax.plot(x, cdf, &#39;r&#39;, label=&quot;custom&quot;)
plt.legend()

It gives:

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

如何使用np.cumsum来复制scipy.stats.expon.cdf的输出？

问题

答案1

在Django表单中保存复选框的值。

使用shell_exec()在PHP代码中运行Python脚本时出现问题。

如何从多对多字段获取查询集

将Python字典中的键转换为值，反之亦然。

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论