用 Python 实现 68-95-99.7 原则（Jupyter 版本）

也可以点击 这里 获取相关文件呀

The Empirical Rule and Distribution

68-95-99.7 原则

In [2]:
# Silence all warnings. The filter is installed before the library imports
# below, so warnings raised while importing them are suppressed as well.
import warnings
warnings.filterwarnings("ignore")
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Seed Python's `random` module so the draws made with it in later cells
# (normalvariate / sample) are repeatable when the notebook is run top to
# bottom from a fresh kernel.
random.seed(1738)
In [3]:
# Mean and standard deviation of the normal distribution being simulated.
mu, sigma = 7, 1.7

# Number of independent draws to take from that distribution.
n_observations = 100000

# Draw the values one at a time from Python's `random` module; the result is
# a plain list of floats.
observations = list(
    random.normalvariate(mu, sigma) for _ in range(n_observations)
)
In [4]:
# Density plot of the simulated observations.
sns.distplot(observations)

# Sample mean and standard deviation, computed once and reused below.
center = np.mean(observations)
spread = np.std(observations)

# Green lines: one standard deviation either side of the mean.
plt.axvline(center + spread, color="g")
plt.axvline(center - spread, color="g")

# Yellow lines: two standard deviations either side of the mean.
plt.axvline(center + (spread * 2), color="y")
plt.axvline(center - (spread * 2), color="y")
Out[4]:
<matplotlib.lines.Line2D at 0x183b96ce1c0>
In [5]:
# Summary statistics of the simulated draws; the reported mean and std can be
# compared against the mu and sigma used to generate them.
observation_series = pd.Series(observations)
observation_series.describe()
Out[5]:
count    100000.000000
mean          7.000626
std           1.693249
min          -0.754203
25%           5.865611
50%           7.003080
75%           8.144851
max          14.595650
dtype: float64
In [7]:
# Draw three samples of 100 observations each. `random.sample` picks without
# replacement within a sample; the three draws happen in order A, B, C.
sample_size = 100
sampleA, sampleB, sampleC = (
    random.sample(observations, sample_size) for _ in range(3)
)
In [10]:
fig, ax = plt.subplots()

# Overlay the three sample distributions on the same axes so their shapes
# can be compared directly.
for sample in (sampleA, sampleB, sampleC):
    sns.distplot(sample, ax=ax)

# Show the shared axes as the cell's result.
ax
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x183bc44c760>
In [11]:
# Re-simulate 100,000 draws from the same normal distribution. Note that this
# replaces the `observations` list created earlier in the notebook.
mu, sigma = 7, 1.7
observations = list(random.normalvariate(mu, sigma) for _ in range(100000))

sns.distplot(observations)

# Sample mean and standard deviation, computed once and reused below.
center = np.mean(observations)
spread = np.std(observations)

# ymin/ymax are fractions of the axes height, so these lines stop part-way
# up the plot instead of spanning it.
# Green lines: one standard deviation either side of the mean.
plt.axvline(center + spread, ymin=0, ymax=0.59, color="g")
plt.axvline(center - spread, ymin=0, ymax=0.59, color="g")

# Yellow lines: two standard deviations either side of the mean.
plt.axvline(center + (spread * 2), ymin=0, ymax=0.15, color="y")
plt.axvline(center - (spread * 2), ymin=0, ymax=0.15, color="y")
Out[11]:
<matplotlib.lines.Line2D at 0x183bc273f70>
In [14]:
# statsmodels is an extra dependency beyond the packages imported at the top
# of the notebook; it must be installed in the kernel's environment for this
# cell to run.
from statsmodels.distributions.empirical_distribution import ECDF

# Empirical CDF of the simulated observations. The variable is the lowercase
# `observations` defined earlier; Python names are case-sensitive, so
# `Observations` would raise NameError.
ecdf = ECDF(observations)

plt.plot(ecdf.x, ecdf.y)

# Sample mean and standard deviation, computed once and reused for both bounds.
obs_mean = np.mean(observations)
obs_std = np.std(observations)

# Under the 68-95-99.7 rule about 95% of values lie within two standard
# deviations of the mean, leaving roughly 2.5% in each tail.
# Lower tail: the 2.5% level and the point two standard deviations below the mean.
plt.axhline(y=0.025, color='y', linestyle='-')
plt.axvline(x=obs_mean - (2 * obs_std), color='y', linestyle='-')

# Upper tail: the 97.5% level and the point two standard deviations above the mean.
plt.axhline(y=0.975, color='y', linestyle='-')
plt.axvline(x=obs_mean + (2 * obs_std), color='y', linestyle='-')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-14-426c2052e812> in <module>
----> 1 from statsmodels.distributions.empirical_distribution import ECDF
      2 import matplotlib.pyplot as plt
      3 
      4 ecdf = ECDF(Observations)
      5 

ModuleNotFoundError: No module named 'statsmodels'
In [ ]: