Hypothesis Test (2 Sample T-test)¶
Performing a hypothesis test on 20 "before" and 20 "after" samples. This method can be used to measure the impact of a process improvement.
Null Hypothesis (H0): The mean scores of the two samples are the same (first 20 samples = last 20 samples).
Alternative Hypothesis (H1): The mean scores of the two samples are different.
Import data¶
# Libraries
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns
# Data: read the workbook into a DataFrame and preview its first five rows
df = pd.read_excel(io='my_data.xlsx')
df.head(n=5)
scores | |
---|---|
0 | 30 |
1 | 29 |
2 | 35 |
3 | 29 |
4 | 29 |
# Isolate the last twenty observations and the twenty immediately before them.
# NOTE: 'first_twenty' is the block of rows directly preceding the final twenty,
# which equals the first twenty rows only when the sheet holds exactly 40 rows.
n_samples = 20
if len(df) < 2 * n_samples:
    # Fail early with a clear message instead of a length-mismatch error later on
    raise ValueError(
        f"Need at least {2 * n_samples} rows to build two samples of {n_samples}, got {len(df)}"
    )
last_twenty = df['scores'].iloc[-n_samples:]
first_twenty = df['scores'].iloc[-2 * n_samples:-n_samples]
# Build the side-by-side frame in one step; .to_numpy() drops the original row
# labels so both columns share a fresh 0..n_samples-1 index.
df_new = pd.DataFrame({
    'first_twenty': first_twenty.to_numpy(),
    'last_twenty': last_twenty.to_numpy(),
})
df_new.head(n_samples)
first_twenty | last_twenty | |
---|---|---|
0 | 30 | 49 |
1 | 29 | 44 |
2 | 35 | 35 |
3 | 29 | 34 |
4 | 29 | 41 |
5 | 38 | 47 |
6 | 38 | 38 |
7 | 34 | 45 |
8 | 25 | 36 |
9 | 39 | 42 |
10 | 40 | 42 |
11 | 36 | 41 |
12 | 33 | 39 |
13 | 30 | 41 |
14 | 26 | 59 |
15 | 35 | 40 |
16 | 41 | 49 |
17 | 46 | 47 |
18 | 32 | 57 |
19 | 50 | 37 |
# Average score of each sample
first_twenty_mean = first_twenty.mean()
last_twenty_mean = last_twenty.mean()
print(f'The average score of the first twenty samples is: {first_twenty_mean}')
print(f'The average score of the last twenty samples is: {last_twenty_mean}')
The average score of the first twenty samples is: 34.75 The average score of the last twenty samples is: 43.15
# Summary statistics, transposed so that each sample is a row
df_new.describe().transpose()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
first_twenty | 20.0 | 34.75 | 6.463460 | 25.0 | 29.75 | 34.5 | 38.25 | 50.0 |
last_twenty | 20.0 | 43.15 | 6.706438 | 34.0 | 38.75 | 41.5 | 47.00 | 59.0 |
Charting the data¶
# Shared x-axis positions (0..19) for the point plot
x = range(0, 20)
print(x)
range(0, 20)
# Create the plots: one marker series per sample against the shared observation index.
# The legend label matches the variable being plotted (it previously read
# 'next_twenty' for the first_twenty series).
plt.plot(x, first_twenty, 'o', label='first_twenty')
plt.plot(x, last_twenty, 'o', label='last_twenty')
# Labels and Titles
plt.xlabel('observation')
plt.ylabel('score')
plt.title('Scores')
# Legend
plt.legend()
# Show plot
plt.grid(True)
plt.show()
# Plotly interactive graph: both samples as marker traces on a shared index
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_new.index, y=df_new['first_twenty'], mode='markers', name="first_twenty"))
fig.add_trace(go.Scatter(x=df_new.index, y=df_new['last_twenty'], mode='markers', name="last_twenty"))
fig.update_layout(title='Point plot')
fig.show()
# Export method for embedding html into personal website.
# Write as UTF-8 explicitly so the export does not depend on the platform's
# default text encoding.
with open('hypot_point.html', 'w', encoding='utf-8') as f:
    f.write(fig.to_html(include_plotlyjs='cdn'))
# Line plot: draw each sample as its own line, distinguished by marker shape
plt.figure(figsize=(12, 6))
for column, marker in (('first_twenty', 'o'), ('last_twenty', 'x')):
    plt.plot(df_new[column], label=column, marker=marker)
plt.title('Trend Comparison Over Time')
plt.xlabel('observation')
plt.ylabel('score')
plt.legend()
plt.grid(True)
plt.show()
# Plotly interactive graph: both samples as line traces on a shared index
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_new.index, y=df_new['first_twenty'], mode='lines', name="first_twenty"))
fig.add_trace(go.Scatter(x=df_new.index, y=df_new['last_twenty'], mode='lines', name="last_twenty"))
fig.update_layout(title='Line chart')
fig.show()
# Export method for embedding html into personal website.
# Write as UTF-8 explicitly so the export does not depend on the platform's
# default text encoding.
with open('hypot_line.html', 'w', encoding='utf-8') as f:
    f.write(fig.to_html(include_plotlyjs='cdn'))
# Box plot of the columns in df_new (one box per sample)
df_new.boxplot()
<Axes: >
# Plotly interactive graph: one box per sample, with every observation drawn beside it
fig = go.Figure()
fig.add_trace(go.Box(y=df_new['first_twenty'], name="first_twenty", boxpoints='all'))
fig.add_trace(go.Box(y=df_new['last_twenty'], name="last_twenty", boxpoints='all'))
fig.update_layout(title='Boxplot')
fig.show()
# Export method for embedding html into personal website.
# Write as UTF-8 explicitly so the export does not depend on the platform's
# default text encoding.
with open('hypot_box.html', 'w', encoding='utf-8') as f:
    f.write(fig.to_html(include_plotlyjs='cdn'))
# Histogram for frequency distribution
# DataFrame.plot creates its own figure, so the size is passed through figsize.
# A separate plt.figure(figsize=...) call beforehand only leaves an empty extra
# figure behind and does not size the histogram.
df_new.plot(kind='hist', alpha=0.7, bins=15, figsize=(12, 6))
plt.title('Frequency Distribution of Trends')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()
<Figure size 1200x600 with 0 Axes>
# Plotly histogram: one trace per sample
fig = go.Figure()
fig.add_trace(go.Histogram(x=first_twenty, name="first_twenty"))
fig.add_trace(go.Histogram(x=last_twenty, name="last_twenty"))
# Overlay both histograms
fig.update_layout(barmode='overlay')
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.update_layout(title='Histogram of Samples')
fig.show()
# Export method for embedding html into personal website.
# Write as UTF-8 explicitly so the export does not depend on the platform's
# default text encoding.
with open('hypot_histogram.html', 'w', encoding='utf-8') as f:
    f.write(fig.to_html(include_plotlyjs='cdn'))
# Row-wise change (after minus before) and its average
differences = df_new['last_twenty'].sub(df_new['first_twenty'])
mean_difference = differences.mean()
print(f'The mean difference is: {mean_difference}')
The mean difference is: 8.4
## Control Chart
# Mean and standard deviation of each sample
before_mean, before_std = df_new['first_twenty'].mean(), df_new['first_twenty'].std()
after_mean, after_std = df_new['last_twenty'].mean(), df_new['last_twenty'].std()

plt.figure(figsize=(10, 6))

# One entry per sample: (column, line format, x offset, legend prefix, colour, mean, std).
# The "after" sample is shifted 20 positions right so it follows the "before" sample.
segments = (
    ('first_twenty', 'o-', 0, 'Before', 'b', before_mean, before_std),
    ('last_twenty', 's-', 20, 'After', 'orange', after_mean, after_std),
)
for column, fmt, offset, prefix, colour, centre, spread in segments:
    # Data points, then the mean line and the +/- one standard deviation lines
    plt.plot(df_new.index + offset, df_new[column], fmt, label=column)
    plt.axhline(y=centre, color=colour, linestyle='--', label=f'{prefix} Mean')
    plt.axhline(y=centre + spread, color=colour, linestyle=':', label=f'{prefix} + 1 Std Dev')
    plt.axhline(y=centre - spread, color=colour, linestyle=':', label=f'{prefix} - 1 Std Dev')

# Axis labels, title and legend
plt.xlabel('Sample')
plt.ylabel('Sample Value')
plt.title('Control Chart (Before vs After)')
plt.legend()

# Rotated tick labels, plus x-limits with a buffer on both sides of the two samples
plt.xticks(rotation=45)
plt.xlim(-2, df_new.index.max() + 22)

# Render
plt.grid(True)
plt.tight_layout()
plt.show()
Test Assumptions
Normality Test (Shapiro-Wilk Test)¶
Null Hypothesis (H0): Data follows the normal distribution
Alternative Hypothesis (H1): Data does not follow the normal distribution
# Significance level: the complement of a 95% confidence level
confidence_level = 0.95
alpha = 1 - confidence_level
# Create a function to read the p-value
def p_value_reader(p_value, alpha):
    """
    Print an interpretation of a p-value against a significance threshold.

    Parameters
    ----------
    p_value : float
        p-value produced by a statistical test; must lie in [0, 1].
    alpha : float
        Significance threshold to compare against; must lie in [0, 1].

    Returns
    -------
    None
        The interpretation is printed, not returned.

    Raises
    ------
    ValueError
        If ``p_value`` or ``alpha`` is outside [0, 1].
    """
    # Raise errors
    if not (0 <= p_value <= 1):
        raise ValueError("p_value must be between 0 and 1")
    if not (0 <= alpha <= 1):
        raise ValueError("threshold must be between 0 and 1")

    # Show the threshold with up to six significant digits: 1 - 0.95 still
    # prints as 0.05, while a small threshold such as 0.001 is no longer
    # rounded down to 0.0 as it was with round(alpha, 2).
    threshold = f'{alpha:g}'

    # Evaluate the p_value
    if p_value < alpha:
        print(f'p-value ({p_value}) is less than the threshold ({threshold}).')
        print("Evidence suggests rejecting the null hypothesis")
    else:
        print(f'p-value ({p_value}) is greater than or equal to the threshold ({threshold}).')
        print("Not enough evidence to reject the null hypothesis")
# Shapiro-Wilk test for normality on the first_twenty sample
shapiro_result = st.shapiro(df_new['first_twenty'])
stat, p_value = shapiro_result
print(f'The p-value is {p_value}')
p_value_reader(p_value=p_value, alpha=alpha)
The p-value is 0.5092968959512759 p-value (0.5092968959512759) is greater than or equal to the threshold (0.05). Not enough evidence to reject the null hypothesis
# Shapiro-Wilk test for normality on the last_twenty sample
shapiro_result = st.shapiro(df_new['last_twenty'])
stat, p_value = shapiro_result
print(f'The p-value is {p_value}')
p_value_reader(p_value=p_value, alpha=alpha)
The p-value is 0.11828396201875863 p-value (0.11828396201875863) is greater than or equal to the threshold (0.05). Not enough evidence to reject the null hypothesis
✅ Passes normality assumption check
Equal Variance Test (Levene's Test)¶
Null Hypothesis (H0): There is no difference in variance
Alternative Hypothesis (H1): There is a difference in the variance
# Levene's Test for Equal Variance
levene_stat, pvalue = st.levene(first_twenty, last_twenty)
# Interpret the p-value returned by Levene's test itself. Passing `p_value`
# here would reuse the value left over from the preceding Shapiro-Wilk cell.
p_value_reader(pvalue, alpha=0.05)
p-value (0.11828396201875863) is greater than or equal to the threshold (0.05). Not enough evidence to reject the null hypothesis
✅ Samples can be treated as having equal variance
💡 If the variances are not equal, you can perform Welch's T-test instead by setting equal_var=False (example below)
T-test¶
2 tailed paired T-test¶
💡 This test is more appropriate for two related (paired) samples. This dataset consists of different observations of students and is probably not a good match for this test.
Null Hypothesis (H0): The mean scores of the two samples are the same (first 20 samples = last 20 samples).
Alternative Hypothesis (H1): The mean scores of the two samples are different.
# Paired (dependent-samples) t-test on the two columns
paired_result = st.ttest_rel(
    df_new['first_twenty'],
    df_new['last_twenty'],
    alternative='two-sided',  # change this if the hypothesis is greater or less than.
)
t_score, p_value = paired_result.statistic, paired_result.pvalue
print(f'T-score: {t_score}')
p_value_reader(p_value, alpha=0.05)
T-score: -3.8227572324275547 p-value (0.0011485324742790328) is less than the threshold (0.05). Evidence suggests rejecting the null hypothesis
2 tailed T-test (equal variance)¶
💡 This test is more appropriate for this dataset of independent samples. This test assumes equal variance.
# Independent two-sample t-test with pooled (equal) variance
pooled_result = st.ttest_ind(
    df_new['first_twenty'],
    df_new['last_twenty'],
    equal_var=True,  # the default, spelled out for contrast with Welch's test
    alternative='two-sided',  # change this if the hypothesis is greater or less than.
)
t_score, p_value = pooled_result.statistic, pooled_result.pvalue
print(f'T-score: {t_score}')
p_value_reader(p_value, alpha=0.05)
T-score: -4.033229685686957 p-value (0.0002558439327543572) is less than the threshold (0.05). Evidence suggests rejecting the null hypothesis
Conclusion:
- Use a 2 tailed T-test for equal variance as the test
- ❌ Reject the Null Hypothesis, there is a difference in the means.
2 tailed T-test (unequal variance)¶
- This test below is just an example if the sample did not pass the variance test. This data did pass the variance assumption, so we would not use it here.
💡 This test is more appropriate for this dataset of independent samples. This test (Welch's T-test) does not assume equal variance.
# Welch's t-test: independent samples without the equal-variance assumption
welch_result = st.ttest_ind(
    df_new['first_twenty'],
    df_new['last_twenty'],
    equal_var=False,  # for unequal variance <------------------------------
    alternative='two-sided',
)
t_score, p_value = welch_result.statistic, welch_result.pvalue
print(f'T-score: {t_score}')
p_value_reader(p_value, alpha=0.05)
T-score: -4.033229685686958 p-value (0.0002562689392081225) is less than the threshold (0.05). Evidence suggests rejecting the null hypothesis