# Environment setup for the pricing-test analysis notebook:
# plotting style, warning suppression, and pandas display options.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Base seaborn style; the 'fivethirtyeight' stylesheet below overrides most of it.
sns.set_style("ticks")
plt.style.use('fivethirtyeight')
#sns.set_style("white")
#plt.style.use('ggplot')
import warnings
# NOTE(review): blanket suppression also hides deprecation warnings; acceptable in a notebook.
warnings.filterwarnings("ignore")
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Default figure size and font sizes for every plot below.
plt.rcParams["figure.figsize"] = (12,8)
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['axes.titlesize'] = 20
# Show up to 50 columns/rows when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)
# Load the A/B test results; columns used later include user_id, timestamp,
# source, device, operative_system, test, price, and converted.
df_test = pd.read_csv("data/Pricing_Test/test_results.csv")
print(df_test.shape)
df_test.head()
# Load the user table; joined on user_id below — presumably it carries
# user attributes such as city (used later). TODO confirm full schema.
df_user = pd.read_csv("data/Pricing_Test/user_table.csv")
print(df_user.shape)
df_user.head()
# Missing values per column in each table before merging.
df_test.isnull().sum()
df_user.isnull().sum()
# Left-merge so every test-result row is kept; users absent from the
# user table get NaN for all user-table columns.
df = pd.merge(df_test, df_user, on='user_id', how= 'left')
df.head()
# Missing values after the merge: each count equals the number of rows
# whose user_id has no match in df_user.
df.isnull().sum()
# Sanity-check the assignment: control (test=0) should always be priced
# at $39 and test (test=1) at $59 — the cross-tab reveals violations.
df.groupby(['test','price'])[['price']].count()
Issue: the cross-tab above shows mismatched rows — some control (test=0) users were shown $59 and some test (test=1) users were shown $39, contradicting the intended assignment.
Fix: keep only the consistent rows (control at $39, test at $59) and drop the rest.
# Drop rows that violate the price assignment: control must see $39 and
# test must see $59; anything else is a logging/assignment error.
valid_control = (df['test'] == 0) & (df['price'] == 39)
valid_test = (df['test'] == 1) & (df['price'] == 59)
df = df[valid_control | valid_test]
df.head()
# Re-check the cross-tab: only the two valid test/price combinations remain.
df.groupby(['test','price'])[['price']].count()
# Revenue per user: the price paid when the user converted, 0 otherwise
# (converted is a 0/1 flag, so the product does exactly that).
df['revenue'] = df['price'] * df['converted']
df.head()
df['revenue'].describe()
# Compare the two arms: conversion rate falls under the higher price,
# but revenue per user is the metric that actually matters here.
df.groupby('test')['converted'].mean()
df.groupby('test')['revenue'].mean()
Although the conversion rate drops by around 25%, revenue per user goes up.
# Welch's t-test (unequal variances) on revenue per user, test vs. control.
from scipy import stats

revenue_in_test = df.loc[df['test'] == 1, 'revenue']
revenue_in_control = df.loc[df['test'] == 0, 'revenue']
t_test = stats.ttest_ind(revenue_in_test, revenue_in_control, equal_var=False)
print('t statistic is: ', t_test.statistic)
print('p-value is: ', t_test.pvalue)
Revenue per user is up and highly statistically significant.
Let’s now check how different segments are reacting to the test. This can give us extremely useful information about user price sensitivity by segment, which is really just a fancy way to say: how much different users are valuing our product?
# Average revenue per user by acquisition source, split by test arm.
ax = sns.barplot(x='source', y='revenue', hue='test', data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Revenue by Source');
Insights: compare test vs. control revenue within each acquisition source — a source where the gap is small (or reversed) would indicate higher price sensitivity in that channel.
# Average revenue per user by device type, split by test arm.
ax = sns.barplot(x='device', y='revenue', hue='test', data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Revenue by Device');
Insight: compare the arms within each device type — a large gap on one device would suggest device-specific price sensitivity.
# Average revenue per user by operating system, split by test arm.
ax = sns.barplot(x='operative_system', y='revenue', hue='test', data=df)
ax.tick_params(axis='x', rotation=45);
Insights: the Linux segment appears to behave differently from the others — before reading too much into it, check how many Linux users there actually are (next cell).
# Fraction of all users on Linux (mean of a boolean mask = proportion True).
(df['operative_system'] == 'linux').mean()
Only about 1% of users are on Linux — too small a segment to be reliable, so let's remove them from the dataset.
# Exclude the tiny Linux segment and recompute the per-arm revenue means.
df = df[df['operative_system'] != 'linux']
df.groupby('test')[['revenue']].mean()
# Welch's t-test again on the filtered data.
t_test = stats.ttest_ind(df.loc[df['test'] == 1, 'revenue'],
                         df.loc[df['test'] == 0, 'revenue'],
                         equal_var=False)
print('t statistic is: ', t_test.statistic)
print('p-value is: ', t_test.pvalue)
The result improves slightly.
# Top 10 cities by number of users.
# value_counts() already returns counts sorted in descending order, so the
# extra reset_index()/sort_values() round-trip is unnecessary — and it was
# version-fragile: in pandas >= 2.0 reset_index() names the columns
# ('city', 'count'), so sort_values('city') would sort by city NAME (wrong
# top 10) and the later top_cities['index'] lookup would raise KeyError.
top_cities = df['city'].value_counts().head(10)
top_cities
# Average revenue per user in the top-10 cities, split by test arm.
sns.barplot(x='city', y='revenue', hue='test', data=df[df['city'].isin(top_cities.index)]);
plt.xticks(rotation=45);
Conclusions: revenue per user is higher at $59 than at $39 and the difference is statistically significant, so the price increase looks beneficial overall.
Other insights: the segment-level views (source, device, OS, city) show which user groups are most price-sensitive and are worth monitoring after a rollout.
Let’s say significance level and power are the standard 0.05 and 0.8. Let’s also assume we are interested in detecting a difference in conversion rate of about ~33%, i.e. ~1-39/59. That is, if the new conversion rate is about 66% of the old one we are good.
import statsmodels.stats.api as sms

# Baseline conversion rate from the control arm of the raw test data.
conversion_rate_control = df_test.query('test==0').converted.mean()
# Effect size for detecting a drop to 39/59 (~66%) of the baseline rate.
p1_and_p2 = sms.proportion_effectsize(conversion_rate_control,
                                      conversion_rate_control * 39 / 59)
# Per-group sample size at alpha=0.05 and power=0.8 (two-sided by default).
sample_size = round(sms.NormalIndPower().solve_power(p1_and_p2, power=0.8, alpha=0.05))
print("The required sample size per group is ~", sample_size)
# Weekly traffic: how many users arrive per week, to size the test duration.
# Take the date part of the timestamp and format it as the week-of-year
# ('%W': weeks start on Monday, zero-padded).
date_part = df_test.timestamp.str.split(' ').str[0]
df_test['week_year'] = pd.to_datetime(date_part).dt.strftime('%W')
# Average users per week, excluding the first and last weeks since they
# may not cover a full 7 days.
weekly_counts = df_test['week_year'].value_counts().sort_index()
users_week = weekly_counts.iloc[1:-1].mean()
print("Avg number of users per week is:", round(users_week))
If we ran the test on 50% of the users, we would be done in less than a week.
However, it is a good practice to run the test for at least two weeks regardless of that, so that we can get weekly patterns.
In terms of percentage split, we can look at the minimum percentage of users that would need to be in the test group to reach the required sample size in two weeks.
print("The percentage of traffic that needs to be in test is", round(sample_size/(users_week*2)*100),'%')