es-retention-graphs/graph.py

73 lines
2.6 KiB
Python

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcdefaults()
import matplotlib.colors as mcolors
from operator import attrgetter
class PDGraphPeers:
    """Plot peer-activity statistics from records with 'Peer' and 'Date' columns.

    The input is wrapped in a DataFrame stored as ``self.df``. Note that
    ``weekly_cohorts`` adds helper columns to that frame in place.
    """

    def __init__(self, data):
        """Wrap *data* in a DataFrame.

        Args:
            data: Anything ``pd.DataFrame`` accepts. The methods of this
                class read the 'Peer' and 'Date' columns.
        """
        self.df = pd.DataFrame(data)

    def unique_peers_counts(self):
        """Return the number of distinct 'Date' values for each 'Peer'.

        Returns:
            A Series indexed by peer.
        """
        return self.df.groupby(['Peer'])['Date'].nunique()

    def days_per_peers(self, exclude=20):
        """Plot a histogram of distinct-day counts per peer.

        Args:
            exclude: Only peers with strictly more than this many distinct
                days are plotted.

        Returns:
            The matplotlib Axes the histogram was drawn on.
        """
        day_counts = self.unique_peers_counts()
        frequent_peers = day_counts[day_counts > exclude]
        # ``distplot`` is deprecated in seaborn; prefer ``histplot`` when the
        # installed version provides it and fall back otherwise.
        histplot = getattr(sns, 'histplot', None)
        if histplot is not None:
            ax = histplot(frequent_peers, kde=False)
        else:
            ax = sns.distplot(frequent_peers, kde=False, hist=True)
        ax.set(
            # Report the threshold actually used rather than a fixed "20".
            title=f'Distribution of number of days per peers excluding {exclude} days',
            xlabel='# of days',
            ylabel='# of peers')
        return ax

    def _cohort_tables(self):
        """Compute weekly cohort sizes and the retention matrix.

        Adds the 'datetime', 'week', 'month' and 'cohort' columns to
        ``self.df`` in place. A peer's cohort is the week of its earliest
        'Date'.

        Returns:
            A ``(cohort_size, retention_matrix)`` tuple: the period-0 peer
            count per cohort, and the cohort-by-period table of peer counts
            divided by that cohort size.
        """
        df = self.df
        df['datetime'] = pd.to_datetime(df['Date'])
        df['week'] = df['datetime'].dt.to_period('W')
        # Not used below; kept so the columns left on self.df are unchanged.
        df['month'] = df['datetime'].dt.to_period('M')
        first_seen = df.groupby('Peer')['datetime'].transform('min')
        df['cohort'] = first_seen.dt.to_period('W')

        weekly = df.groupby(['cohort', 'week']).agg(
            n_peers=('Peer', 'nunique')).reset_index(drop=False)
        # Subtracting two weekly periods yields an offset; ``.n`` is the
        # number of weeks between the activity week and the cohort week.
        weekly['period_number'] = (weekly.week - weekly.cohort).apply(
            attrgetter('n'))
        cohort_pivot = weekly.pivot_table(index='cohort',
                                          columns='period_number',
                                          values='n_peers')
        cohort_size = cohort_pivot.iloc[:, 0]
        retention_matrix = cohort_pivot.divide(cohort_size, axis=0)
        return cohort_size, retention_matrix

    def weekly_cohorts(self, figsize=(18, 14)):
        """Draw a weekly cohort retention heatmap next to the cohort sizes.

        Args:
            figsize: Size passed to ``plt.subplots`` for the new figure.

        Returns:
            The Axes of the left-hand (cohort size) heatmap.
        """
        cohort_size, retention_matrix = self._cohort_tables()

        fig, ax = plt.subplots(1,
                               2,
                               figsize=figsize,
                               sharey=True,
                               gridspec_kw={'width_ratios': [1, 11]})

        # Right panel: retention matrix.
        sns.heatmap(retention_matrix,
                    mask=retention_matrix.isnull(),
                    annot=True,
                    fmt='.0%',
                    cmap='RdYlGn',
                    ax=ax[1])
        ax[1].set_title('Weekly Cohorts: Peer Retention', fontsize=16)
        ax[1].set(xlabel='# of periods', ylabel='')

        # Left panel: cohort sizes, drawn on a plain white colormap so only
        # the annotations are visible.
        cohort_size_df = pd.DataFrame(cohort_size).rename(
            columns={0: 'cohort_size'})
        white_cmap = mcolors.ListedColormap(['white'])
        size_ax = sns.heatmap(cohort_size_df,
                              annot=True,
                              cbar=False,
                              fmt='g',
                              cmap=white_cmap,
                              ax=ax[0])
        # Lay out only after both panels exist so their labels are accounted for.
        fig.tight_layout()
        return size_ax