core_content_metric.py
import pandas as pd

from utils import get_valid_views, timed


@timed
def calculate_core_content_metric_trend(core_content_views, start_date=None, end_date=None,
                                        posts_read_exponent=1, karma_exponent=1):
    # Note: karma_exponent is currently unused in this function.
    if not start_date:
        start_date = core_content_views.index.min().date()
    if not end_date:
        end_date = core_content_views.index.max().date()

    dates = pd.date_range(start_date, end_date)

    core_content_metric = {}
    users_views_accumulator = {}  # userId -> cumulative weighted reads so far
    users_views_score = {}        # userId -> weighted reads ** posts_read_exponent

    # Step through the date range one day at a time, accumulating each user's
    # weighted reads and summing the per-user scores into a cumulative daily metric.
    for i, _ in enumerate(dates[:-1]):
        start_date = dates[i]
        end_date = dates[i + 1]
        for view in core_content_views.sort_index()[start_date:end_date].itertuples(index=False, name='View'):
            userId = view.userId
            current_weighted_reads = users_views_accumulator.get(userId, 0) + view.weighting
            users_views_accumulator[userId] = current_weighted_reads
            users_views_score[userId] = current_weighted_reads ** posts_read_exponent
        core_content_metric[start_date] = pd.Series(users_views_score).sum()

    return pd.Series(core_content_metric)


@timed
def compute_core_content_metric(collections, start_date=None, end_date=None, posts_read_exponent=1.5, karma_exponent=1,
                                included_collections=('rationality', 'codex', 'hpmor')):
    ## Per-collection read weightings (currently all equal)
    weightings = (pd.DataFrame
                  .from_dict({
                      'rationality': 1,
                      'codex': 1,
                      'hpmor': 1,
                  }, orient='index')
                  .rename(columns={0: 'weighting'})
                  )

    ## Get core content posts with weightings
    posts = collections['posts']
    posts_with_weightings = (
        posts[['_id', 'title', 'canonicalCollectionSlug']]
        .loc[posts['canonicalCollectionSlug'].isin(included_collections)]
        .merge(weightings, left_on='canonicalCollectionSlug', right_index=True)
    )

    ## Valid views of core content posts, deduplicated to at most one view per
    ## user per post per day, indexed by view timestamp
    core_content_views = (get_valid_views(collections)
                          .assign(date=lambda x: x['createdAt'].dt.date)
                          .sort_values('createdAt')
                          .drop_duplicates()
                          .drop_duplicates(subset=['date', 'documentId', 'userId'])
                          .merge(posts_with_weightings, left_on='documentId', right_on='_id')
                          .set_index('createdAt')
                          )

    ## Difference the cumulative trend to get a per-day metric
    cumulative_metric = calculate_core_content_metric_trend(core_content_views, start_date, end_date,
                                                            posts_read_exponent, karma_exponent)
    return cumulative_metric.diff()
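

if __name__ == '__main__':
    ## Minimal usage sketch on synthetic data (illustrative only). The frame
    ## below assumes the shape that compute_core_content_metric builds above:
    ## a createdAt DatetimeIndex plus userId and weighting columns. Running
    ## compute_core_content_metric itself additionally needs the site's
    ## `collections` dict of DataFrames, which is loaded elsewhere.
    example_views = pd.DataFrame(
        {
            'userId': ['u1', 'u1', 'u2'],
            'weighting': [1, 1, 1],
        },
        index=pd.to_datetime(['2023-01-01 10:00', '2023-01-02 09:30', '2023-01-02 12:00']),
    )
    example_views.index.name = 'createdAt'

    ## end_date extends one day past the last view so that day's views fall
    ## inside the final daily window. Expected cumulative trend:
    ## 2023-01-01 -> 1.0, 2023-01-02 -> 2**1.5 + 1 (about 3.83).
    print(calculate_core_content_metric_trend(example_views,
                                              end_date='2023-01-03',
                                              posts_read_exponent=1.5))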