Back

KeyError: can anyone help me solve it?

Created 2 years ago
87 Views
2 Comments
HemavathijGIz7c
@HemavathijGIz7c
HemavathijGIz7c
@HemavathijGIz7cProfile is locked. Login

import pandas as pd

import numpy as np

from datetime import date

import datetime as dt

# Load the user table, read with ISO-8859-1 encoding.
takehome_users = pd.read_csv('takehome_users.csv', encoding='ISO-8859-1')

# Parse both timestamp columns into datetimes.
# NOTE(review): if last_session_creation_time holds Unix epoch seconds,
# pd.to_datetime would need unit='s' -- confirm against the CSV.
takehome_users['creation_time'] = pd.to_datetime(
    takehome_users['creation_time'])
takehome_users['last_session_creation_time'] = pd.to_datetime(
    takehome_users['last_session_creation_time'])

# Move 'object_id' to a trailing 'users_id' column (pop removes the old
# column, the assignment appends the new one at the end).
takehome_users['users_id'] = takehome_users.pop('object_id')

# Name and e-mail are not used further on.
takehome_users = takehome_users.drop(columns=['name', 'email'])

# Uncomment to inspect the cleaned user table:
# takehome_users.head()
# takehome_users.info()

# Load the per-login engagement log with the same encoding.
takehome_user_engagement = pd.read_csv(
    'takehome_user_engagement.csv', encoding='ISO-8859-1')

# Uncomment to inspect the engagement log:
# takehome_user_engagement.info()

def get_date_int(df, column):
    """Split a datetime column into its year, month and day components.

    Args:
        df: DataFrame holding the column.
        column: Name of a datetime-typed column in ``df``.

    Returns:
        A ``(year, month, day)`` tuple of Series, each indexed like ``df``.
    """
    stamps = df[column].dt
    return stamps.year, stamps.month, stamps.day

def get_week(x):
    """Return ``x.isocalendar()``: the ISO (year, week, weekday) of a date.

    Args:
        x: Any object exposing ``isocalendar()``, e.g. a ``datetime.date``,
            ``datetime.datetime`` or ``pandas.Timestamp``.
    """
    iso_parts = x.isocalendar()
    return iso_parts

def get_iso_date_int(df, column):
    """Unpack a column of 3-item sequences into three separate Series.

    Args:
        df: DataFrame holding the column.
        column: Name of a column whose cells are 3-item sequences such as
            the ISO (year, week, weekday) triples produced by ``get_week``.

    Returns:
        A tuple of three Series (first, second and third item of every
        cell), each sharing ``df``'s index.
    """
    # Expanding the list of triples gives one positional column per item.
    expanded = pd.DataFrame(df[column].tolist(), index=df.index)
    return expanded[0], expanded[1], expanded[2]

# ---------------------------------------------------------------------------
# Flag "adopted" users (at least one ISO calendar week with more than two
# login rows) and join that flag onto the user table.
# ---------------------------------------------------------------------------

# Derive calendar fields for every login event.
takehome_user_engagement['time_stamp'] = pd.to_datetime(
    takehome_user_engagement['time_stamp'])
takehome_user_engagement['week_time_stamp'] = (
    takehome_user_engagement['time_stamp'].apply(get_week))

# Debug helpers, kept for reference:
# print('first user:', min(takehome_user_engagement.time_stamp))
# print('last user: ', max(takehome_user_engagement.time_stamp))

year, month, day = get_date_int(takehome_user_engagement, 'time_stamp')
takehome_user_engagement['year'] = year
takehome_user_engagement['month'] = month
takehome_user_engagement['day'] = day

iso_year, iso_week, iso_day = get_iso_date_int(
    takehome_user_engagement, 'week_time_stamp')

# Series.dt.weekofyear no longer exists in pandas 2.x; the ISO week that was
# just extracted is the same quantity and works on every pandas version.
takehome_user_engagement['week'] = iso_week

# (ISO year, ISO week) identifies a calendar week unambiguously across
# year boundaries.
takehome_user_engagement['year_week'] = list(zip(iso_year, iso_week))

takehome_user_engagement = takehome_user_engagement.sort_values(
    ['time_stamp', 'user_id'], ascending=True)
# .copy() so that the column assignments below write to an independent frame
# rather than to a slice of the sorted one.
takehome_user_engagement = takehome_user_engagement[
    ['user_id', 'visited', 'day', 'year_week']].copy()

# For every user, count the distinct login rows that fall in a week where
# that user has more than two login rows. Grouping once replaces a full-frame
# scan per user; the former "week occurs more than once overall" pre-filter
# is implied by the per-user "> 2" test and is therefore dropped.
# NOTE(review): this uses fixed ISO calendar weeks, not rolling 7-day
# windows -- confirm that matches the intended definition of adoption.
adopted_user_dict = {}
for user_id, user_logins in takehome_user_engagement.groupby('user_id'):
    logins_per_week = user_logins['year_week'].value_counts()
    busy_weeks = list(logins_per_week[logins_per_week > 2].index)
    three_logins = user_logins[user_logins['year_week'].isin(busy_weeks)]
    adopted_user_dict[str(user_id)] = len(three_logins.drop_duplicates())

takehome_user_engagement['engagement_index'] = (
    takehome_user_engagement['user_id'].astype(str).map(adopted_user_dict))

# Single vectorised assignment instead of chained indexing
# (df[col][mask] = 1), which may silently write to a temporary copy.
takehome_user_engagement['adopted_user'] = (
    takehome_user_engagement['engagement_index'] > 0).astype('int64')

adopted_count = takehome_user_engagement.loc[
    takehome_user_engagement['adopted_user'] == 1,
    ['user_id', 'adopted_user']].groupby('user_id').count()
print('No of adopted users:', len(adopted_count))

# The flag is constant per user, so keep one row per user; merging the
# per-login rows would repeat each user's profile once per login.
adopted = takehome_user_engagement[
    ['user_id', 'adopted_user']].drop_duplicates()

# Fix for the KeyError: the user table stores the id as 'users_id', so
# merging on 'user_id' failed. Align the key name before merging. rename()
# ignores a missing label, so this also works if the column is already
# called 'user_id'.
adopted_users = pd.merge(
    takehome_users.rename(columns={'users_id': 'user_id'}),
    adopted,
    on='user_id',
    how='outer')

Comments (2)
Please login to comment.