Untitled
Never
"""Build 15-second windowed sensor features and save them as a pickle.

The script loads a fixed set of pickled tables from ``DATA_DIR``, stacks the
accelerometer/gyroscope, motion/usage and HCL sensor tables into one long
frame, forward-fills it in ``eventTimeHardware`` order, buckets rows into
15-second windows, aggregates a fixed feature list per
(USER, SESSION, window), attaches the foreground ``appName`` from
``HCL_appData`` and writes the result to ``final_008.pkl``.
"""
import os

import featuretools as ft
import numpy as np
import pandas as pd
from IPython.display import display

pd.options.display.max_columns = 200
pd.options.display.max_rows = 200

# Directory that holds one ``<table name>.pkl`` file per table.
DATA_DIR = r"C:\Users\admin\PycharmProjects\ActiveAuth\008"

# Length of one aggregation window; timestamps are recorded in nanoseconds.
WINDOW_NS = 15 * (10**9)

# make a dict name_of_table : table_pandas
list_names = [
    'BEHAVIOSEC_header',
    'BEHAVIOSEC_ACC',
    'BEHAVIOSEC_GYRO',
    'BEHAVIOSEC_MOTION',
    'BEHAVIOSEC_USAGE',
    'HCL_ambientBrightness',
    'HCL_appData',
    'HCL_gravity',
    'HCL_magneticField',
]

dict_name_pd = {}
for name in list_names:
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # from a trusted source.
    dict_name_pd[name] = pd.read_pickle(
        os.path.join(DATA_DIR, str(name) + '.pkl')
    )

# Quick visual inspection of every loaded table.
for key, pd_data in dict_name_pd.items():
    print(key)
    display(pd_data.head())
    print(pd_data.dtypes)

# drop columns and rename acc and gyro (preprocessing): the axis columns get
# a per-sensor suffix so they stay distinct once the tables are stacked.
dict_name_pd['BEHAVIOSEC_ACC'].rename(
    columns={
        'xValue': 'xValue_acc',
        'yValue': 'yValue_acc',
        'zValue': 'zValue_acc',
    },
    inplace=True,
)
dict_name_pd['BEHAVIOSEC_ACC'].drop(columns=['eventType', 'tag'], inplace=True)
dict_name_pd['BEHAVIOSEC_GYRO'].rename(
    columns={
        'xValue': 'xValue_gyro',
        'yValue': 'yValue_gyro',
        'zValue': 'zValue_gyro',
    },
    inplace=True,
)
dict_name_pd['BEHAVIOSEC_GYRO'].drop(columns=['eventType', 'tag'], inplace=True)

# BEHAVIOSEC_ACC + BEHAVIOSEC_GYRO
# NOTE(review): presumably both tables already carry an 'eventTimeHardware'
# column, since the combined frame is sorted by it below -- confirm.
data1 = pd.concat(
    [dict_name_pd['BEHAVIOSEC_ACC'], dict_name_pd['BEHAVIOSEC_GYRO']],
    sort=False,
)

# '-' is used as a placeholder in the axis columns; turn it into NaN so the
# columns can be cast to float.
motion = dict_name_pd['BEHAVIOSEC_MOTION']
for axis_col in ('majorAxis', 'minorAxis'):
    motion[axis_col] = motion[axis_col].replace('-', np.nan).astype(float)

# drop columns of usage and motion (preprocessing)
dict_name_pd['BEHAVIOSEC_USAGE'].drop(columns=['hash', 'tag'], inplace=True)
dict_name_pd['BEHAVIOSEC_MOTION'].drop(columns=['tag', 'eventType'], inplace=True)
if 'uiElementType' in dict_name_pd['BEHAVIOSEC_MOTION']:
    dict_name_pd['BEHAVIOSEC_MOTION'].drop(columns=['uiElementType'], inplace=True)

# MOTION + BEHAVIOSEC_USAGE
data2 = pd.concat(
    [dict_name_pd['BEHAVIOSEC_MOTION'], dict_name_pd['BEHAVIOSEC_USAGE']],
    sort=False,
)

# Per-session offset added to 'eventTime' to obtain 'eventTimeHardware'.
# The header's 'time' field holds two space-separated integers.
# NOTE(review): presumably the first is an epoch time in seconds and the
# second a device clock in milliseconds, giving an offset in nanoseconds --
# confirm the units against the data description.
header = dict_name_pd['BEHAVIOSEC_header']
difference = {}
for session_id in data2.SESSION.unique():
    time, eventTime = map(
        int, header[header['SESSION'] == session_id].time.iloc[0].split(' ')
    )
    difference[session_id] = (time - eventTime // 1000) * 1_000_000_000

# Vectorised form of "offset of the row's session + the row's eventTime".
data2['eventTimeHardware'] = data2['SESSION'].map(difference) + data2['eventTime']

# Align the HCL tables on the shared timestamp column name and give their
# value columns table-specific names.
dict_name_pd['HCL_magneticField'].rename(
    columns={
        'epoc_time_ns_hardware': 'eventTimeHardware',
        'x': 'x_mf',
        'y': 'y_mf',
        'z': 'z_mf',
    },
    inplace=True,
)
dict_name_pd['HCL_ambientBrightness'].rename(
    columns={
        'epoc_time_ns_hardware': 'eventTimeHardware',
        'value': 'value_aB',
    },
    inplace=True,
)
dict_name_pd['HCL_gravity'].rename(
    columns={
        'epoc_time_ns_hardware': 'eventTimeHardware',
        'x': 'x_gr',
        'y': 'y_gr',
        'z': 'z_gr',
    },
    inplace=True,
)
data3 = pd.concat(
    [
        dict_name_pd['HCL_magneticField'],
        dict_name_pd['HCL_ambientBrightness'],
        dict_name_pd['HCL_gravity'],
    ],
    sort=False,
)

display(data1.head())
display(data2.head())
display(data3.head())

data = pd.concat([data1, data2, data3], sort=False)

# Create a new column holding eventTimeHardware floored to a 15-second
# boundary, then group by USER, SESSION and that NEW COLUMN.
data.sort_values(by=['eventTimeHardware'], inplace=True)
# Forward-fill in time order so every row carries the most recent reading of
# each column. The fill runs over the whole frame, i.e. it is not restricted
# to a single USER/SESSION.
data.ffill(inplace=True)

data['timeDividedBy15'] = data['eventTimeHardware'] // WINDOW_NS * WINDOW_NS  # time is recorded in nanoseconds
data.drop(columns=['eventTimeHardware', 'eventTime'], inplace=True)

features = [
    'xValue_acc', 'yValue_acc', 'zValue_acc',
    'xValue_gyro', 'yValue_gyro', 'zValue_gyro',
    'eventPressure', 'majorAxis', 'minorAxis', 'positionX', 'positionY',
    'cpuUsage', 'memUsage',
    'x_mf', 'y_mf', 'z_mf',
    'value_aB',
    'x_gr', 'y_gr', 'z_gr',
]

# String names select the same-named reductions and produce the column
# suffixes 'mean', 'sum', 'min', 'max', 'std' and 'skew'.
agg_funcs = ['mean', 'sum', 'min', 'max', 'std', 'skew']
agg = {f: list(agg_funcs) for f in features}

data = data.groupby(['USER', 'SESSION', 'timeDividedBy15']).agg(agg)
# Flatten the (feature, statistic) column MultiIndex to 'feature_statistic'.
data.columns = ['_'.join(col) for col in data.columns]
# Keep only windows for which every aggregated statistic is defined.
data.dropna(how='any', inplace=True)
data = data.reset_index(level=['USER', 'SESSION', 'timeDividedBy15'])

# Foreground-app table: bucket each app start time into the same windows.
data_app = dict_name_pd['HCL_appData'].copy()
# NOTE(review): the factor 1_000_000 presumably converts milliseconds to
# nanoseconds -- confirm the unit of 'appForegroundStartEpoch'.
data_app['timeDividedBy15'] = (
    data_app['appForegroundStartEpoch'] * 1_000_000 // WINDOW_NS * WINDOW_NS
)

# Keep only the merge keys and the app name.
keep_columns = {'SESSION', 'USER', 'timeDividedBy15', 'appName'}
data_app.drop(
    columns=[col for col in data_app.columns if col not in keep_columns],
    inplace=True,
)
# Rows whose start time falls into window 0 are discarded.
data_app = data_app[data_app['timeDividedBy15'] != 0]

final_data = data.merge(
    data_app, how='outer', on=['USER', 'SESSION', 'timeDividedBy15']
)
final_data.sort_values(by=['timeDividedBy15'], inplace=True)
# Carry the most recent app name forward in window order; assign the result
# back so the fill is guaranteed to land in the frame.
final_data['appName'] = final_data['appName'].ffill()

final_data.to_pickle('final_008.pkl')
Raw Text
-
My Cutie Wife needed a dick and I gave it to her.
5 min ago
-
Untitled
16 min ago
-
Untitled
19 min ago
-
Untitled
23 min ago
-
Untitled
29 min ago
-
Backpacker Nata Ocean Opens Her Legs For BBC Roommate - HORNY HOSTEL
32 min ago
-
Untitled
40 min ago
-
~@~[[Official!]] MLB Opening Day Live Streams@ReddiT at Home?
44 min ago
-
Untitled
1 hour ago
-
Baji Live
1 hour ago