본문 바로가기
MachineLearning

랜덤포레스트

by forkballpitch 2022. 8. 31.
728x90
728x90
import numpy as np
from bayes_opt import BayesianOptimization
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

# Search space handed to BayesianOptimization as `pbounds`: each entry maps a
# RandomForestRegressor hyperparameter name to its (lower, upper) range.
# The optimizer samples real numbers from these ranges; rf_bo rounds them to
# integers before building the model.
rf_parameter_bounds = dict(
    max_depth=(5, 30),
    n_estimators=(10, 100),
    min_samples_split=(2, 10),
    min_samples_leaf=(1, 4),
)

def NMAE(true, pred):
    """Return the normalized mean absolute error of *pred* against *true*.

    The score is ``sum(|true - pred|) / sum(true)``; lower is better and a
    perfect prediction scores 0.

    Args:
        true: Array-like of ground-truth values (list, ndarray, Series, ...).
        pred: Array-like of predictions with the same length as ``true``.

    Returns:
        The error as a NumPy float.

    Note:
        ``sum(true)`` is not checked for zero; in that case NumPy yields
        ``inf``/``nan`` rather than raising.
    """
    # Coerce both inputs to float arrays so that plain Python sequences are
    # accepted and the comparison is always element-by-position (two pandas
    # Series would otherwise be aligned on their index labels first).
    true = np.asarray(true, dtype=float)
    pred = np.asarray(pred, dtype=float)
    score = np.sum(np.abs(true - pred)) / np.sum(true)
    return score

def rf_bo(max_depth, n_estimators, min_samples_split, min_samples_leaf):
    """Objective function for the Bayesian optimizer.

    Fits a RandomForestRegressor with the proposed hyperparameters and scores
    it on the current validation fold.

    The optimizer proposes real-valued hyperparameters, so each one is rounded
    to the nearest integer before the model is built. The training and
    validation data are not passed in: the function reads the module-level
    names ``x_train``, ``y_train``, ``x_valid`` and ``y_valid``, which must be
    set before it is called.

    Returns:
        The negated NMAE on the validation fold, so that maximizing this value
        minimizes the error.
    """
    proposed = {
        'max_depth': max_depth,
        'n_estimators': n_estimators,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
    }
    hyperparams = {key: int(round(val)) for key, val in proposed.items()}

    model = RandomForestRegressor(**hyperparams)
    model.fit(x_train, y_train)

    predictions = model.predict(x_valid)
    return -1 * NMAE(y_valid, predictions)

from sklearn.ensemble import RandomForestRegressor

# Separate the feature columns from the 'count' target column.
train_x, train_y = train.drop('count', axis=1), train['count']
# Convert the features to an ndarray once, up front, so the fold index arrays
# can slice rows directly (this conversion does not depend on the fold).
train_x = np.array(train_x)

# NOTE(review): `rf_mode1` is never used below; it looks like a leftover
# (possibly a typo of `rf_model`). Kept so module-level names are unchanged.
rf_mode1 = RandomForestRegressor()

# One array of test-set predictions per fold.
rf_model_result = []

kf = KFold(n_splits = 5)

for idx, (t_index, v_index) in enumerate(kf.split(train_x)):

    # rf_bo reads these four names as module-level globals, so they must be
    # rebound for every fold before the optimizer runs.
    x_train, x_valid = train_x[t_index], train_x[v_index]
    # KFold yields positional indices; `.iloc` applies them by position, so
    # the split stays correct even when the index of `train` is not 0..n-1.
    y_train, y_valid = train_y.iloc[t_index], train_y.iloc[v_index]

    BO_rf = BayesianOptimization(f=rf_bo, pbounds = rf_parameter_bounds, random_state = 0, verbose = 0)
    BO_rf.maximize(init_points = 5, n_iter = 5)

    # Round exactly as rf_bo does (int(round(...))) so the final model uses
    # the same integer hyperparameters that produced the best score;
    # truncating with int() alone could select a different configuration.
    max_params = {
        name: int(round(value))
        for name, value in BO_rf.max['params'].items()
    }
    rf_model = RandomForestRegressor(**max_params)

    rf_model.fit(x_train, y_train)

    rf_model_result.append(rf_model.predict(test))
    print('RandomForest Regression', idx, 'fold end')

# Final prediction: element-wise mean of the per-fold test predictions.
rf_prediction = np.mean(rf_model_result,axis = 0)
728x90
728x90

'MachineLearning' 카테고리의 다른 글

파일생성  (0) 2022.08.31
단순 선형회귀 분석  (0) 2022.08.31
특정 상관계수 이하 제거 및 x,y 분리  (0) 2022.08.31
상관계수 heatmap  (0) 2022.08.31
histplot  (0) 2022.08.31