Композиция регрессоров

В этом примере мы создадим Глубокий лес для задачи регрессии. Он будет состоять из двух слоёв, каждый из которых включает две модели регрессии: случайный лес и градиентный бустинг (CatBoost).

[1]:

from bosk.executor.topological import TopologicalExecutor
from bosk.pipeline.builder.functional import FunctionalPipelineBuilder
from bosk.stages import Stage
from sklearn.datasets import make_friedman1
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Number of trees used by every random forest and CatBoost block below.
n_estimators: int = 100
# Seed for dataset generation and the train/test split.
# NOTE(review): the model blocks below are not given this seed, so the
# printed metrics may vary between runs.
random_state: int = 42

[2]:

# Builder that collects block wrappers and assembles them into a pipeline.
b = FunctionalPipelineBuilder()

# Constructor arguments shared by the models of both layers.
rf_params = dict(n_estimators=n_estimators)
cb_params = dict(num_trees=n_estimators, verbose=0)

# Pipeline entry points: the features (X) and the regression target (y).
X = b.Input()()
y = b.TargetInput()()

# First layer: both regressors receive the raw input X.
rf_1 = b.RFR(**rf_params)(X=X, y=y)
cb_1 = b.CatBoostRegressor(**cb_params)(X=X, y=y)

# Reshape each first-layer output to (-1, 1) and concatenate it with X
# along axis 1 (presumably the outputs are 1-D predictions - confirm).
rf_1_column = b.Reshape((-1, 1))(X=rf_1)
cb_1_column = b.Reshape((-1, 1))(X=cb_1)
augmented_X = b.Concat(['X', 'rf_1', 'cb_1'], axis=1)(
    X=X,
    rf_1=rf_1_column,
    cb_1=cb_1_column,
)

# Second layer: the same two model types, fed with the concatenated data.
rf_2 = b.RFR(**rf_params)(X=augmented_X, y=y)
cb_2 = b.CatBoostRegressor(**cb_params)(X=augmented_X, y=y)

# Stack the second-layer outputs along axis 1 and average over that axis.
stacked = b.Stack(['rf_2', 'cb_2'], axis=1)(rf_2=rf_2, cb_2=cb_2)
average = b.Average(axis=1)(X=stacked)

# With the graph structure defined, build the pipeline: it takes the
# inputs 'X' and 'y' and exposes the averaged result as 'outcome'.
pipeline = b.build({'X': X, 'y': y}, {'outcome': average})

# Generate a synthetic Friedman #1 dataset (1000 samples) for training
# and testing the Deep Forest.
dataset = make_friedman1(n_samples=1000, random_state=random_state)
all_X, all_y = dataset

# Hold out 20% of the samples as the test set.
split_parts = train_test_split(
    all_X,
    all_y,
    test_size=0.2,
    random_state=random_state,
)
train_X, test_X, train_y, test_y = split_parts
# Create the executors. The FIT-stage executor takes both 'X' and 'y';
# the TRANSFORM-stage executor takes only 'X'. Both return 'outcome'.
fit_executor = TopologicalExecutor(
    pipeline, stage=Stage.FIT, inputs=['X', 'y'], outputs=['outcome'])
transform_executor = TopologicalExecutor(
    pipeline, stage=Stage.TRANSFORM, inputs=['X'], outputs=['outcome'])
# Run the pipeline in the FIT stage on the training data and report
# the mean squared error of its output against the training targets.
fit_result = fit_executor({'X': train_X, 'y': train_y}).numpy()
train_mse = mean_squared_error(train_y, fit_result['outcome'])
print("Train MSE:", train_mse)

# Run the pipeline in the TRANSFORM stage on the held-out data and
# report the mean squared error against the test targets.
test_result = transform_executor({'X': test_X}).numpy()
test_mse = mean_squared_error(test_y, test_result['outcome'])
print("Test MSE:", test_mse)

Train MSE: 0.010965590009033686
Test MSE: 0.4845832612241044