Regression example

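In this example we will build a Deep Forest for regression. It consists of two layers, each combining two regressors: a gradient boosting machine (GBM, here CatBoost) and a random forest. The predictions of the first layer are appended to the original features and passed to the second layer, whose outputs are averaged to produce the final prediction.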

[1]:
from bosk.executor.topological import TopologicalExecutor
from bosk.pipeline.builder.functional import FunctionalPipelineBuilder
from bosk.stages import Stage
from sklearn.datasets import make_friedman1
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Number of trees given to every model in the pipeline
# (used as `n_estimators` for the forests and `num_trees` for CatBoost).
n_estimators = 100
# Seed passed wherever the example needs randomness
# (e.g. data generation and the train/test split).
random_state = 42
[2]:
# First, obtain the functional builder object.
b = FunctionalPipelineBuilder()
# Input placeholders of the graph: the features and the regression target.
X, y = b.Input()(), b.TargetInput()()

# Layer 1: a random forest and a CatBoost GBM trained on the raw features.
# Every model receives the shared seed so that repeated runs of the
# notebook produce the same metrics.
rf_1 = b.RFR(n_estimators=n_estimators, random_state=random_state)(X=X, y=y)
cb_1 = b.CatBoostRegressor(
    num_trees=n_estimators, random_seed=random_state, verbose=0)(X=X, y=y)
# Turn each prediction into a single column so it can be
# concatenated with the features along axis 1.
rf_reshape = b.Reshape((-1, 1))(X=rf_1)
cb_reshape = b.Reshape((-1, 1))(X=cb_1)
concat_1 = b.Concat(['X', 'rf_1', 'cb_1'], axis=1)(X=X, rf_1=rf_reshape, cb_1=cb_reshape)

# Layer 2: the same pair of models, now trained on the original features
# augmented with the first-layer predictions.
rf_2 = b.RFR(n_estimators=n_estimators, random_state=random_state)(X=concat_1, y=y)
cb_2 = b.CatBoostRegressor(
    num_trees=n_estimators, random_seed=random_state, verbose=0)(X=concat_1, y=y)
# The final prediction is the mean of the two second-layer outputs.
stack = b.Stack(['rf_2', 'cb_2'], axis=1)(rf_2=rf_2, cb_2=cb_2)
average = b.Average(axis=1)(X=stack)

# After defining the graph structure, obtain the pipeline object
# from the builder by naming its inputs and outputs.
pipeline = b.build(
    {'X': X, 'y': y},
    {'outcome': average}
)

# Generate a synthetic regression dataset (Friedman #1)
# and hold out 20% of the samples for testing.
all_X, all_y = make_friedman1(n_samples=1000, random_state=random_state)
train_X, test_X, train_y, test_y = train_test_split(
    all_X,
    all_y,
    test_size=0.2,
    random_state=random_state,
)

# One executor per stage: fitting needs both the features and the target,
# while transforming (prediction) needs the features only.
fit_executor = TopologicalExecutor(
    pipeline, stage=Stage.FIT, inputs=['X', 'y'], outputs=['outcome'])
transform_executor = TopologicalExecutor(
    pipeline, stage=Stage.TRANSFORM, inputs=['X'], outputs=['outcome'])

# Fit the pipeline on the training part, then report the error
# on the training data and on the held-out data.
fit_result = fit_executor({'X': train_X, 'y': train_y}).numpy()
train_mse = mean_squared_error(train_y, fit_result['outcome'])
print("Train MSE:", train_mse)

test_result = transform_executor({'X': test_X}).numpy()
test_mse = mean_squared_error(test_y, test_result['outcome'])
print("Test MSE:", test_mse)
Train MSE: 0.010965590009033686
Test MSE: 0.4845832612241044