gist of IPYNB with all code

## Utility code

### All imports used

```python
from dataclasses import dataclass

import numpy as np
import numpy.typing as npt
import plotly.io as pio
import statsmodels.api as sm
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor

pio.renderers.default = 'png'  # comment this out for interactive plots
```

### Dataset generation code

```python
from dataclasses import dataclass
from typing import Callable

import numpy as np
import numpy.typing as npt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.datasets import make_classification, make_regression


@dataclass
class Dataset:
    name: str
    X: npt.NDArray[np.float64]
    y: npt.NDArray[np.float64]


def make_poisson(
    n_samples: int,
    X_loc: float,
    coef_: float,
    intercept_: float,
    random_state: int = 0,
) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
    # Poisson counts with a log-linear mean: y ~ Poisson(exp(coef_ * X + intercept_))
    np.random.seed(random_state)
    X = np.random.normal(loc=X_loc, scale=X_loc / 3, size=n_samples)
    mu = np.exp(coef_ * X + intercept_)
    y = np.random.poisson(lam=mu, size=n_samples)
    return X, y


def f(x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
    # Ground-truth function for the nonlinear dataset
    return np.sin(x) + np.sin(2 * x)


def make_nonlinear(
    n_samples: int,
    func: Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
    X_dist: str = "uniform",
    X_scale: float = 1.0,
    noise_scale: float = 0.5,
    random_state: int = 0,
) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
    np.random.seed(random_state)
    if X_dist == "normal" or X_dist == "n":
        X = np.random.normal(scale=X_scale, size=n_samples)
    elif X_dist == "uniform" or X_dist == "u":
        X = np.random.uniform(low=-3 * X_scale, high=3 * X_scale, size=n_samples)
    else:
        raise ValueError(f"Unknown X_dist: {X_dist!r}")
    np.random.seed(random_state + 1)
    noise = np.random.normal(scale=noise_scale, size=n_samples)
    y = func(X) + noise
    return X.reshape(-1, 1), y


def generate_datasets() -> list[Dataset]:
    datasets: list[Dataset] = []
    # Linear data
    X, y = make_regression(
        n_samples=1000,
        n_features=1,
        n_informative=1,
        n_targets=1,
        noise=25.0,
        random_state=0,
        bias=7.0,
    )
    datasets.append(Dataset("Linear Data", X, y))
    # Binary data
    X, y = make_classification(
        n_samples=1000,
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1,
        class_sep=1.0,
        flip_y=0.1,
        random_state=0,
    )
    datasets.append(Dataset("Binary Data", X, y))
    # Poisson data
    X, y = make_poisson(n_samples=1000, X_loc=3.0, coef_=0.7, intercept_=-0.2, random_state=0)
    datasets.append(Dataset("Poisson Data", X, y))
    # Nonlinear data
    X, y = make_nonlinear(
        n_samples=1000, func=f, X_dist="u", X_scale=1.2, noise_scale=0.5, random_state=0
    )
    datasets.append(Dataset("Nonlinear Data", X, y))
    return datasets


def plot_datasets(datasets: list[Dataset]) -> None:
    fig = make_subplots(rows=1, cols=4, subplot_titles=[dataset.name for dataset in datasets])
    for i, dataset in enumerate(datasets, 1):
        fig.add_trace(
            go.Scatter(
                x=dataset.X.flatten(),
                y=dataset.y,
                mode="markers",
                marker=dict(size=5, opacity=0.2),
                name=dataset.name,
            ),
            row=1,
            col=i,
        )
        fig.update_xaxes(title_text="Feature", row=1, col=i)
        fig.update_yaxes(title_text="Target", row=1, col=i)
    fig.update_layout(height=400, width=1200, title_text="Datasets Overview", showlegend=False)
    fig.show()
```
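The generators compose: `make_nonlinear` accepts any vectorized function as its ground truth, so adding another toy dataset is a one-liner. A minimal sketch (the `np.cos` target and the values below are illustrative, not part of the datasets used later):

```python
import numpy as np

# Hypothetical extra dataset: same generator, different ground-truth function
X_extra, y_extra = make_nonlinear(
    n_samples=500,
    func=np.cos,        # any vectorized callable works here
    X_dist="u",
    X_scale=1.0,
    noise_scale=0.3,
    random_state=42,
)
print(X_extra.shape, y_extra.shape)  # (500, 1) (500,)
```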
### Model plot code

```python
from typing import Any, Callable

import numpy as np
import numpy.typing as npt
import plotly.graph_objects as go


def plot_model(
    X: npt.NDArray[np.float64],
    y: npt.NDArray[np.float64],
    xx: npt.NDArray[np.float64],
    y_pred: npt.NDArray[np.float64],
    title: str,
    model_label: str = "model",
    gt_func: Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]] | None = None,
    extra_plots: list[tuple[str, npt.NDArray[np.float64], dict[str, Any]]] | None = None,
    xlim: tuple[float, float] | None = None,
    ylim: tuple[float, float] | None = None,
    **scatter_kwargs,
):
    fig = go.Figure()

    # Scatter plot of data points
    scatter_kwargs = {**{"opacity": 0.4, "marker": {"size": 5}}, **scatter_kwargs}
    fig.add_trace(go.Scatter(x=X.flatten(), y=y, mode="markers", name="Data", **scatter_kwargs))

    # Model prediction
    fig.add_trace(
        go.Scatter(x=xx, y=y_pred, mode="lines", name=model_label, line=dict(color="red", width=5))
    )

    # Ground truth function
    if gt_func:
        fig.add_trace(
            go.Scatter(
                x=xx, y=gt_func(xx), mode="lines", name="Ground Truth", line=dict(color="black")
            )
        )

    # Extra plots
    if extra_plots:
        for label, data, style in extra_plots:
            color = style.get("c", "blue")  # fall back to blue when no 'c' key is given
            if color in ["c", "b", "g", "r"]:
                # Map Matplotlib-style single-letter colors to full color names
                color_map = {"c": "cyan", "b": "blue", "g": "green", "r": "red"}
                color = color_map[color]
            fig.add_trace(
                go.Scatter(
                    x=xx,
                    y=data,
                    mode="lines",
                    name=label,
                    line=dict(color=color, width=style.get("linewidth", 2)),
                )
            )

    # Update layout
    fig.update_layout(
        title=title,
        xaxis_title="Feature",
        yaxis_title="Target",
        legend=dict(x=0.02, y=0.98, bgcolor="rgba(255,255,255,0.8)"),
        width=900,
        height=700,
        margin=dict(l=50, r=20, t=50, b=50),  # tighten margins
    )

    # Set axis limits if provided
    if xlim:
        fig.update_xaxes(range=xlim)
    if ylim:
        fig.update_yaxes(range=ylim)

    # Add grid
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")

    fig.show()
```

## Datasets preview

```python
# Generate and plot datasets
datasets: list[Dataset] = generate_datasets()
plot_datasets(datasets)
```

## Models

### Simple models

#### Linear regression

**Code**

```python
import numpy as np
import numpy.typing as npt
from sklearn.linear_model import LinearRegression


def linear_regression_example(X: npt.NDArray[np.float64], y: npt.NDArray[np.float64]):
    lr = LinearRegression().fit(X, y)
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 101)
    y_pred: npt.NDArray[np.float64] = lr.predict(xx.reshape(-1, 1))
    # Plotly titles render HTML, so use <br> for the line break
    title = f"Linear Regression<br>y = {lr.intercept_:#.3g} + {lr.coef_[0]:#.3g}x"
    plot_model(X, y, xx, y_pred, title)
```

**Demo**

```python
linear_dataset = datasets[0]
linear_regression_example(X=linear_dataset.X, y=linear_dataset.y)
```

#### Logistic regression

**Code**

```python
import numpy as np
import numpy.typing as npt
from sklearn.linear_model import LogisticRegression


def logistic_regression_example(X: npt.NDArray[np.float64], y: npt.NDArray[np.float64]):
    lor = LogisticRegression().fit(X, y)
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 101)
    y_pred: npt.NDArray[np.float64] = lor.predict_proba(xx.reshape(-1, 1))[:, 1]
    title = f"Logistic Regression<br>y = σ({lor.intercept_[0]:#.3g} + {lor.coef_[0][0]:#.3g}x)"
    plot_model(X, y, xx, y_pred, title, marker=dict(symbol="line-ns", size=20, line_width=1))
```

**Demo**

```python
logistic_dataset = datasets[1]
logistic_regression_example(X=logistic_dataset.X, y=logistic_dataset.y)
```
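For a single-feature logistic fit, the predicted probability crosses 0.5 exactly where the linear score is zero, i.e. at x* = -intercept/coef. A quick check, assuming the cells above have run so `logistic_dataset` exists:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

lor = LogisticRegression().fit(logistic_dataset.X, logistic_dataset.y)

# Decision boundary: sigma(b0 + b1*x) = 0.5  <=>  b0 + b1*x = 0
x_star = -lor.intercept_[0] / lor.coef_[0][0]
print(f"p(y=1) crosses 0.5 at x = {x_star:.3f}")

# Sanity check: the predicted probability at x_star should be ~0.5
p = lor.predict_proba(np.array([[x_star]]))[0, 1]
assert abs(p - 0.5) < 1e-9
```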
#### GLM (Poisson)

**Code**

```python
import numpy as np
import numpy.typing as npt
import statsmodels.api as sm


def glm_poisson_example(X: npt.NDArray[np.float64], y: npt.NDArray[np.float64]):
    glm = sm.GLM(endog=y, exog=sm.add_constant(X), family=sm.families.Poisson()).fit()
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 101)
    y_pred: npt.NDArray[np.float64] = glm.predict(sm.add_constant(xx))
    title = f"GLM (Poisson Regression)<br>y = exp({glm.params[0]:#.3g} + {glm.params[1]:#.3g}x)"
    plot_model(X, y, xx, y_pred, title)
```

**Demo**

```python
poisson_dataset = datasets[2]
glm_poisson_example(poisson_dataset.X, poisson_dataset.y)
```

### Nonlinear models - tree-based

```python
nonlinear_dataset = datasets[3]
```

#### Decision tree

**Code**

```python
import numpy as np
import numpy.typing as npt
from sklearn.tree import DecisionTreeRegressor


def decision_tree_example(
    X: npt.NDArray[np.float64], y: npt.NDArray[np.float64]
) -> DecisionTreeRegressor:
    dt = DecisionTreeRegressor(max_depth=6, min_samples_split=100).fit(X, y)
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 1001)
    y_dt: npt.NDArray[np.float64] = dt.predict(xx.reshape(-1, 1))
    # Error is measured against the noiseless ground truth f, not the noisy targets
    print(f"Prediction error: {np.mean((f(xx) - y_dt) ** 2):#.4g}")
    plot_model(
        X, y, xx, y_dt, "Decision Tree", model_label="DT", gt_func=f, xlim=(-2, 2), ylim=(-2, 2)
    )
    return dt
```

**Demo**

```python
dt = decision_tree_example(X=nonlinear_dataset.X, y=nonlinear_dataset.y)
```

```
Prediction error: 0.02740
```

**Analysis**

```python
import dtreeviz

viz_model = dtreeviz.model(
    model=dt,
    X_train=nonlinear_dataset.X,
    y_train=nonlinear_dataset.y,
    feature_names=["X"],
    target_name="y",
)
viz_model.view(scale=1.5, x=[1.2])
```

#### Random forest

**Code**

```python
import numpy as np
import numpy.typing as npt
from sklearn.ensemble import RandomForestRegressor


def random_forest_example(X: npt.NDArray[np.float64], y: npt.NDArray[np.float64]):
    rf = RandomForestRegressor(
        n_estimators=3, max_depth=6, min_samples_split=100, random_state=0
    ).fit(X, y)
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 1001)
    y_rf: npt.NDArray[np.float64] = rf.predict(xx.reshape(-1, 1))
    print(f"Prediction error: {np.mean((f(xx) - y_rf) ** 2):#.4g}")
    # Overlay the individual trees to show what the forest averages
    y_trees = [tree_.predict(xx.reshape(-1, 1)) for tree_ in rf.estimators_[:3]]
    extra_plots = [(f"tree {i+1}", y_tree, {"c": "cbg"[i]}) for i, y_tree in enumerate(y_trees)]
    plot_model(
        X,
        y,
        xx,
        y_rf,
        "Random Forest, 3 trees",
        model_label="RF",
        gt_func=f,
        extra_plots=extra_plots,
        xlim=(-2, 2),
        ylim=(-2, 2),
    )
```

**Demo**

```python
random_forest_example(X=nonlinear_dataset.X, y=nonlinear_dataset.y)
```

```
Prediction error: 0.02408
```

#### Gradient boosting

**Code**

```python
import numpy as np
import numpy.typing as npt
from sklearn.ensemble import GradientBoostingRegressor


def gradient_boosting_example(X: npt.NDArray[np.float64], y: npt.NDArray[np.float64]):
    gbm = GradientBoostingRegressor(
        n_estimators=3, learning_rate=0.68, min_samples_split=150, max_depth=6
    ).fit(X, y)
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 1001)
    y_gbm = gbm.predict(xx.reshape(-1, 1))
    print(f"Prediction error: {np.mean((f(xx) - y_gbm) ** 2):#.4g}")
    # staged_predict yields the ensemble's prediction after each boosting step
    y_trees = list(gbm.staged_predict(xx.reshape(-1, 1)))
    extra_plots = [
        (f"step {i+1}", y_tree, {"c": "bgr"[i], "linewidth": 2})
        for i, y_tree in enumerate(y_trees[:2])
    ]
    plot_model(
        X,
        y,
        xx,
        y_gbm,
        "Gradient Boosting Machine, 3 trees",
        model_label="step 3 (GBM)",
        gt_func=f,
        extra_plots=extra_plots,
        xlim=(-2, 2),
        ylim=(-2, 2),
    )
```

**Demo**

```python
gradient_boosting_example(X=nonlinear_dataset.X, y=nonlinear_dataset.y)
```

```
Prediction error: 0.01625
```
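The two ensembles differ in how they combine trees: a random forest averages independently grown trees, while boosting adds trees sequentially (which is what `staged_predict` exposes above). A quick numerical check of the averaging claim, assuming `nonlinear_dataset` from the cells above:

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor

X, y = nonlinear_dataset.X, nonlinear_dataset.y
rf = RandomForestRegressor(
    n_estimators=3, max_depth=6, min_samples_split=100, random_state=0
).fit(X, y)

xx = np.linspace(X.min(), X.max(), 1001).reshape(-1, 1)
per_tree = np.stack([tree_.predict(xx) for tree_ in rf.estimators_])

# The forest's prediction is the unweighted mean of its trees' predictions
assert np.allclose(rf.predict(xx), per_tree.mean(axis=0))
```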
### Nonlinear models - other

#### Neural network

**Code**

```python
import numpy as np
import numpy.typing as npt
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.neural_network import MLPRegressor


def neural_network_example(
    X: npt.NDArray[np.float64],
    y: npt.NDArray[np.float64],
    activation: str = "relu",
    analyze: bool = False,
) -> MLPRegressor:
    mlp = MLPRegressor(
        hidden_layer_sizes=(5, 8),
        activation=activation,
        max_iter=20000,
        random_state=0,
    ).fit(X, y)
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 1001)
    y_mlp = mlp.predict(xx.reshape(-1, 1))
    print(f"Prediction error: {np.mean((f(xx) - y_mlp) ** 2):#.4g}")
    if analyze:
        # Create a subplot with 1 row and 3 columns, one per layer
        fig = make_subplots(
            rows=1, cols=3, subplot_titles=["Layer 1", "Layer 2", "Layer 3 (output)"]
        )
        # Add the layer analysis plots
        analyze_neural_network(X, y, mlp, fig=fig, row=1, col_start=1)
        # Update layout for the combined plot
        fig.update_layout(
            height=600,
            width=1200,
            title_text="Neural Network Analysis",
            margin=dict(l=50, r=20, t=50, b=50),
        )
        fig.show()
    else:
        # Show only the model prediction plot
        plot_model(
            X,
            y,
            xx,
            y_mlp,
            f"Neural Network with {mlp.hidden_layer_sizes} hidden layers",
            model_label="Neural Net",
            gt_func=f,
            xlim=(-2, 2),
            ylim=(-2, 2),
        )
    return mlp


def analyze_neural_network(
    X: npt.NDArray[np.float64],
    y: npt.NDArray[np.float64],
    mlp: MLPRegressor,
    fig=None,
    row=1,
    col_start=1,
):
    xx: npt.NDArray[np.float64] = np.linspace(X.min(), X.max(), 1001)

    def relu(x):
        return x * (x > 0)

    # Weights and biases per layer
    w1: npt.NDArray[np.float64] = mlp.coefs_[0]
    w2: npt.NDArray[np.float64] = mlp.coefs_[1]
    w3: npt.NDArray[np.float64] = mlp.coefs_[2]
    b1: npt.NDArray[np.float64] = mlp.intercepts_[0]
    b2: npt.NDArray[np.float64] = mlp.intercepts_[1]
    b3: npt.NDArray[np.float64] = mlp.intercepts_[2]

    if mlp.activation == "relu":
        activation = relu
    elif mlp.activation == "tanh":
        activation = np.tanh
    elif mlp.activation == "logistic":
        activation = lambda x: 1 / (1 + np.exp(-x))  # noqa: E731
    elif mlp.activation == "identity":
        activation = lambda x: x  # noqa: E731
    else:
        raise ValueError(f"Activation function {mlp.activation} not supported")

    # Manual forward pass, layer by layer (the output layer is linear)
    z1 = activation(np.dot(xx.reshape(-1, 1), w1) + b1)
    z2 = activation(np.dot(z1, w2) + b2)
    z3 = np.dot(z2, w3) + b3

    # Generate equations for each layer
    equations = {
        "Layer 1": [
            f"z1_{i} = {activation.__name__}({w1[0,i]:.3f}x + {b1[i]:.3f})"
            for i in range(w1.shape[1])
        ],
        "Layer 2": [
            f"z2_{i} = {activation.__name__}({' + '.join([f'{w2[j,i]:.3f}z1_{j}' for j in range(w2.shape[0])])} + {b2[i]:.3f})"
            for i in range(w2.shape[1])
        ],
        "Layer 3": [
            f"y = {' + '.join([f'{w3[i,0]:.3f}z2_{i}' for i in range(w3.shape[0])])} + {b3[0]:.3f}"
        ],
    }

    layer_sizes = [5, 8, 1]
    outputs = [z1, z2, z3]

    if fig is None:
        fig = make_subplots(
            rows=1, cols=3, subplot_titles=["Layer 1", "Layer 2", "Layer 3 (output)"]
        )
        fig.update_layout(
            height=600,
            width=1200,
            title_text="Neural Network Analysis",
            margin=dict(l=50, r=20, t=50, b=50),
        )

    for idx, (layer_size, layer_outputs) in enumerate(zip(layer_sizes, outputs), 1):
        col = col_start + idx - 1
        # Add scatter plot for data points
        fig.add_trace(
            go.Scatter(
                x=X.flatten(),
                y=y,
                mode="markers",
                name="Data",
                marker=dict(color="blue", size=5, opacity=0.1),
                showlegend=False,
            ),
            row=row,
            col=col,
        )
        # Add ground truth function
        fig.add_trace(
            go.Scatter(
                x=xx,
                y=f(xx),
                mode="lines",
                name="Ground Truth",
                line=dict(color="black", width=1, dash="dash"),
                showlegend=False,
            ),
            row=row,
            col=col,
        )
        if idx == len(layer_sizes):
            out = layer_outputs[:, 0]
            fig.add_trace(
                go.Scatter(
                    x=xx,
                    y=out,
                    mode="lines",
                    name="Neuron 0",
                    line=dict(color="red", width=3),
                    hoverinfo="text",
                    hovertext=equations[f"Layer {idx}"][0],
                    showlegend=False,
                ),
                row=row,
                col=col,
            )
        else:
            # Add neuron outputs
            for i in range(layer_size):
                out: npt.NDArray[np.float64] = layer_outputs[:, i]
                if idx < 3:
                    # Rescale and offset each neuron's output so the curves stack vertically
                    out = (out - out.min()) / (out.max() - out.min() + 0.01) * 0.9
                    out = out - i - 1 + layer_size / 2
                fig.add_trace(
                    go.Scatter(
                        x=xx,
                        y=out,
                        mode="lines",
                        name=f"Neuron {i}",
                        line=dict(width=4),
                        hoverinfo="text",
                        hovertext=equations[f"Layer {idx}"][i],
                        showlegend=False,
                    ),
                    row=row,
                    col=col,
                )
        fig.update_xaxes(
            title_text="Feature",
            row=row,
            col=col,
            showgrid=True,
            griddash="dash",
            gridcolor="LightGray",
            minor_showgrid=True,
            minor_griddash="dot",
            minor_gridcolor="LightGray",
            minor_dtick=0.5,
        )
        fig.update_yaxes(
            title_text="Target",
            row=row,
            col=col,
            showgrid=True,
            griddash="dash",
            gridcolor="LightGray",
        )

    # Print equations for each layer
    for layer, eqs in equations.items():
        print(f"\n{layer} equations:")
        for eq in eqs:
            print(eq)

    return fig
```

**Demo**

```python
mlp = neural_network_example(
    X=nonlinear_dataset.X,
    y=nonlinear_dataset.y,
    activation="relu",  # tanh, relu, logistic, identity
    analyze=False,
)
```

```
Prediction error: 0.009900
```
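The manual forward pass in `analyze_neural_network` should reproduce `mlp.predict` exactly: two ReLU hidden layers followed by a linear output. A quick check, assuming the ReLU `mlp` returned by the demo above:

```python
import numpy as np

xx = np.linspace(-3, 3, 101).reshape(-1, 1)

# Recompute the network output from its raw weights
relu = lambda t: np.maximum(t, 0)  # noqa: E731
z1 = relu(xx @ mlp.coefs_[0] + mlp.intercepts_[0])  # hidden layer 1 (5 units)
z2 = relu(z1 @ mlp.coefs_[1] + mlp.intercepts_[1])  # hidden layer 2 (8 units)
z3 = z2 @ mlp.coefs_[2] + mlp.intercepts_[2]        # linear output layer

assert np.allclose(mlp.predict(xx), z3.ravel())
```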
**Analysis**

**Code**

```python
import numpy as np
import numpy.typing as npt
import plotly.graph_objs as go
from plotly.subplots import make_subplots


def plot_activation_functions():
    # Generate x values
    x: npt.NDArray[np.float64] = np.linspace(-5, 5, 200)

    # Define activation functions
    relu = lambda x: np.maximum(0, x)  # noqa: E731
    tanh = np.tanh
    logistic = lambda x: 1 / (1 + np.exp(-x))  # noqa: E731
    identity = lambda x: x  # noqa: E731

    # Create subplots with shared y-axes
    fig = make_subplots(
        rows=1, cols=4, subplot_titles=("ReLU", "Tanh", "Logistic", "Identity"), shared_yaxes=True
    )

    # Add traces for each activation function
    fig.add_trace(go.Scatter(x=x, y=relu(x), name="ReLU"), row=1, col=1)
    fig.add_trace(go.Scatter(x=x, y=tanh(x), name="Tanh"), row=1, col=2)
    fig.add_trace(go.Scatter(x=x, y=logistic(x), name="Logistic"), row=1, col=3)
    fig.add_trace(go.Scatter(x=x, y=identity(x), name="Identity"), row=1, col=4)

    # Update layout
    fig.update_layout(
        height=300,
        width=900,
        title_text="Activation Functions",
        margin=dict(l=40, r=0, t=50, b=50),
    )
    fig.update_xaxes(
        title_text="x", zeroline=True, zerolinewidth=2, zerolinecolor="lightgray", range=[-5.0, 5.0]
    )
    fig.update_yaxes(
        title_text="f(x)",
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor="lightgray",
        range=[-5.0, 5.0],
    )

    # Show the plot
    fig.show()
```

**Plot**

```python
plot_activation_functions()

mlp = neural_network_example(
    X=nonlinear_dataset.X,
    y=nonlinear_dataset.y,
    activation="relu",  # tanh, relu, logistic, identity
    analyze=True,
)
```

```
Prediction error: 0.009900

Layer 1 equations:
z1_0 = relu(0.931x + 0.498)
z1_1 = relu(0.334x + -0.371)
z1_2 = relu(-0.218x + 0.941)
z1_3 = relu(-0.303x + 1.093)
z1_4 = relu(-0.801x + -0.848)

Layer 2 equations:
z2_0 = relu(0.302z1_0 + 0.808z1_1 + -0.812z1_2 + -1.661z1_3 + 0.000z1_4 + -0.283)
z2_1 = relu(-0.859z1_0 + 0.361z1_1 + 0.676z1_2 + 0.485z1_3 + -1.997z1_4 + 0.087)
z2_2 = relu(-0.446z1_0 + 0.504z1_1 + -0.176z1_2 + 0.068z1_3 + -0.111z1_4 + 0.497)
z2_3 = relu(0.631z1_0 + 0.487z1_1 + -0.312z1_2 + -0.111z1_3 + 0.000z1_4 + -0.729)
z2_4 = relu(-0.000z1_0 + -0.000z1_1 + -0.000z1_2 + 0.000z1_3 + -0.000z1_4 + -0.396)
z2_5 = relu(-0.619z1_0 + 0.321z1_1 + 0.110z1_2 + -0.022z1_3 + 0.718z1_4 + -0.858)
z2_6 = relu(-1.151z1_0 + -0.000z1_1 + 0.318z1_2 + 0.250z1_3 + -0.673z1_4 + 0.646)
z2_7 = relu(0.567z1_0 + 0.400z1_1 + -0.143z1_2 + -0.457z1_3 + -0.000z1_4 + -0.523)

Layer 3 equations:
y = 1.975z2_0 + -0.825z2_1 + -0.481z2_2 + -1.145z2_3 + 0.000z2_4 + -1.401z2_5 + -1.383z2_6 + -0.827z2_7 + 1.604
```

## 3D example