注意

转到末尾下载完整示例代码。

当自定义模型既不是分类器也不是回归器时（备选方案）¶

注意

本示例采用《玩转 ONNX 算子》一例中介绍的语法，重写了示例《当自定义模型既不是分类器也不是回归器时》，用于编写自定义转换器、形状计算器和解析器。

scikit-learn 的 API 指定回归器产生一个输出，而分类器产生两个输出：预测标签和概率。这里的目标是添加第三个结果，用于指示概率是否高于给定阈值。这在方法 validate 中实现。

Iris 数据集与评分¶

创建一个新类，它可以训练任何分类器并实现上面提到的方法 validate。

import inspect
import numpy as np
import skl2onnx
import onnx
import sklearn
from sklearn.base import ClassifierMixin, BaseEstimator, clone
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from skl2onnx import update_registered_converter
import os
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
import onnxruntime as rt
from skl2onnx import to_onnx, get_model_alias
from skl2onnx.proto import onnx_proto
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from skl2onnx.algebra.onnx_ops import (
    OnnxGreater,
    OnnxCast,
    OnnxReduceMaxApi18,
    OnnxIdentity,
)
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
import matplotlib.pyplot as plt


class ValidatorClassifier(BaseEstimator, ClassifierMixin):
    """Classifier wrapper that exposes an extra confidence check.

    The wrapped estimator is cloned and trained in :meth:`fit`; labels and
    probabilities are delegated to the fitted clone. :meth:`validate` adds
    a third result telling whether the highest predicted probability is
    above ``threshold``.

    Parameters
    ----------
    estimator : estimator or None
        Model to clone and fit. ``None`` selects
        ``LogisticRegression(solver="liblinear")``.
    threshold : float
        Value the highest class probability must exceed for an
        observation to be flagged as valid.
    """

    def __init__(self, estimator=None, threshold=0.75):
        super().__init__()
        if estimator is None:
            estimator = LogisticRegression(solver="liblinear")
        self.estimator = estimator
        self.threshold = threshold

    def fit(self, X, y, sample_weight=None):
        """Fit a clone of ``estimator`` and store it in ``estimator_``.

        ``sample_weight`` is forwarded only when the wrapped estimator's
        ``fit`` signature declares a parameter of that name.

        Returns
        -------
        self
        """
        fit_kwargs = {}
        if "sample_weight" in inspect.signature(self.estimator.fit).parameters:
            fit_kwargs["sample_weight"] = sample_weight
        self.estimator_ = clone(self.estimator).fit(X, y, **fit_kwargs)
        return self

    def predict(self, X):
        """Return the labels predicted by the fitted estimator."""
        return self.estimator_.predict(X)

    def predict_proba(self, X):
        """Return the class probabilities predicted by the fitted estimator."""
        return self.estimator_.predict_proba(X)

    def validate(self, X):
        """Return 1 for each row whose best probability is above the threshold.

        The comparison is strict so that it agrees with the ONNX ``Greater``
        node emitted by the converter registered for this class; with
        ``>=`` both would disagree when the probability equals the
        threshold.
        """
        best = self.predict_proba(X).max(axis=1)
        # Multiplying the boolean mask by 1 turns it into integers 0/1.
        return (best > self.threshold) * 1


# Load the Iris dataset and split it into train and test subsets.
data = load_iris()
X, y = data.data, data.target
# NOTE(review): no random_state is passed, so the split presumably
# differs from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train the wrapper with its default estimator and threshold.
model = ValidatorClassifier()
model.fit(X_train, y_train)

ValidatorClassifier(estimator=LogisticRegression(solver='liblinear'))

在 Jupyter 环境中，请重新运行此单元格以显示 HTML 表示或信任此笔记本。
在 GitHub 上，HTML 表示无法渲染，请尝试使用 nbviewer.org 加载此页。

现在我们来衡量这个指标，它指示预测的概率是否高于某个阈值。

# One 0/1 flag per test row: 1 when the best class probability passes
# the model's threshold check.
print(model.validate(X_test))

[0 1 1 0 0 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1 0 0 1 0 0 0 1 1 1 1 1 0 1 1 1 0 0
 0]

转换为 ONNX¶

对于新模型，转换会失败，因为库不知道与此新模型关联的任何转换器。

# No converter is registered for ValidatorClassifier at this point, so the
# conversion is expected to raise; the error message is printed.
first_train_row = X_train[:1].astype(np.float32)
try:
    to_onnx(model, first_train_row, target_opset=12)
except RuntimeError as exc:
    print(exc)

Unable to find a shape calculator for type '<class '__main__.ValidatorClassifier'>'.
It usually means the pipeline being converted contains a
transformer or a predictor with no corresponding converter
implemented in sklearn-onnx. If the converted is implemented
in another library, you need to register
the converted so that it can be used by sklearn-onnx (function
update_registered_converter). If the model is not yet covered
by sklearn-onnx, you may raise an issue to
https://github.com/onnx/sklearn-onnx/issues
to get the converter implemented or even contribute to the
project. If the model is a custom model, a new converter must
be implemented. Examples can be found in the gallery.

自定义转换器¶

我们重用了《为您的模型编写自己的转换器》中的部分代码。形状计算器定义了转换后模型每个输出的形状。

def validator_classifier_shape_calculator(operator):
    """Set the type and shape of the three outputs of the converted model.

    Parameters
    ----------
    operator
        Operator being converted. Its first input provides the number of
        observations, ``raw_operator`` is the fitted ``ValidatorClassifier``
        and ``outputs`` are the ONNX output variables to annotate.

    Raises
    ------
    RuntimeError
        If the operator does not have exactly three outputs.
    """
    input0 = operator.inputs[0]  # first input in ONNX graph
    outputs = operator.outputs  # outputs in ONNX graph
    op = operator.raw_operator  # scikit-learn model (must be fitted)
    if len(outputs) != 3:
        raise RuntimeError("3 outputs expected not {}.".format(len(outputs)))

    N = input0.type.shape[0]  # number of observations
    C = op.estimator_.classes_.shape[0]  # number of classes

    outputs[0].type = Int64TensorType([N])  # label
    outputs[1].type = FloatTensorType([N, C])  # probabilities
    # The validation flag is computed per observation (the probabilities
    # are reduced over the class axis), so it has N elements, not C.
    outputs[2].type = Int64TensorType([N])  # validation

然后是转换器。

def validator_classifier_converter(scope, operator, container):
    """Add the ONNX nodes of a fitted ``ValidatorClassifier`` to ``container``.

    The wrapped estimator is converted through ``OnnxSubEstimator``; its
    labels and probabilities are copied to the first two outputs. The third
    output is the INT64 cast of ``max(probabilities, axis=1) > threshold``.
    """
    opset = container.target_opset
    fitted = operator.raw_operator  # scikit-learn model (must be fitted)
    graph_input = operator.inputs[0]  # first input in ONNX graph
    outputs = operator.outputs  # outputs in ONNX graph

    # The model wraps another one: `OnnxSubEstimator` calls the converter
    # registered for the inner estimator. zipmap is switched off,
    # presumably so that probabilities come out as a plain tensor.
    sub_model = OnnxSubEstimator(
        fitted.estimator_, graph_input, op_version=opset, options={"zipmap": False}
    )

    # Highest probability of each row, compared with the threshold.
    best_proba = OnnxReduceMaxApi18(
        sub_model[1], axes=[1], keepdims=0, op_version=opset
    )
    threshold = np.array([fitted.threshold], dtype=np.float32)
    above = OnnxGreater(best_proba, threshold, op_version=opset)
    flags = OnnxCast(above, to=onnx_proto.TensorProto.INT64, op_version=opset)

    # Identity nodes bind each result to the output name declared for it.
    final_nodes = (
        OnnxIdentity(
            sub_model[0], output_names=[outputs[0].full_name], op_version=opset
        ),
        OnnxIdentity(
            sub_model[1], output_names=[outputs[1].full_name], op_version=opset
        ),
        OnnxIdentity(flags, output_names=[outputs[2].full_name], op_version=opset),
    )
    for node in final_nodes:
        node.add_to(scope, container)

然后是注册。

# Register the shape calculator and the converter for ValidatorClassifier
# under the alias "CustomValidatorClassifier". No parser is supplied here.
update_registered_converter(
    ValidatorClassifier,
    "CustomValidatorClassifier",
    validator_classifier_shape_calculator,
    validator_classifier_converter,
)

然后是转换……

# The converter is registered but no custom parser is, so the shape
# calculator is expected to reject the output count; the message is printed.
first_test_row = X_test[:1].astype(np.float32)
try:
    to_onnx(model, first_test_row, target_opset=12)
except RuntimeError as exc:
    print(exc)

3 outputs expected not 2.

转换失败了，因为库期望该模型像分类器一样只产生两个输出。我们需要添加一个自定义解析器，告诉库此模型会产生三个输出。

自定义解析器¶

def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
    """Declare the operator for ``model`` with one input and three outputs.

    The outputs are declared in this order: ``val_label`` (int64),
    ``val_prob`` (float) and ``val_val`` (int64). ``custom_parsers`` is
    accepted but not used.

    Returns
    -------
    list
        The output variables of the declared operator.
    """
    operator = scope.declare_local_operator(get_model_alias(type(model)), model)

    # Only the first input is connected to the operator.
    operator.inputs.append(inputs[0])

    declared_outputs = (
        ("val_label", Int64TensorType()),
        ("val_prob", FloatTensorType()),
        ("val_val", Int64TensorType()),
    )
    for name, tensor_type in declared_outputs:
        operator.outputs.append(scope.declare_local_variable(name, tensor_type))

    return operator.outputs

# Register again under the same alias, this time also passing the custom
# parser that declares three outputs.
update_registered_converter(
    ValidatorClassifier,
    "CustomValidatorClassifier",
    validator_classifier_shape_calculator,
    validator_classifier_converter,
    parser=validator_classifier_parser,
)

再次转换。

# Convert the fitted model for opset 12, passing one float32 test row
# (presumably used to infer the input type).
model_onnx = to_onnx(model, X_test[:1].astype(np.float32), target_opset=12)

最终测试¶

现在我们需要检查 ONNX 的结果是否一致。

X32 = X_test[:5].astype(np.float32)

# Run the converted model with onnxruntime on the CPU provider.
sess = rt.InferenceSession(
    model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
)
results = sess.run(None, {"X": X32})

# Print every scikit-learn result next to the ONNX output at the same index.
comparisons = (
    ("--labels--", model.predict),
    ("--probabilities--", model.predict_proba),
    ("--validation--", model.validate),
)
for index, (header, sklearn_method) in enumerate(comparisons):
    print(header)
    print("sklearn", sklearn_method(X32))
    print("onnx", results[index])

--labels--
sklearn [2 1 0 2 2]
onnx [2 1 0 2 2]
--probabilities--
sklearn [[4.35572853e-04 2.50370783e-01 7.49193644e-01]
 [6.40021595e-02 7.74525152e-01 1.61472688e-01]
 [8.72966069e-01 1.27016600e-01 1.73305757e-05]
 [2.88656526e-03 4.13689781e-01 5.83423654e-01]
 [6.84848807e-04 4.18572039e-01 5.80743112e-01]]
onnx [[4.3557768e-04 2.5037074e-01 7.4919367e-01]
 [6.4002089e-02 7.7452540e-01 1.6147259e-01]
 [8.7296611e-01 1.2701656e-01 1.7331236e-05]
 [2.8865638e-03 4.1368976e-01 5.8342361e-01]
 [6.8487308e-04 4.1857198e-01 5.8074319e-01]]
--validation--
sklearn [0 1 1 0 0]
onnx [0 1 1 0 0]

看起来不错。

显示 ONNX 图¶

# Build a pydot graph from the ONNX graph and save it in DOT format.
pydot_graph = GetPydotGraph(
    model_onnx.graph,
    name=model_onnx.graph.name,
    rankdir="TB",
    node_producer=GetOpNodeProducer(
        "docstring", color="yellow", fillcolor="yellow", style="filled"
    ),
)
pydot_graph.write_dot("validator_classifier.dot")

# Render the DOT file to PNG with the Graphviz `dot` command line tool.
# NOTE(review): the exit status is ignored; if `dot` is missing, the
# failure presumably only shows up in plt.imread below.
os.system("dot -O -Gdpi=300 -Tpng validator_classifier.dot")

# Show the rendered image without axes.
image = plt.imread("validator_classifier.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
ax.axis("off")

(np.float64(-0.5), np.float64(3557.5), np.float64(4934.5), np.float64(-0.5))

本示例使用的版本

# Report the version of every package used by this example.
for label, module in (
    ("numpy:", np),
    ("scikit-learn:", sklearn),
    ("onnx: ", onnx),
    ("onnxruntime: ", rt),
    ("skl2onnx: ", skl2onnx),
):
    print(label, module.__version__)

numpy: 2.2.0
scikit-learn: 1.6.0
onnx:  1.18.0
onnxruntime:  1.21.0+cu126
skl2onnx:  1.18.0

脚本总运行时间： (0 分 2.343 秒)

由 Sphinx-Gallery 生成的示例集锦