模型转换的不同方法

此示例借助一些新增的代码,以简单的方式实现自定义转换器。

使用 onnxruntime 预测

用于检查转换后的模型是否正常工作的简单函数。

import onnxruntime
import onnx
import numpy
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import KMeans
from sklearn.pipeline import make_pipeline
from onnxruntime import InferenceSession
from skl2onnx import convert_sklearn, to_onnx, wrap_as_onnx_mixin
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxDiv
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin


def predict_with_onnxruntime(onx, X):
    """Run the ONNX model *onx* on *X* with onnxruntime; return the first output.

    The model is serialized and loaded into a CPU-only inference session.
    *X* is cast to float32 and fed to the first input declared by the session.
    """
    session = InferenceSession(
        onx.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    first_input = session.get_inputs()[0]
    feeds = {first_input.name: X.astype(np.float32)}
    outputs = session.run(None, feeds)
    return outputs[0]

简单的 KMeans

第一种方法:convert_sklearn()

# Method 1: describe the input explicitly and call convert_sklearn().
X = np.arange(20).reshape(-1, 2)
tr = KMeans(n_clusters=2).fit(X)

# The first dimension is left as None; the column count is taken from X.
onx = convert_sklearn(
    tr,
    initial_types=[("X", FloatTensorType((None, X.shape[1])))],
    target_opset=12,
)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

第二种方法:to_onnx():无需再使用 FloatTensorType

# Method 2: to_onnx() receives a float32 copy of the data instead of an
# explicit FloatTensorType declaration.
X = np.arange(20).reshape(-1, 2)
tr = KMeans(n_clusters=2).fit(X)

onx = to_onnx(tr, X.astype(np.float32), target_opset=12)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

第三种方法:wrap_as_onnx_mixin():将机器学习模型包装到一个新的类中,该类继承自 OnnxOperatorMixin

# Method 3: fit a plain KMeans first, then wrap the fitted model so that it
# exposes a to_onnx() method.
X = np.arange(20).reshape(-1, 2)
tr = KMeans(n_clusters=2).fit(X)

tr_mixin = wrap_as_onnx_mixin(tr, target_opset=12)
onx = tr_mixin.to_onnx(X.astype(np.float32))

print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

第四种方法:wrap_as_onnx_mixin():可以在拟合模型之前调用。

# Method 4: wrap the unfitted estimator, then fit and convert the wrapper.
X = np.arange(20).reshape(-1, 2)
tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), target_opset=12)
tr.fit(X)

onx = tr.to_onnx(X.astype(np.float32))

print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

管道和自定义对象

这是一个简单的缩放器。

class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
    """Simple scaler that also knows how to describe itself as ONNX operators.

    ``fit`` stores the mean (``W_``) and standard deviation (``S_``) of the
    training data along axis 0; ``transform`` returns ``(X - W_) / S_``.
    ``to_onnx_operator`` expresses the same computation with ``OnnxSub``
    followed by ``OnnxDiv``.
    """

    def __init__(self):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        # Opset used by to_onnx_operator() when no target_opset is supplied.
        self.op_version = 12

    def fit(self, X, y=None):
        """Store the mean and standard deviation of X along axis 0; return self."""
        self.W_ = np.mean(X, axis=0)
        self.S_ = np.std(X, axis=0)
        return self

    def transform(self, X):
        """Return X centered by ``W_`` and divided by ``S_``."""
        return (X - self.W_) / self.S_

    def onnx_shape_calculator(self):
        """Return a shape calculator that copies the input type to the output."""

        def shape_calculator(operator):
            operator.outputs[0].type = operator.inputs[0].type

        return shape_calculator

    def to_onnx_operator(
        self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
    ):
        """Build the ONNX expression ``(input - W_) / S_``.

        Parameters:
            inputs: input names or variables; must not be None.
            outputs: names given to the outputs of the final Div node.
            target_opset: opset for the created nodes; falls back to
                ``self.op_version`` when falsy.
            **kwargs: accepted for interface compatibility and ignored here.

        Returns:
            The ``OnnxDiv`` node producing the scaled output.

        Raises:
            RuntimeError: if ``inputs`` is None.
        """
        if inputs is None:
            raise RuntimeError("Parameter inputs should contain at least one name.")
        opv = target_opset or self.op_version
        i0 = self.get_inputs(inputs, 0)
        # The fitted statistics are cast to float32 before being embedded
        # as constants in the graph.
        W = self.W_.astype(np.float32)
        S = self.S_.astype(np.float32)
        # Both nodes use the same resolved opset; Sub was previously pinned
        # to 12 regardless of the requested target_opset.
        return OnnxDiv(
            OnnxSub(i0, W, op_version=opv), S, output_names=outputs, op_version=opv
        )

方法 1

# Method 1 on a pipeline: custom scaler followed by KMeans, converted with
# convert_sklearn() and an explicit input declaration.
X = np.arange(20).reshape(-1, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)).fit(X)

onx = convert_sklearn(
    tr,
    initial_types=[("X", FloatTensorType((None, X.shape[1])))],
    target_opset=12,
)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

方法 2

# Method 2 on a pipeline: to_onnx() with a float32 copy of the data.
X = np.arange(20).reshape(-1, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)).fit(X)

onx = to_onnx(tr, X.astype(np.float32), target_opset=12)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

方法 3

# Method 3 on a pipeline: fit first, then wrap the fitted pipeline so that it
# exposes a to_onnx() method.
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2))
tr.fit(X)

tr_mixin = wrap_as_onnx_mixin(tr, target_opset=12)
# Keep the converted model: without this assignment the prediction below
# would silently reuse the `onx` left over from an earlier conversion.
onx = tr_mixin.to_onnx(X.astype(np.float32))

print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]

方法 4

# Method 4 on a pipeline: wrap the unfitted pipeline, then fit and convert
# the wrapper itself.
X = np.arange(20).reshape(-1, 2)
tr = wrap_as_onnx_mixin(
    make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)),
    target_opset=12,
)
tr.fit(X)

onx = tr.to_onnx(X.astype(np.float32))

print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 1 1 1 1 1]

显示 ONNX 图

最后,让我们看看使用 *sklearn-onnx* 转换的图。

import os  # noqa

import matplotlib.pyplot as plt  # noqa
from onnx.tools.net_drawer import GetOpNodeProducer, GetPydotGraph  # noqa

# Describe the graph of the last converted model (`onx`) and save it as a
# .dot file.
pydot_graph = GetPydotGraph(
    onx.graph,
    name=onx.graph.name,
    rankdir="TB",
    node_producer=GetOpNodeProducer(
        "docstring",
        color="yellow",
        fillcolor="yellow",
        style="filled",
    ),
)
pydot_graph.write_dot("pipeline_onnx_mixin.dot")

# Render the .dot file with the external `dot` executable; the image is read
# back from pipeline_onnx_mixin.dot.png just below.
os.system("dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot")

# Show the rendered image without axes.
image = plt.imread("pipeline_onnx_mixin.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
ax.axis("off")
plot convert syntax
(-0.5, 3103.5, 6900.5, -0.5)

此示例使用的版本

import skl2onnx  # noqa
import sklearn  # noqa

# Report the versions of the packages this example ran with.
print("numpy:", numpy.__version__)
print("scikit-learn:", sklearn.__version__)
print("onnx: ", onnx.__version__)
print("onnxruntime: ", onnxruntime.__version__)
print("skl2onnx: ", skl2onnx.__version__)
numpy: 1.23.5
scikit-learn: 1.4.dev0
onnx:  1.15.0
onnxruntime:  1.16.0+cu118
skl2onnx:  1.16.0

脚本的总运行时间:(0 分钟 3.140 秒)

由 Sphinx-Gallery 生成的示例图库