注意
前往文末下载完整示例代码。
模型转换的不同方式¶
本示例利用了一些为方便实现自定义转换器而添加的代码。
使用 onnxruntime 进行预测¶
用于检查转换后的模型是否正常工作的简单函数。
import onnxruntime
import onnx
import numpy
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import KMeans
from sklearn.pipeline import make_pipeline
from onnxruntime import InferenceSession
from skl2onnx import convert_sklearn, to_onnx, wrap_as_onnx_mixin
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxDiv
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
def predict_with_onnxruntime(onx, X):
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
res = sess.run(None, {input_name: X.astype(np.float32)})
return res[0]
简单 KMeans¶
第一种方式:convert_sklearn()
。
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2)
tr.fit(X)
onx = convert_sklearn(
tr, initial_types=[("X", FloatTensorType((None, X.shape[1])))], target_opset=12
)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]
第二种方式:to_onnx()
:不再需要处理 FloatTensorType
。
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2)
tr.fit(X)
onx = to_onnx(tr, X.astype(np.float32), target_opset=12)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 0 0 0 0 0 0]
第三种方式:wrap_as_onnx_mixin()
:将机器学习模型包装成一个继承自 OnnxOperatorMixin
的新类。
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2)
tr.fit(X)
tr_mixin = wrap_as_onnx_mixin(tr, target_opset=12)
onx = tr_mixin.to_onnx(X.astype(np.float32))
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 0 0 0 0 0 0]
第四种方式:wrap_as_onnx_mixin()
:可以在模型拟合之前调用。
X = np.arange(20).reshape(10, 2)
tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), target_opset=12)
tr.fit(X)
onx = tr.to_onnx(X.astype(np.float32))
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 0 0 0 0 0 0]
流水线和自定义对象¶
这是一个简单的缩放器。
class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.op_version = 12
def fit(self, X, y=None):
self.W_ = np.mean(X, axis=0)
self.S_ = np.std(X, axis=0)
return self
def transform(self, X):
return (X - self.W_) / self.S_
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = operator.inputs[0].type
return shape_calculator
def to_onnx_operator(
self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
):
if inputs is None:
raise RuntimeError("Parameter inputs should contain at least one name.")
opv = target_opset or self.op_version
i0 = self.get_inputs(inputs, 0)
W = self.W_.astype(np.float32)
S = self.S_.astype(np.float32)
return OnnxDiv(
OnnxSub(i0, W, op_version=12), S, output_names=outputs, op_version=opv
)
方式 1
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2))
tr.fit(X)
onx = convert_sklearn(
tr, initial_types=[("X", FloatTensorType((None, X.shape[1])))], target_opset=12
)
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 1 0 0 0 0]
方式 2
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2))
tr.fit(X)
onx = to_onnx(tr, X.astype(np.float32), target_opset=12)
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 0 1 1 1 1]
方式 3
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2))
tr.fit(X)
tr_mixin = wrap_as_onnx_mixin(tr, target_opset=12)
tr_mixin.to_onnx(X.astype(np.float32))
print(predict_with_onnxruntime(onx, X))
[0 0 0 0 0 0 1 1 1 1]
方式 4
X = np.arange(20).reshape(10, 2)
tr = wrap_as_onnx_mixin(
make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)), target_opset=12
)
tr.fit(X)
onx = tr.to_onnx(X.astype(np.float32))
print(predict_with_onnxruntime(onx, X))
[1 1 1 1 1 0 0 0 0 0]
显示 ONNX 图¶
最后,让我们看看通过 sklearn-onnx 转换的图。
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
try:
pydot_graph = GetPydotGraph(
onx.graph,
name=onx.graph.name,
rankdir="TB",
node_producer=GetOpNodeProducer(
"docstring", color="yellow", fillcolor="yellow", style="filled"
),
)
pydot_graph.write_dot("pipeline_onnx_mixin.dot")
except AssertionError:
print("GetPydotGraph failed to produce a valid DOT graph. Trying something else.")
from onnx_array_api.plotting.dot_plot import to_dot
with open("pipeline_onnx_mixin.dot", "w", encoding="utf-8") as f:
f.write(to_dot(onx))
import os
os.system("dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot")
import matplotlib.pyplot as plt
image = plt.imread("pipeline_onnx_mixin.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
ax.axis("off")

(np.float64(-0.5), np.float64(3103.5), np.float64(6900.5), np.float64(-0.5))
本示例使用的版本
import sklearn
print("numpy:", numpy.__version__)
print("scikit-learn:", sklearn.__version__)
import skl2onnx
print("onnx: ", onnx.__version__)
print("onnxruntime: ", onnxruntime.__version__)
print("skl2onnx: ", skl2onnx.__version__)
numpy: 2.2.0
scikit-learn: 1.6.0
onnx: 1.18.0
onnxruntime: 1.21.0+cu126
skl2onnx: 1.18.0
脚本总运行时间: (0 分钟 3.267 秒)