注意
转到结尾 下载完整示例代码
通过添加解析器来更改输出数量¶
默认情况下,sklearn-onnx 假设分类器有两个输出(标签和概率),回归器有一个输出(预测),转换器有一个输出(转换后的数据)。如果情况并非如此呢?以下示例创建一个自定义转换器和一个自定义解析器,由解析器定义转换后的模型期望的输出数量。
示例 带有选项的新转换器 显示了一个可以选择两种方法计算相同输出的转换器。在本示例中,转换器会同时生成两种计算结果。这并不是一个高效的转换器,但它正好适合演示解析器的用法。默认情况下,转换后的模型只返回一个输出,而这里需要同时保留两个输出。
新的转换器¶
import numpy
from onnxruntime import InferenceSession
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.datasets import load_iris
from skl2onnx import update_registered_converter
from skl2onnx.common.data_types import guess_numpy_type
from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxMatMul, OnnxGemm
from skl2onnx import to_onnx, get_model_alias
class DecorrelateTransformer(TransformerMixin, BaseEstimator):
    """
    Decorrelates correlated gaussian features.

    :param alpha: avoids non invertible matrices
        by adding *alpha* times the identity matrix

    *Attributes*

    * `self.mean_`: average of the training features
    * `self.coef_`: square root of the inverse covariance matrix
    """

    def __init__(self, alpha=0.0):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self.alpha = alpha

    def fit(self, X, y=None, sample_weights=None):
        """Estimate the mean and the whitening matrix from *X*."""
        if sample_weights is not None:
            raise NotImplementedError("sample_weights != None is not implemented.")
        self.mean_ = numpy.mean(X, axis=0, keepdims=True)
        centered = X - self.mean_
        # Empirical covariance of the centered data.
        cov = centered.T @ centered / centered.shape[0]
        if self.alpha != 0:
            # Regularize to keep the matrix invertible.
            cov += numpy.identity(cov.shape[0]) * self.alpha
        eigval, eigvec = numpy.linalg.eig(cov)
        # Inverse square root of the covariance: P diag(l^-1/2) P^T.
        scaling = numpy.diag(eigval ** (-0.5))
        self.coef_ = eigvec @ scaling @ eigvec.transpose()
        return self

    def transform(self, X):
        """Center *X* and apply the decorrelation matrix."""
        return (X - self.mean_) @ self.coef_
# Fit the custom transformer on the Iris features and print the
# decorrelated values for the first five observations.
data = load_iris()
X = data.data
dec = DecorrelateTransformer()
dec.fit(X)
pred = dec.transform(X[:5])
print(pred)
[[ 0.0167562 0.52111756 -1.24946737 -0.56194325]
[-0.0727878 -0.80853732 -1.43841018 -0.37441392]
[-0.69971891 -0.09950908 -1.2138161 -0.3499275 ]
[-1.13063404 -0.13540568 -0.79087008 -0.73938966]
[-0.35790036 0.91900236 -1.04034399 -0.6509266 ]]
转换为带有两个输出的 ONNX¶
让我们尝试转换它并看看会发生什么。
def decorrelate_transformer_shape_calculator(operator):
    """Declare the output types of ``DecorrelateTransformer``.

    The custom parser registers TWO outputs for this operator, so both of
    them must receive a type and a shape.  The original code only typed
    ``outputs[0]``, leaving the second ("gemm") output with an undefined
    shape; both are set here.
    """
    op = operator.raw_operator
    input_type = operator.inputs[0].type.__class__
    # Input shape is (n_observations, n_features); the batch dimension is
    # propagated unchanged, the feature dimension comes from coef_.
    input_dim = operator.inputs[0].type.shape[0]
    operator.outputs[0].type = input_type([input_dim, op.coef_.shape[1]])
    # Fix: give the second output its own type instance as well.
    operator.outputs[1].type = input_type([input_dim, op.coef_.shape[1]])
def decorrelate_transformer_converter(scope, operator, container):
    """Convert ``DecorrelateTransformer`` into an ONNX graph with two outputs.

    The first output ("nogemm") chains Sub and MatMul nodes; the second
    ("gemm") produces the same values with a single Gemm node.
    """
    raw = operator.raw_operator
    opset = container.target_opset
    outputs = operator.outputs
    X = operator.inputs[0]
    dtype = guess_numpy_type(X.type)

    # Constants casted once to the input's dtype.
    mean = raw.mean_.astype(dtype)
    coef = raw.coef_.astype(dtype)

    # Path 1: (X - mean) @ coef.
    centered = OnnxSub(X, mean, op_version=opset)
    no_gemm = OnnxMatMul(
        centered,
        coef,
        op_version=opset,
        output_names=outputs[:1],
    )

    # Path 2: Gemm(X, coef, bias) with bias = -mean @ coef, which folds
    # the centering into the matrix product.
    bias = (-raw.mean_ @ raw.coef_).astype(dtype)
    gemm = OnnxGemm(
        X,
        coef,
        bias,
        op_version=opset,
        alpha=1.0,
        beta=1.0,
        output_names=outputs[1:2],
    )

    no_gemm.add_to(scope, container)
    gemm.add_to(scope, container)
def decorrelate_transformer_parser(scope, model, inputs, custom_parsers=None):
    """Declare the operator and the TWO outputs the converted model exposes.

    sklearn-onnx assumes a single output for a transformer; this parser
    overrides that default by declaring both "nogemm" and "gemm".
    """
    alias = get_model_alias(type(model))
    this_operator = scope.declare_local_operator(alias, model)

    # Single input, forwarded as-is.
    this_operator.inputs.append(inputs[0])

    # Two outputs sharing the input's type class.
    cls_type = inputs[0].type.__class__
    this_operator.outputs.extend(
        [
            scope.declare_local_variable("nogemm", cls_type()),
            scope.declare_local_variable("gemm", cls_type()),
        ]
    )
    return this_operator.outputs
注册还需要声明解析器。
# Register shape calculator, converter AND the custom parser; without the
# parser the converted model would expose a single output only.
update_registered_converter(
    DecorrelateTransformer,
    "SklearnDecorrelateTransformer",
    decorrelate_transformer_shape_calculator,
    decorrelate_transformer_converter,
    parser=decorrelate_transformer_parser,
)
以及转换。
# Convert the fitted transformer to ONNX and run it with onnxruntime;
# the custom parser makes the session return two outputs.
onx = to_onnx(dec, X.astype(numpy.float32), target_opset=14)
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
results = sess.run(None, {"X": X.astype(numpy.float32)})
y1 = results[0]  # first output: Sub + MatMul path ("nogemm")
y2 = results[1]  # second output: single Gemm path ("gemm")
def diff(p1, p2):
    """Return (max absolute difference, max relative difference) of *p2* vs *p1*."""
    flat_ref = p1.ravel()
    flat_got = p2.ravel()
    abs_err = numpy.abs(flat_got - flat_ref)
    rel_err = abs_err / numpy.abs(flat_ref)
    return abs_err.max(), rel_err.max()
# Compare each ONNX output against the scikit-learn prediction; both
# should match up to float32 precision.
print(diff(exp, y1))
print(diff(exp, y2))
(6.04657619085458e-07, 0.0002951417065406967)
(2.01757041717876e-06, 0.0005483764980468156)
脚本的总运行时间:(0 分钟 0.021 秒)