diff --git a/test/torch_npu_schema.json b/test/torch_npu_schema.json index 2a825f7d97a975ba2e7eb6576b0cb596c9633a50..85a0dd655d7003614d5187a8dc382d4e0fb537f1 100644 --- a/test/torch_npu_schema.json +++ b/test/torch_npu_schema.json @@ -2610,7 +2610,7 @@ "signature": "(query_layer, key_layer, value_layer, attention_mask, scale, keep_prob, query_transpose=False, key_transpose=False, bmm_score_transpose_a=False, bmm_score_transpose_b=False, value_transpose=False, dx_transpose=False)" }, "torch_npu.npu_fused_infer_attention_score": { - "signature": "(self, query, key, value, pse_shift, atten_mask, actual_seq_lengths, actual_seq_lengths_kv, dequant_scale1, quant_scale1, dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table, query_padding_size, kv_padding_size, num_heads, scale, pre_tokens, next_tokens, input_layout, key_antiquant_scale, key_antiquant_offset, value_antiquant_scale, value_antiquant_offset, key_shared_prefix, value_shared_prefix, actual_shared_prefix_len, query_rope, key_rope, num_key_value_heads, sparse_mode, inner_precise, block_size, antiquant_mode, softmax_lse_flag, key_antiquant_mode, value_antiquant_mode)" + "signature": "(self, query, key, value, pse_shift, atten_mask, actual_seq_lengths, actual_seq_lengths_kv, dequant_scale1, quant_scale1, dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table, query_padding_size, kv_padding_size, key_antiquant_scale, key_antiquant_offset, value_antiquant_scale, value_antiquant_offset, key_shared_prefix, value_shared_prefix, actual_shared_prefix_len, query_rope, key_rope, num_heads, scale, pre_tokens, next_tokens, input_layout, num_key_value_heads, sparse_mode, inner_precise, block_size, antiquant_mode, softmax_lse_flag, key_antiquant_mode, value_antiquant_mode)" }, "torch_npu.npu_fusion_attention": { "signature": "(*args, **kwargs)" diff --git a/torch_npu/onnx/wrapper_onnx_ops.py b/torch_npu/onnx/wrapper_onnx_ops.py index 4b2348d87139d44a65f1c2aef751a53e65013ece..b5cdd078cc69ce79509f5cc7c26d524aba7403be 100644 --- a/torch_npu/onnx/wrapper_onnx_ops.py +++ b/torch_npu/onnx/wrapper_onnx_ops.py @@ -1421,7 +1421,6 @@ def _add_onnx_ops(): torch_npu.npu_rotary_mul = _wrapper_npu_rotary_mul torch_npu.npu_prompt_flash_attention = _wrapper_npu_prompt_flash_attention torch_npu.npu_incre_flash_attention = _wrapper_npu_incre_flash_attention - torch_npu.npu_fused_infer_attention_score = _wrapper_npu_fused_infer_attention_score torch_npu.npu_masked_softmax_with_rel_pos_bias = _wrapper_npu_masked_softmax_with_rel_pos_bias torch_npu.npu_mm_all_reduce_base = _wrapper_npu_mm_all_reduce_base torch_npu.npu_weight_quant_batchmatmul = _wrapper_npu_weight_quant_batchmatmul