5
5
#include < aclnnop/aclnn_layer_norm.h>
6
6
#include < aclnnop/aclnn_repeat.h>
7
7
#include < aclnnop/aclnn_softmax.h>
8
+ #include < aclnnop/aclnn_upsample_nearest_2d.h>
8
9
#include < aclnnop/aclnn_reduce_sum.h>
9
10
10
11
#include < cmath>
@@ -486,10 +487,6 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
486
487
GGML_ASSERT (dst->ne [0 ] == 1 );
487
488
aclTensor* acl_dst = create_acl_tensor (dst);
488
489
489
- uint64_t workspaceSize = 0 ;
490
- aclOpExecutor* executor;
491
- void * workspaceAddr = nullptr ;
492
-
493
490
int64_t reduce_dims_host[] = {3 };
494
491
aclIntArray* reduce_dims = aclCreateIntArray (reduce_dims_host, 1 );
495
492
@@ -503,6 +500,41 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
503
500
aclrtStream stream = ctx.stream ();
504
501
ACL_CHECK (aclnnReduceSum (workspaceAddr, workspaceSize, executor, stream));
505
502
503
+ ACL_CHECK (aclDestroyTensor (acl_src));
504
+ ACL_CHECK (aclDestroyTensor (acl_dst));
505
+ }
506
+
507
+ void ggml_cann_upsample_nearest2d (ggml_backend_cann_context& ctx,
508
+ ggml_tensor* dst) {
509
+
510
+ ggml_tensor* src = dst->src [0 ];
511
+
512
+ aclTensor* acl_src = create_acl_tensor (src, nullptr , nullptr , 0 ,
513
+ ACL_FORMAT_NCHW);
514
+ aclTensor* acl_dst = create_acl_tensor (dst, nullptr , nullptr , 0 ,
515
+ ACL_FORMAT_NCHW);
516
+
517
+ const int scale_factor = dst->op_params [0 ];
518
+ std::vector<int64_t > output_size{dst->ne [1 ], dst->ne [0 ]};
519
+ auto output_size_array = aclCreateIntArray (output_size.data (), 2 );
520
+
521
+ uint64_t workspaceSize = 0 ;
522
+ aclOpExecutor* executor;
523
+ void * workspaceAddr = nullptr ;
524
+
525
+ aclrtStream stream = ctx.stream ();
526
+
527
+ ACL_CHECK (aclnnUpsampleNearest2dGetWorkspaceSize (acl_src, output_size_array,
528
+ acl_dst, &workspaceSize,
529
+ &executor));
530
+ if (workspaceSize > 0 ) {
531
+ workspaceAddr = ctx.alloc_buffer (workspaceSize);
532
+ }
533
+
534
+ ACL_CHECK (aclnnUpsampleNearest2d (workspaceAddr, workspaceSize, executor,
535
+ stream));
536
+
537
+ ACL_CHECK (aclDestroyIntArray (output_size_array));
506
538
ACL_CHECK (aclDestroyTensor (acl_src));
507
539
ACL_CHECK (aclDestroyTensor (acl_dst));
508
540
}
0 commit comments