/*
 *
 * Copyright (c) 2017 Texas Instruments Incorporated
 *
 * All rights reserved not granted herein.
 *
 * Limited License.
 *
 * Texas Instruments Incorporated grants a world-wide, royalty-free, non-exclusive
 * license under copyrights and patents it now or hereafter owns or controls to make,
 * have made, use, import, offer to sell and sell ("Utilize") this software subject to the
 * terms herein.  With respect to the foregoing patent license, such license is granted
 * solely to the extent that any such patent is necessary to Utilize the software alone.
 * The patent license shall not apply to any combinations which include this software,
 * other than combinations with devices manufactured by or for TI ("TI Devices").
 * No hardware patent is licensed hereunder.
 *
 * Redistributions must preserve existing copyright notices and reproduce this license
 * (including the above copyright notice and the disclaimer and (if applicable) source
 * code license limitations below) in the documentation and/or other materials provided
 * with the distribution
 *
 * Redistribution and use in binary form, without modification, are permitted provided
 * that the following conditions are met:
 *
 * *       No reverse engineering, decompilation, or disassembly of this software is
 * permitted with respect to any software provided in binary form.
 *
 * *       any redistribution and use are licensed by TI for use only with TI Devices.
 *
 * *       Nothing shall obligate TI to provide you with source code for the software
 * licensed and provided to you in object code.
 *
 * If software source code is provided to you, modification and redistribution of the
 * source code are permitted provided that the following conditions are met:
 *
 * *       any redistribution and use of the source code, including any resulting derivative
 * works, are licensed by TI for use only with TI Devices.
 *
 * *       any redistribution and use of any object code compiled from the source code
 * and any resulting derivative works, are licensed by TI for use only with TI Devices.
 *
 * Neither the name of Texas Instruments Incorporated nor the names of its suppliers
 *
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * DISCLAIMER.
 *
 * THIS SOFTWARE IS PROVIDED BY TI AND TI'S LICENSORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TI AND TI'S LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

 #include <utils/draw2d/include/draw2d.h>
 #include <utils/perf_stats/include/app_perf_stats.h>
 #include "itidl_ti.h"
 #include "app_common.h"
 #include "app_test.h"
 #include "mult_yolov5_post_copy.h"
 #include "math.h"
 
 /*
  * define this macro to enable TIDL intermediate layer traces on target
  */
 
 #undef APP_TIDL_TRACE_DUMP
 
 /*
  * This is the size of trace buffer allocated in host memory and
  * shared with target.
  */
 #define TIVX_TIDL_TRACE_DATA_SIZE  (128 * 1024 * 1024)
 
 extern const char imgnet_labels[1001][256];
 static const char *tensor_num_str[] = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"};
 
 #define APP_DEBUG
 #ifdef APP_DEBUG
 #define APP_PRINTF(f_, ...) printf((f_), ##__VA_ARGS__)
 #else
 #define APP_PRINTF(f_, ...)
 #endif
 
 typedef struct {
 
     /* config options */
     char tidl_config_file_path[APP_MAX_FILE_PATH];
     char tidl_network_file_path[APP_MAX_FILE_PATH];
     char input_file_path[APP_MAX_FILE_PATH];
     char input_file_list[APP_MAX_FILE_PATH];
     char output_file_path[APP_MAX_FILE_PATH];
     char ti_logo_file_path[APP_MAX_FILE_PATH];
 
     uint32_t num_input_tensors;
     uint32_t num_output_tensors;
 
     /* Input image params */
     vx_df_image df_image;
     void *data_ptr;
     tivx_utils_bmp_image_params_t imgParams;
     vx_uint32 img_width;
     vx_uint32 img_height;
     vx_uint32 img_stride;
 
     sTIDL_IOBufDesc_t   ioBufDesc;
 
     uint8_t *pInPlanes;
     uint8_t *pOutPlanes;
 
   uint8_t *temp;
   char save_type;
 
     uint8_t  *pDisplayBuf888;
     uint16_t *pDisplayBuf565;
 
     /* OpenVX references */
     vx_context context;
     vx_graph   graph;
     vx_kernel  kernel;
     vx_node    tidl_node;
 
     vx_user_data_object  config;
     vx_user_data_object  network;
     vx_user_data_object  createParams;
     vx_user_data_object  inArgs;
     vx_user_data_object  outArgs;
     vx_user_data_object  traceData;
 
     vx_tensor  input_tensors[APP_MAX_TENSORS];
     vx_tensor  output_tensors[APP_MAX_TENSORS];
 
     vx_graph   disp_graph;
     vx_node disp_node;
     vx_image disp_image;
 
     vx_user_data_object disp_params_obj;
     tivx_display_params_t disp_params;
     vx_rectangle_t disp_rect;
     vx_imagepatch_addressing_t image_addr;
 
     vx_size in_tensor_data_type[APP_MAX_TENSORS];
 
     vx_uint32 display_option;
     vx_uint32 delay_in_msecs;
     vx_uint32 num_iterations;
 
     Draw2D_Handle  pHndl;
 
     uint32_t is_interactive;
     vx_int32 test_mode;
     vx_int32 test_case;
 
     tivx_task task;
     uint32_t stop_task;
     uint32_t stop_task_done;
 
     app_perf_point_t total_perf;
     app_perf_point_t fileio_perf;
     app_perf_point_t draw_perf;
     ttevxYoloV5PostProcParams yolov5params;
     ttevxYoloV5Detections detections;
     TIDL_outArgs out_args;
 } AppObj;
 
 AppObj gAppObj;
 
 static int app_parse_cmd_line_args(AppObj *obj, int argc, char *argv[]);
 static vx_status app_init(AppObj *obj);
 static void app_deinit(AppObj *obj);
 static vx_status app_create_graph(AppObj *obj);
 static vx_status app_verify_graph(AppObj *obj);
 static vx_status app_run_graph(AppObj *obj);
 static vx_status app_run_graph_interactive(AppObj *obj);
 static void app_delete_graph(AppObj *obj);
 
 static vx_user_data_object readConfig(AppObj *obj, vx_context context, char *config_file, uint32_t *num_input_tensors, uint32_t *num_output_tensors);
 static vx_user_data_object readNetwork(vx_context context, char *network_file);
 static vx_user_data_object setCreateParams(vx_context context);
 static vx_user_data_object setInArgs(vx_context context);
 static vx_user_data_object setOutArgs(vx_context context);
 static void createInputTensors(AppObj *obj, vx_context context, vx_user_data_object config, vx_tensor *input_tensors);
 static void createOutputTensors(AppObj *obj, vx_context context, vx_user_data_object config, vx_tensor *output_tensors);
 static vx_status readInput(AppObj *obj, vx_context context, vx_user_data_object config, vx_tensor *input_tensors, char *input_file);
 static void displayOutput(AppObj *obj, vx_user_data_object config, vx_tensor *output_tensors, char *output_file);
 static vx_size getTensorDataType(vx_int32 tidl_type);
 static const char* vxTypeToStr(vx_enum type);
static vx_size vxTypeSize(vx_enum type);
 // #define APP_WRITE_PRE_PROC_OUTPUT;
 #ifdef APP_WRITE_PRE_PROC_OUTPUT
 static vx_status writePreProcOutput(char* file_name, vx_tensor output);
 #endif
 static uint32_t num_params;
 static uint32_t max_params;
 
 static const char* vxTypeToStr(vx_enum type)
 {
     switch(type)
     {
         case VX_TYPE_INT8:   return "VX_TYPE_INT8";
         case VX_TYPE_UINT8:  return "VX_TYPE_UINT8";
         case VX_TYPE_INT16:  return "VX_TYPE_INT16";
         case VX_TYPE_UINT16: return "VX_TYPE_UINT16";
         default:             return "VX_TYPE_UNKNOWN";
     }
 }
 
static vx_size vxTypeSize(vx_enum type)
{
    switch(type)
    {
        case VX_TYPE_INT8:
        case VX_TYPE_UINT8:
            return sizeof(vx_uint8);
        case VX_TYPE_INT16:
        case VX_TYPE_UINT16:
            return sizeof(vx_uint16);
        default:
            return 1;
    }
}

 
 int app_tidl_main(int argc, char* argv[])
 {
     int status = 0;
 
     AppObj *obj = &gAppObj;
 
     printf("hello: %s\n", argv[0]);
     gAppObj.save_type = '0';
 
     status = app_parse_cmd_line_args(obj, argc, argv);
     if(status == VX_SUCCESS)
     {
         status = app_init(obj);
         for(int i = 0;i<5;i++)
         {
             obj->out_args.scale[i] = obj->ioBufDesc.outTensorScale[i];
         }
     }
     if(status == VX_SUCCESS)
     {
         status = app_create_graph(obj);
     }
     if(status == VX_SUCCESS)
     {
         status = app_verify_graph(obj);
     }
 
     if(status == VX_SUCCESS)
     {
         if(obj->is_interactive)
         {
             status = app_run_graph_interactive(obj);
         }
         else
         {
             status = app_run_graph(obj);
             if(status == VX_FAILURE)
             {
                 printf("Error processing graph!\n");
             }
         }
     }
 
     app_delete_graph(obj);
     app_deinit(obj);
 
     if(obj->test_mode == 1)
     {
         if((test_result == vx_false_e) || (status != VX_SUCCESS))
         {
             printf("\n\nTEST FAILED\n\n");
             print_new_checksum_structs();
             status = (status == VX_SUCCESS) ? VX_FAILURE : status;
         }
         else
         {
             printf("\n\nTEST PASSED\n\n");
         }
     }
 
     return status;
 }
 
 static int app_init(AppObj *obj)
 {
 
     int status = 0;
 
     uint32_t num_input_tensors = 0;
     uint32_t num_output_tensors = 0;
 
     APP_PRINTF("app_tidl: Init ... \n");
 
     obj->context = vxCreateContext();
     APP_ASSERT_VALID_REF(obj->context);
 
     /* Create a vx_array object and read the config data*/
     obj->config = readConfig(obj, obj->context, &obj->tidl_config_file_path[0], &num_input_tensors, &num_output_tensors);
     APP_ASSERT_VALID_REF(obj->config)
 
     /* Save a copy of number of input/output tensors required as per config */
     obj->num_input_tensors  = num_input_tensors;
     obj->num_output_tensors = num_output_tensors;
 
     /* Create a vx_tensor object and read the network data */
     obj->network = readNetwork(obj->context, &obj->tidl_network_file_path[0]);
     APP_ASSERT_VALID_REF(obj->network)
 
     obj->createParams = setCreateParams(obj->context);
     APP_ASSERT_VALID_REF(obj->createParams)
 
     obj->inArgs = setInArgs(obj->context);
     APP_ASSERT_VALID_REF(obj->inArgs)
 
     obj->outArgs = setOutArgs(obj->context);
     APP_ASSERT_VALID_REF(obj->outArgs)
 
 #ifdef APP_TIDL_TRACE_DUMP
     obj->traceData = vxCreateUserDataObject(obj->context, "TIDL_traceData", TIVX_TIDL_TRACE_DATA_SIZE, NULL);
     APP_ASSERT_VALID_REF(obj->traceData)
 #endif
 
     obj->kernel = tivxAddKernelTIDL(obj->context, num_input_tensors, num_output_tensors);
     APP_ASSERT_VALID_REF(obj->kernel)
 
     if ((vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1)) && (obj->display_option == 1))
     {
         obj->disp_image = vxCreateImage(obj->context, DISPLAY_WIDTH, DISPLAY_HEIGHT, VX_DF_IMAGE_RGB);
         APP_ASSERT_VALID_REF(obj->disp_image)
 
         obj->image_addr.dim_x = DISPLAY_WIDTH;
         obj->image_addr.dim_y = DISPLAY_HEIGHT;
         obj->image_addr.stride_x = 3; /* RGB */
         obj->image_addr.stride_y = DISPLAY_WIDTH * 3;
         obj->image_addr.scale_x = VX_SCALE_UNITY;
         obj->image_addr.scale_y = VX_SCALE_UNITY;
         obj->image_addr.step_x = 1;
         obj->image_addr.step_y = 1;
 
         obj->disp_rect.start_x = 0;
         obj->disp_rect.start_y = 0;
         obj->disp_rect.end_x = DISPLAY_WIDTH;
         obj->disp_rect.end_y = DISPLAY_HEIGHT;
 
         memset(&obj->disp_params, 0, sizeof(tivx_display_params_t));
 
         obj->disp_params.opMode = TIVX_KERNEL_DISPLAY_BUFFER_COPY_MODE;
         obj->disp_params.pipeId = 0;
         obj->disp_params.outWidth = DISPLAY_WIDTH;
         obj->disp_params.outHeight = DISPLAY_HEIGHT;
         obj->disp_params.posX = (1920-DISPLAY_WIDTH)/2;
         obj->disp_params.posY = (1080-DISPLAY_HEIGHT)/2;
 
         obj->disp_params_obj = vxCreateUserDataObject(obj->context, "tivx_display_params_t", sizeof(tivx_display_params_t), &obj->disp_params);
         APP_ASSERT_VALID_REF(obj->disp_params_obj)
 
         tivxHwaLoadKernels(obj->context);
     }
 
     tivxTIDLLoadKernels(obj->context);
 
   obj->temp = tivxMemAlloc(MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3 * 3, TIVX_MEM_EXTERNAL);
     obj->pInPlanes = tivxMemAlloc(MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3, TIVX_MEM_EXTERNAL);
     if(obj->pInPlanes == NULL) {
         printf("app_tidl: ERROR: Unable to allocate memory for inPlanes, size = %d\n", MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3);
         status = -1;
     }
 
     obj->pOutPlanes = tivxMemAlloc(MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3, TIVX_MEM_EXTERNAL);
     if(obj->pOutPlanes == NULL) {
         printf("app_tidl: ERROR: Unable to allocate memory for outPlanes, size = %d\n", MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3);
         status = -1;
     }
 
     obj->pDisplayBuf888 = tivxMemAlloc(DISPLAY_WIDTH * DISPLAY_HEIGHT * 3, TIVX_MEM_EXTERNAL);
     if(obj->pDisplayBuf888 == NULL) {
         printf("app_tidl: ERROR: Unable to allocate memory for displayBuf888, size = %d\n", DISPLAY_WIDTH * DISPLAY_HEIGHT * 3);
         status = -1;
     }
 
     obj->pDisplayBuf565 = tivxMemAlloc(DISPLAY_WIDTH * DISPLAY_HEIGHT * sizeof(uint16_t), TIVX_MEM_EXTERNAL);
     if(obj->pDisplayBuf565 == NULL) {
         printf("app_tidl: ERROR: Unable to allocate memory for displayBuf565, size = %ld\n", DISPLAY_WIDTH * DISPLAY_HEIGHT * sizeof(uint16_t));
         status = -1;
     }
 
     {
       Draw2D_BufInfo sBufInfo;
       Draw2D_LinePrm sLinePrm;
       Draw2D_FontPrm sTop5Prm;
 
       char banner_file[APP_MAX_FILE_PATH];
 
       snprintf(banner_file, APP_MAX_FILE_PATH, "%s/ti_logo.bmp", obj->ti_logo_file_path);
 
       Draw2D_create(&obj->pHndl);
 
       if(obj->pHndl != NULL)
       {
           sBufInfo.bufWidth    = DISPLAY_WIDTH;
           sBufInfo.bufHeight   = DISPLAY_HEIGHT;
           sBufInfo.bufPitch[0] = DISPLAY_WIDTH * 2;
           sBufInfo.dataFormat = DRAW2D_DF_BGR16_565;
           sBufInfo.transperentColor = 0;
           sBufInfo.transperentColorFormat = DRAW2D_DF_BGR16_565;
 
           sBufInfo.bufAddr[0] = (uint8_t *)obj->pDisplayBuf565;
 
           Draw2D_setBufInfo(obj->pHndl, &sBufInfo);
 
           Draw2D_clearBuf(obj->pHndl);
 
           Draw2D_insertBmp(obj->pHndl, banner_file, 0, 0);
 
           sLinePrm.lineColor = RGB888_TO_RGB565(255, 255, 255);
           sLinePrm.lineSize  = 3;
           sLinePrm.lineColorFormat = DRAW2D_DF_BGR16_565;
 
           /* Draw a vertial line */
           Draw2D_drawLine(obj->pHndl, DISPLAY_WIDTH/2, 0, DISPLAY_WIDTH/2, DISPLAY_HEIGHT, &sLinePrm);
 
           /* green color heading */
           Draw2D_setFontColor(RGB888_TO_RGB565(0, 255, 0), RGB888_TO_RGB565(0, 0, 0), RGB888_TO_RGB565(0, 0, 0));
 
           sTop5Prm.fontIdx = 0;
           Draw2D_drawString(obj->pHndl, (DISPLAY_WIDTH/2) + 40, 140, "Top 5 classes", &sTop5Prm);
 
           Draw2D_resetFontColor();
       }
     }
 
     appPerfPointSetName(&obj->total_perf , "TOTAL");
     appPerfPointSetName(&obj->draw_perf  , "DRAW");
     appPerfPointSetName(&obj->fileio_perf, "FILEIO");
 
     APP_PRINTF("app_tidl: Init ... Done.\n");
 
     return status;
 }
 
 static void app_deinit(AppObj *obj)
 {
     APP_PRINTF("app_tidl: De-init ... \n");
 
     tivxTIDLUnLoadKernels(obj->context);
 
     if (vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1) && (obj->display_option == 1))
     {
         tivxHwaUnLoadKernels(obj->context);
     }
 
     vxRemoveKernel(obj->kernel);
 
     vxReleaseContext(&obj->context);
 
   tivxMemFree(obj->temp, MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3 * 3, TIVX_MEM_EXTERNAL);
     tivxMemFree(obj->pInPlanes, MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3, TIVX_MEM_EXTERNAL);
     tivxMemFree(obj->pOutPlanes, MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3, TIVX_MEM_EXTERNAL);
 
     tivxMemFree(obj->pDisplayBuf888, DISPLAY_WIDTH * DISPLAY_HEIGHT * 3, TIVX_MEM_EXTERNAL);
     tivxMemFree(obj->pDisplayBuf565 , DISPLAY_WIDTH * DISPLAY_HEIGHT * sizeof(uint16_t), TIVX_MEM_EXTERNAL);
 
 
     {
         Draw2D_delete(obj->pHndl);
     }
 
     APP_PRINTF("app_tidl: De-init ... Done.\n");
 }
 
 static void app_delete_graph(AppObj *obj)
 {
     uint32_t id;
 
     APP_PRINTF("app_tidl: Delete ... \n");
 
     if (vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1) && (obj->display_option == 1))
     {
         vxReleaseNode(&obj->disp_node);
         vxReleaseGraph(&obj->disp_graph);
     }
 
     #ifdef APP_TIVX_LOG_RT_ENABLE
     tivxLogRtTraceExportToFile("app_tidl.bin");
     tivxLogRtTraceDisable(obj->graph);
     #endif
 
     vxReleaseNode(&obj->tidl_node);
     vxReleaseGraph(&obj->graph);
 
     vxReleaseUserDataObject(&obj->config);
     vxReleaseUserDataObject(&obj->network);
 
     vxReleaseUserDataObject(&obj->createParams);
     vxReleaseUserDataObject(&obj->inArgs);
     vxReleaseUserDataObject(&obj->outArgs);
 
 #ifdef APP_TIDL_TRACE_DUMP
     vxReleaseUserDataObject(&obj->traceData);
 #endif
 
     for(id = 0; id < obj->num_input_tensors; id++)
     {
         vxReleaseTensor(&obj->input_tensors[id]);
     }
 
     for(id = 0; id < obj->num_output_tensors; id++)
     {
         vxReleaseTensor(&obj->output_tensors[id]);
     }
 
     if ((vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1)) && (obj->display_option == 1))
     {
         vxReleaseImage(&obj->disp_image);
         vxReleaseUserDataObject(&obj->disp_params_obj);
     }
 
     APP_PRINTF("app_tidl: Delete ... Done.\n");
 }
 
 static void app_show_usage(int argc, char* argv[])
 {
     printf("\n");
     printf(" TIDL Demo - (c) Texas Instruments 2018\n");
     printf(" ========================================================\n");
     printf("\n");
     printf(" Usage,\n");
     printf("  %s --cfg <config file>\n", argv[0]);
     printf("\n");
 }
 
 static void app_set_cfg_default(AppObj *obj)
 {
     snprintf(obj->tidl_config_file_path,APP_MAX_FILE_PATH, "test_data/tivx/tidl_models/mobilenetv1/config.bin");
     snprintf(obj->tidl_network_file_path,APP_MAX_FILE_PATH, "test_data/tivx/tidl_models/mobilenetv1/network.bin");
     snprintf(obj->input_file_path,APP_MAX_FILE_PATH, "test_data/psdkra/app_tidl");
     snprintf(obj->input_file_list,APP_MAX_FILE_PATH, "test_data/psdkra/app_tidl/names.txt");
     snprintf(obj->output_file_path,APP_MAX_FILE_PATH, "app_tidl_out");
     snprintf(obj->ti_logo_file_path,APP_MAX_FILE_PATH, "test_data/tivx/tidl_models/");
     obj->display_option = 1;
     obj->delay_in_msecs = 0;
     obj->num_iterations = 1;
     obj->is_interactive = 0;
     obj->test_mode      = 0;
 }
 
 static int app_parse_cfg_file(AppObj *obj, char *cfg_file_name)
 {
     FILE *fp = fopen(cfg_file_name, "r");
     char line_str[1024];
     char *token;
 
     if(fp==NULL)
     {
         printf("app_tidl: ERROR: Unable to open config file [%s]\n", cfg_file_name);
         return -1;
     }
 
     while(fgets(line_str, sizeof(line_str), fp)!=NULL)
     {
         char s[]=" \t";
 
         if (strchr(line_str, '#'))
         {
             continue;
         }
 
         /* get the first token */
         token = strtok(line_str, s);
 
         if(token != NULL)
         {
             if(strcmp(token, "tidl_config_file_path")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   strcpy(obj->tidl_config_file_path, token);
                 }
             }
             else
             if(strcmp(token, "tidl_network_file_path")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   strcpy(obj->tidl_network_file_path, token);
                   /* for testing if relevant */
                   if(strstr(obj->tidl_network_file_path, "u16") != NULL)
                   {
                     obj->test_case = 1;
                   }
                   else
                   {
                     obj->test_case = 0;
                   }
                 }
             }
             else
             if(strcmp(token, "input_file_path")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   strcpy(obj->input_file_path, token);
                 }
             }
             else
             if(strcmp(token, "ti_logo_file_path")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   strcpy(obj->ti_logo_file_path, token);
                 }
             }
             else
             if(strcmp(token, "input_file_list")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   strcpy(obj->input_file_list, token);
                 }
             }
             else
             if(strcmp(token, "output_file_path")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   strcpy(obj->output_file_path, token);
                 }
             }
             else
             if(strcmp(token, "display_option")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   obj->display_option = atoi(token);
                   if(obj->display_option > 1)
                       obj->display_option = 1;
                 }
             }
             else
             if(strcmp(token, "delay")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   obj->delay_in_msecs = atoi(token);
                   if(obj->delay_in_msecs > 2000)
                       obj->delay_in_msecs = 2000;
                 }
             }
             else
             if(strcmp(token, "num_iterations")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   obj->num_iterations = atoi(token);
                   if(obj->num_iterations == 0)
                       obj->num_iterations = 1;
                 }
             }
             else
             if(strcmp(token, "is_interactive")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   obj->is_interactive = atoi(token);
                   if(obj->is_interactive > 1)
                       obj->is_interactive = 1;
                 }
             }
             else
             if(strcmp(token, "test_mode")==0)
             {
                 token = strtok(NULL, s);
                 if(token != NULL)
                 {
                   token[strlen(token)-1]=0;
                   obj->test_mode = atoi(token);
                 }
             }
         }
         if (obj->test_mode == 1)
         {
           obj->is_interactive = 0;
           /* display_option must be set to 1 in order for the checksums
               to come out correctly */
 #ifndef x86_64
           printf("Turning display option on ... \n");
           obj->display_option = 1;
 #else
           printf("Turning display option off ... \n");
           obj->display_option = 0;
 #endif
         }
     }
 
     fclose(fp);
 
     return 0;
 }
 
 static int app_parse_cmd_line_args(AppObj *obj, int argc, char *argv[])
 {
     int i;
     vx_bool set_test_mode = vx_false_e;
 
     app_set_cfg_default(obj);
 
     if(argc==1)
     {
         app_show_usage(argc, argv);
         return -1;
     }
 
     for(i=0; i<argc; i++)
     {
         if(strcmp(argv[i], "--cfg")==0)
         {
             i++;
             if(i>=argc)
             {
                 app_show_usage(argc, argv);
             }
             app_parse_cfg_file(obj, argv[i]);
         }
         else
         if(strcmp(argv[i], "--help")==0)
         {
             app_show_usage(argc, argv);
             return -1;
         }
         else
         if(strcmp(argv[i], "--test")==0)
         {
             set_test_mode = vx_true_e;
         }
     }
 
     if (set_test_mode == vx_true_e)
     {
         obj->test_mode = 1;
         obj->is_interactive = 0;
         obj->display_option = 1;
         obj->delay_in_msecs = 100;
     }
 
     #ifdef x86_64
     printf("Turning display option off ... \n");
     obj->display_option = 0;
     obj->is_interactive = 0;
     #endif
 
     return 0;
 }
 
 static void initParam(vx_reference params[], uint32_t _max_params)
 {
    num_params  = 0;
    max_params = _max_params;
 }
 
 static void addParam(vx_reference params[], vx_reference obj)
 {
    APP_ASSERT(num_params <= max_params);
 
    params[num_params] = obj;
 
    num_params++;
 }
 
 static vx_status app_create_graph(AppObj *obj)
 {
     vx_status status = VX_SUCCESS;
 
     vx_reference params[APP_TIDL_MAX_PARAMS];
     uint32_t i;
 
     APP_PRINTF("app_tidl: Creating graph ... \n");
 
     /* Create OpenVx Graph */
     obj->graph = vxCreateGraph(obj->context);
     APP_ASSERT_VALID_REF(obj->graph)
     vxSetReferenceName((vx_reference)obj->graph, "app_tidl_graph");
 
     /* Create array of input tensors */
     createInputTensors(obj, obj->context, obj->config, obj->input_tensors);
 
     /* Create array of output tensors */
     createOutputTensors(obj, obj->context, obj->config, obj->output_tensors);
 
     /* Initialize param array */
     initParam(params, APP_TIDL_MAX_PARAMS);
 
     /* The 1st param MUST be config */
     addParam(params, (vx_reference)obj->config);
 
     /* The 2nd param MUST be network */
     addParam(params, (vx_reference)obj->network);
 
     /* The 3rd param MUST be create params */
     addParam(params, (vx_reference)obj->createParams);
 
     /* The 4th param MUST be inArgs */
     addParam(params, (vx_reference)obj->inArgs);
 
     /* The 5th param MUST be outArgs */
     addParam(params, (vx_reference)obj->outArgs);
 
 #ifdef APP_TIDL_TRACE_DUMP
     addParam(params, (vx_reference)obj->traceData);
 #else
 /* The 6th param MUST be NULL if trace data dump is not enabled */
     addParam(params, (vx_reference)NULL);
 #endif
 
     /* Create TIDL Node */
     obj->tidl_node = tivxTIDLNode(obj->graph, obj->kernel, params, obj->input_tensors, obj->output_tensors);
     APP_ASSERT_VALID_REF(obj->tidl_node)
 
     vxSetNodeTarget(obj->tidl_node, VX_TARGET_STRING, TIVX_TARGET_DSP_C7_1);
 
     if ((vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1)) && (obj->display_option == 1))
     {
         /* Create OpenVx Graph */
         obj->disp_graph = vxCreateGraph(obj->context);
         APP_ASSERT_VALID_REF(obj->disp_graph)
         vxSetReferenceName((vx_reference)obj->disp_graph, "Display");
 
         obj->disp_node = tivxDisplayNode(obj->disp_graph, obj->disp_params_obj, obj->disp_image);
         APP_ASSERT_VALID_REF(obj->disp_node)
 
         vxSetNodeTarget(obj->disp_node, VX_TARGET_STRING, TIVX_TARGET_DISPLAY1);
     }
 
     /* Set names for diferent OpenVX objects */
     vxSetReferenceName((vx_reference)obj->config, "Config");
     vxSetReferenceName((vx_reference)obj->network, "Network");
     vxSetReferenceName((vx_reference)obj->createParams, "CreateParams");
     vxSetReferenceName((vx_reference)obj->inArgs, "InArgs");
     vxSetReferenceName((vx_reference)obj->outArgs, "OutArgs");
 
     for(i = 0; i < obj->num_input_tensors; i++) {
         char tensor_name[] = "InputTensor_";
         char ref_name[64];
         snprintf(ref_name, 64, "%s%s", tensor_name, tensor_num_str[i]);
         vxSetReferenceName((vx_reference)obj->input_tensors[i], ref_name);
     }
 
     for(i = 0; i < obj->num_output_tensors; i++) {
         char tensor_name[] = "OutputTensor_";
         char ref_name[64];
         snprintf(ref_name, 64, "%s%s", tensor_name, tensor_num_str[i]);
         vxSetReferenceName((vx_reference)obj->output_tensors[i], ref_name);
     }
 
     vxSetReferenceName((vx_reference)obj->kernel, "TIDLKernel");
     vxSetReferenceName((vx_reference)obj->tidl_node, "TIDLNode");
 
     if (vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1) && (obj->display_option == 1))
     {
         vxSetReferenceName((vx_reference)obj->disp_params_obj, "DisplayParams");
         vxSetReferenceName((vx_reference)obj->disp_node, "DisplayNode");
     }
 
     APP_PRINTF("app_tidl: Creating graph ... Done.\n");
 
     return status;
 }
 
 static void app_run_task(void *app_var)
 {
     AppObj *obj = (AppObj *)app_var;
     vx_status status = VX_SUCCESS;
     appPerfStatsCpuLoadResetAll();
 
     while(!obj->stop_task)
     {
         status = app_run_graph(obj);
         if(status == VX_FAILURE)
         {
             printf("Error processing graph!\n");
             obj->stop_task = 1;
         }
     }
     obj->stop_task_done = 1;
 }
 
 static int32_t app_run_task_create(AppObj *obj)
 {
     tivx_task_create_params_t params;
     int32_t status;
 
     tivxTaskSetDefaultCreateParams(¶ms);
     params.task_main = app_run_task;
     params.app_var = obj;
 
     obj->stop_task_done = 0;
     obj->stop_task = 0;
 
     status = tivxTaskCreate(&obj->task, ¶ms);
 
     return status;
 }
 
 static void app_run_task_delete(AppObj *obj)
 {
     while(obj->stop_task_done==0)
     {
          tivxTaskWaitMsecs(100);
     }
 
     tivxTaskDelete(&obj->task);
 }
 
 static char menu[] = {
     "\n"
     "\n ================================="
     "\n Demo : TIDL Object Classification"
     "\n ================================="
     "\n"
     "\n p: Print performance statistics"
     "\n"
     "\n x: Exit"
     "\n"
     "\n Enter Choice: "
 };
 
 static vx_status app_run_graph_interactive(AppObj *obj)
 {
     vx_status status = VX_SUCCESS;
     uint32_t done = 0;
     char ch;
     FILE *fp;
     app_perf_point_t *perf_arr[1];
 
     status = app_run_task_create(obj);
     if(status != VX_SUCCESS)
     {
         printf("app_tidl: ERROR: Unable to create task\n");
     }
     else
     {
         appPerfStatsResetAll();
         while((!done) && (status == VX_SUCCESS))
         {
             printf(menu);
             ch = getchar();
             printf("\n");
 
             switch(ch)
             {
                 case 'p':
                     appPerfStatsPrintAll();
                     if(status == VX_SUCCESS)
                     {
                         status = tivx_utils_graph_perf_print(obj->graph);
                     }
                     if(status == VX_SUCCESS)
                     {
                         status = tivx_utils_graph_perf_print(obj->disp_graph);
                     }
                     appPerfPointPrint(&obj->fileio_perf);
                     appPerfPointPrint(&obj->draw_perf);
                     appPerfPointPrint(&obj->total_perf);
                     printf("\n");
                     appPerfPointPrintFPS(&obj->total_perf);
                     printf("\n");
                     break;
                 case 'e':
                     perf_arr[0] = &obj->total_perf;
                     fp = appPerfStatsExportOpenFile(".", "dl_demos_app_tidl");
                     if((NULL != fp) && (status == VX_SUCCESS))
                     {
                         appPerfStatsExportAll(fp, perf_arr, 1);
                         status = tivx_utils_graph_perf_export(fp, obj->graph);
                         appPerfStatsExportCloseFile(fp);
                         appPerfStatsResetAll();
                     }
                     else
                     {
                         printf("fp is null\n");
                     }
                     break;
                 case 'x':
                     obj->stop_task = 1;
                     done = 1;
                     break;
             }
         }
         app_run_task_delete(obj);
     }
     return status;
 }
 
 static vx_status app_verify_graph(AppObj *obj)
 {
     vx_status status = VX_SUCCESS;
 
     APP_PRINTF("app_tidl: Verifying graph ... \n");
 
     /* Verify the TIDL Graph */
     status = vxVerifyGraph(obj->graph);
     if(status != VX_SUCCESS)
     {
         printf("app_tidl: ERROR: Verifying graph ... Failed !!!\n");
         return status;
     }
 
     APP_PRINTF("app_tidl: Verifying graph ... Done.\n");
 
     if ((vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1)) && (obj->display_option == 1))
     {
         APP_PRINTF("app_tidl: Verifying display graph ... \n");
 
         /* Verify the TIDL Graph */
         status = vxVerifyGraph(obj->disp_graph);
         if(status != VX_SUCCESS)
         {
             printf("app_tidl: ERROR: Verifying display graph ... Failed !!!\n");
             return status;
         }
 
         APP_PRINTF("app_tidl: Verifying display graph ... Done.\n");
     }
 
     /* wait a while for prints to flush */
     tivxTaskWaitMsecs(100);
 
 #if 1
     if(status == VX_SUCCESS)
     {
         status = tivxExportGraphToDot(obj->graph,".", "vx_app_tidl");
         APP_ASSERT(status==VX_SUCCESS);
     }
 #endif
 
     #ifdef APP_TIVX_LOG_RT_ENABLE
     tivxLogRtTraceEnable(obj->graph);
     #endif
 
     return status;
 }
 
 static void saveOutput(AppObj *obj, vx_tensor *output_tensors, vx_tensor *input_tensors, char *fileName)
 {
   uint8_t color[14][3]={{0,0,0},{255,0,0},{0,255,0},{0,0,255},{255,255,0},
                         {255,0,255},{0,255,255},{127,0,0},{0,127,0},{0,0,127},
                         {127,127,0},{127,0,127},{0,127,127},{255,255,255},};
   vx_status status = VX_SUCCESS;
   vx_size output_sizes[APP_MAX_TENSOR_DIMS], input_sizes[APP_MAX_TENSOR_DIMS];
   sTIDL_IOBufDesc_t *ioBufDesc;
   ioBufDesc = &obj->ioBufDesc;
   uint8_t *color_mask = obj->temp;
   uint8_t *gray_mask = obj->temp + MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3;
   uint8_t *image_mask= obj->temp + MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3 * 2;
   int32_t width, height, outPadL, outPadT, inPadL, inPadT;//, padB, padR
   char output_file[APP_MAX_FILE_PATH];
   char file_name_temp[APP_MAX_FILE_PATH];
   char file_name_gmask[APP_MAX_FILE_PATH];
   char file_name_cmask[APP_MAX_FILE_PATH];
   char file_name_imask[APP_MAX_FILE_PATH];
   strcpy(file_name_temp, fileName);
   file_name_temp[strlen(fileName)-4] = '\0';
   //yolov5参数
   vx_int32 head_idx, h_idx, w_idx, anchor_idx;
   vx_int32 yolov5_class_stride = 3 + 5 + 8 + 6; //cls confxywh points slot
   vx_int32 bbox_num = 0;
   {
     uint64_t cur_time;
       cur_time = tivxPlatformGetTimeInUsecs();
     /*测试端到端解码接口_依赖tda4结构体*/
     // if (0) {
     //     getMult_yolov5e2eResult(&obj->ioBufDesc, obj->output_tensors, &obj->out_args, &obj->yolov5params, &obj->detections);
     //     cur_time = tivxPlatformGetTimeInUsecs() - cur_time;
     //     printf("端到端车位解码耗时: %d us \n", (int)cur_time);
     //     int num = obj->detections.num_bbox;
     //     ttevxYoloV5BBox num0 = obj->detections.buffer[0];
     //     ttevxYoloV5BBox num1 = obj->detections.buffer[1];
     //     printf("检测到%d车位 \n",(int)num);
     //     //将车位检测结果保存成二进制文件
     //     vx_char new_name[256];
     //     snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%s%s%s%s", obj->output_file_path,"/psd/",file_name_temp,"_", "psd", ".bin");
     //     FILE *fp = fopen(new_name, "wb");
     //     if(NULL == fp)
     //     {
     //         printf("Unable to open file %s for writing!\n", new_name);
     //     }
     //     for(int i = 0; i< num ; i++)
     //     {
     //         ttevxYoloV5BBox num0 = obj->detections.buffer[i];
     //         fwrite(&num0, 4, 20, fp);
     //     }
     //     fclose(fp);
     // }
     /*测试端到端解码接口_不依赖tda4结构体*/
     if (1) {
         initMult_yolov5params(&obj->yolov5params);
         /* 兜底:确保解码使用的输入尺寸有效 */
         obj->yolov5params.inWidth[0] = obj->ioBufDesc.inWidth[0];
         obj->yolov5params.inHeight[0] = obj->ioBufDesc.inHeight[0];
         // 添加调试:打印所有输出张量的维度信息
         // printf("=== 开始处理YOLOv5检测头 ===\n");
         // for(int zh_idx = 0; zh_idx < 5; zh_idx++)  // 查看所有5个输出张量
         // {
         //     vx_size tensor_dims;
         //     vx_size tensor_sizes[APP_MAX_TENSOR_DIMS];
         //     vxQueryTensor(output_tensors[zh_idx], VX_TENSOR_NUMBER_OF_DIMS, &tensor_dims, sizeof(vx_size));
         //     vxQueryTensor(output_tensors[zh_idx], (vx_enum)VX_TENSOR_DIMS, tensor_sizes, APP_MAX_TENSOR_DIMS * sizeof(vx_size));
             
         //     printf("输出张量[%d] - ioBufDesc维度: outWidth=%d, outHeight=%d, outNumChannels=%d\n", 
         //            zh_idx, obj->ioBufDesc.outWidth[zh_idx], obj->ioBufDesc.outHeight[zh_idx], obj->ioBufDesc.outNumChannels[zh_idx]);
         //     printf("输出张量[%d] - tensor维度: num_dims=%zu, sizes=[%zu, %zu, %zu, %zu]\n", 
         //            zh_idx, tensor_dims, tensor_sizes[0], tensor_sizes[1], tensor_sizes[2], tensor_sizes[3]);
         // }
         // printf("=== 结束维度信息输出 ===\n\n");
         // return;
         // for(int zh_idx = 0; zh_idx < 3; zh_idx++)//lyp
        vx_map_id map_id_output_arr[APP_MAX_TENSOR_DIMS] = {0};
        int output_mapped[APP_MAX_TENSOR_DIMS] = {0};
        for(int zh_idx = 1; zh_idx < 4; zh_idx++)
         {
            int dst_idx = zh_idx - 1; /* 将检测头1,2,3映射到0,1,2 */
            obj->yolov5params.scale[dst_idx] = obj->out_args.scale[zh_idx];
            obj->yolov5params.outWidth[dst_idx] = obj->ioBufDesc.outWidth[zh_idx];
            obj->yolov5params.outHeight[dst_idx] = obj->ioBufDesc.outHeight[zh_idx];
            obj->yolov5params.inWidth[dst_idx] = obj->ioBufDesc.inWidth[zh_idx];
            obj->yolov5params.inHeight[dst_idx] = obj->ioBufDesc.inHeight[zh_idx];
            obj->yolov5params.outNumChannels[dst_idx] = obj->ioBufDesc.outNumChannels[zh_idx];
            obj->yolov5params.outChannelPitch[dst_idx] = obj->ioBufDesc.outChannelPitch[zh_idx];
            obj->yolov5params.outPadL[dst_idx] = obj->ioBufDesc.outPadL[zh_idx];
            obj->yolov5params.outPadT[dst_idx] = obj->ioBufDesc.outPadT[zh_idx];
            obj->yolov5params.outPadR[dst_idx] = obj->ioBufDesc.outPadR[zh_idx];
            obj->yolov5params.outPadB[dst_idx] = obj->ioBufDesc.outPadB[zh_idx];
            obj->yolov5params.outDataTypeVX[dst_idx] = (int)getTensorDataType(obj->ioBufDesc.outElementType[zh_idx]);
            printf("det_head[%d->%d] type(VX)=%d scale=%f outW=%d outH=%d outC=%d pitch=%d\n",
                   zh_idx, dst_idx,
                   obj->yolov5params.outDataTypeVX[dst_idx],
                   obj->yolov5params.scale[dst_idx],
                   obj->yolov5params.outWidth[dst_idx],
                   obj->yolov5params.outHeight[dst_idx],
                   obj->yolov5params.outNumChannels[dst_idx],
                   obj->yolov5params.outChannelPitch[dst_idx]);
            printf("det_head[%d->%d] outElementType(TIDL)=%d vxType=%s\n",
                   zh_idx, dst_idx,
                   obj->ioBufDesc.outElementType[zh_idx],
                   vxTypeToStr((vx_enum)obj->yolov5params.outDataTypeVX[dst_idx]));
             void *output_buffer;
             vx_map_id map_id_output;
             vx_size output_strides[APP_MAX_TENSOR_DIMS];
             vx_size output_start[APP_MAX_TENSOR_DIMS];
             output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
             output_sizes[0] = ioBufDesc->outWidth[zh_idx] + ioBufDesc->outPadL[zh_idx] + ioBufDesc->outPadR[zh_idx];
             output_sizes[1] = ioBufDesc->outHeight[zh_idx] + ioBufDesc->outPadT[zh_idx] + ioBufDesc->outPadB[zh_idx];
             output_sizes[2] = ioBufDesc->outNumChannels[zh_idx];
            vx_size out_elem_size = vxTypeSize((vx_enum)obj->yolov5params.outDataTypeVX[dst_idx]);
            output_strides[0] = out_elem_size;
            output_strides[1] = output_sizes[0] * output_strides[0];
            output_strides[2] = output_sizes[1] * output_strides[1];
             vx_size tensor_dims;
             vxQueryTensor(output_tensors[zh_idx], VX_TENSOR_NUMBER_OF_DIMS, &tensor_dims, sizeof(vx_size));
             /* 获取tensor不同dim占用的内存大小 */
             tivxMapTensorPatch(output_tensors[zh_idx], tensor_dims, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
             status = vxGetStatus((vx_reference)output_tensors[zh_idx]);
             if(VX_SUCCESS == status)
             {
                obj->yolov5params.output_tensors[dst_idx] = output_buffer;
                map_id_output_arr[dst_idx] = map_id_output;
                output_mapped[dst_idx] = 1;
             }
         }
         printf("端到端车位解码耗时 \n");
         getMult_yolov5e2eResult(&obj->yolov5params, &obj->detections);
        for(int dst_idx = 0; dst_idx < 3; dst_idx++)
         {
            if(output_mapped[dst_idx])
             {
                tivxUnmapTensorPatch(output_tensors[dst_idx + 1], map_id_output_arr[dst_idx]);
             }
         }
         cur_time = tivxPlatformGetTimeInUsecs() - cur_time;
         printf("端到端车位解码耗时: %d us \n", (int)cur_time);
         int num = obj->detections.num_bbox;
         ttevxYoloV5BBox num0 = obj->detections.buffer[0];
         ttevxYoloV5BBox num1 = obj->detections.buffer[1];
         printf("检测到%d车位 \n",(int)num);
         //将车位检测结果保存成二进制文件
         vx_char new_name[256];
         snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%s%s%s%s", obj->output_file_path,"/psd/",file_name_temp,"_", "psd", ".bin");
         FILE *fp = fopen(new_name, "wb");
         if(NULL == fp)
         {
             printf("Unable to open file %s for writing!\n", new_name);
         }
         for(int i = 0; i< num ; i++)
         {
             ttevxYoloV5BBox num0 = obj->detections.buffer[i];
             fwrite(&num0, 4, 20, fp);
         }
         fclose(fp);
     }  
   }
   
   //处理分割分支
 //   for(int32_t id = 3; id < 5; id++) //for mulTask_yolov5_seg lyp
 //   {
   int32_t ids_to_process[] = {0, 4};
   int32_t num_ids = 2;
   
   for(int32_t idx = 0; idx < num_ids; idx++)
   {
     int32_t id = ids_to_process[idx];
     snprintf(file_name_cmask, APP_MAX_FILE_PATH-1, "%s%s%s%d%s", "/img/", file_name_temp, "_cmask", id, ".bmp");
     snprintf(file_name_imask, APP_MAX_FILE_PATH-1, "%s%s%s%d%s", "/img/",file_name_temp, "_imask", id, ".bmp");
     snprintf(file_name_gmask, APP_MAX_FILE_PATH-1, "%s%s%s%d%s", "/img/", file_name_temp, "_gmask", id, ".bmp");
     output_sizes[0] = ioBufDesc->outWidth[id] + obj->ioBufDesc.outPadL[id] + obj->ioBufDesc.outPadR[id];
     output_sizes[1] = ioBufDesc->outHeight[id] + obj->ioBufDesc.outPadT[id] + obj->ioBufDesc.outPadB[id];
     output_sizes[2] = 1;//ioBufDesc->outNumChannels[id];
     input_sizes[0] = ioBufDesc->inWidth[0] + obj->ioBufDesc.inPadL[0] + obj->ioBufDesc.inPadR[0];
     input_sizes[1] = ioBufDesc->inHeight[0] + obj->ioBufDesc.inPadT[0] + obj->ioBufDesc.inPadB[0];
     input_sizes[2] = ioBufDesc->inNumChannels[0];
     width = ioBufDesc->outWidth[id];
     height = ioBufDesc->outHeight[id];
     outPadL = obj->ioBufDesc.outPadL[id];
     outPadT = obj->ioBufDesc.outPadT[id];
     inPadL = obj->ioBufDesc.inPadL[0];
     inPadT = obj->ioBufDesc.inPadT[0];
 
     status = vxGetStatus((vx_reference)output_tensors[id]);
 
     if(VX_SUCCESS == status)
     {
       void *output_buffer, *input_buffer;
       vx_map_id map_id_output, map_id_input;
       vx_size output_strides[APP_MAX_TENSOR_DIMS], input_strides[APP_MAX_TENSOR_DIMS];
       vx_size output_start[APP_MAX_TENSOR_DIMS], input_start[APP_MAX_TENSOR_DIMS];
 
       output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
       input_start[0] = input_start[1] = input_start[2] = input_start[3] = 0;
 
       output_strides[0] = 1;
       output_strides[1] = output_sizes[0];
       output_strides[2] = output_sizes[1] * output_strides[1];
       input_strides[0] = 1;
       input_strides[1] = input_sizes[0];
       input_strides[2] = input_sizes[1] * input_strides[1];
 
       tivxMapTensorPatch(output_tensors[id], 3, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
       tivxMapTensorPatch(input_tensors[0], 3, input_start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
       {
         uint8_t *pOut;
         uint8_t *pIn;
         pOut = (uint8_t *)output_buffer;
         pIn = (uint8_t *)input_buffer;
         // printf("模型没加argmax \n");
 
         for(int32_t i = 0; i < height; i++){
           for(int32_t j = 0; j < width; j++){
             uint8_t seg_type = pOut[output_sizes[0]*(outPadT + i) + outPadL + j];
             // if(seg_type>15)
             // {
             //     printf("seg_type 大于15,出错");
             // }
             uint8_t img_value_R = pIn[0 * input_strides[2] + input_strides[1]*(inPadT + i) + inPadL + j];
             uint8_t img_value_G = pIn[1 * input_strides[2] + input_strides[1]*(inPadT + i) + inPadL + j];
             uint8_t img_value_B = pIn[2 * input_strides[2] + input_strides[1]*(inPadT + i) + inPadL + j];
             int32_t color_chosen = seg_type < 14 ? seg_type : 13;
             color_mask[width * 3 * i + j * 3 + 0] = color[color_chosen][0];
             color_mask[width * 3 * i + j * 3 + 1] = color[color_chosen][1];
             color_mask[width * 3 * i + j * 3 + 2] = color[color_chosen][2];
             gray_mask[width * 3 * i + j * 3 + 0] = seg_type;
             gray_mask[width * 3 * i + j * 3 + 1] = seg_type;
             gray_mask[width * 3 * i + j * 3 + 2] = seg_type;
             if(seg_type > 0){
               image_mask[width * 3 * i + j * 3 + 0] = img_value_R/2 + color[color_chosen][0]/2;
               image_mask[width * 3 * i + j * 3 + 1] = img_value_G/2 + color[color_chosen][1]/2;
               image_mask[width * 3 * i + j * 3 + 2] = img_value_B/2 + color[color_chosen][2]/2;
             }else{
               image_mask[width * 3 * i + j * 3 + 0] = img_value_R;
               image_mask[width * 3 * i + j * 3 + 1] = img_value_G;
               image_mask[width * 3 * i + j * 3 + 2] = img_value_B;
             }
             
           }
         }
         
         // if(obj->save_type == '0' || obj->save_type == '1'){
         //   snprintf(output_file, APP_MAX_FILE_PATH-1, "%s/%s", obj->output_file_path, file_name_cmask);
         //   printf("save to %s!!!\n", output_file);
         //   tivx_utils_bmp_write(output_file, color_mask, width, height, (width * 3), obj->df_image);
         // }
         if(obj->save_type == '0' || obj->save_type == '3'){
           snprintf(output_file, APP_MAX_FILE_PATH-1, "%s/%s", obj->output_file_path, file_name_gmask);
           printf("save to %s!!!\n", output_file);
           tivx_utils_bmp_write(output_file, gray_mask, width, height, (width * 3), obj->df_image);
         }
         if(obj->save_type == '0' || obj->save_type == '3'){
           snprintf(output_file, APP_MAX_FILE_PATH-1, "%s/%s", obj->output_file_path, file_name_imask);
           printf("save to %s!!!\n", output_file);
           tivx_utils_bmp_write(output_file, image_mask, width, height, (width * 3), obj->df_image);
         }
       }
       tivxUnmapTensorPatch(output_tensors[id], map_id_output);
       tivxUnmapTensorPatch(input_tensors[0], map_id_input);
     }
   }
 }
 static void saveOutput_tensor(AppObj *obj, vx_tensor *output_tensors, vx_tensor *input_tensors, char *fileName)
 {
   vx_status status = VX_SUCCESS;
   vx_size output_sizes[APP_MAX_TENSOR_DIMS];
   sTIDL_IOBufDesc_t *ioBufDesc;
   ioBufDesc = &obj->ioBufDesc;
   uint8_t *color_mask = obj->temp;
   uint8_t *gray_mask = obj->temp + MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3;
   uint8_t *image_mask= obj->temp + MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3 * 2;
   int32_t width, height, outPadL, outPadT, inPadL, inPadT;//, padB, padR
   char output_file[APP_MAX_FILE_PATH];
   char file_name_temp[APP_MAX_FILE_PATH];
   char file_name_tensor[APP_MAX_FILE_PATH];
 //   char file_name_cmask[APP_MAX_FILE_PATH];
 //   char file_name_imask[APP_MAX_FILE_PATH];
   strcpy(file_name_temp, fileName);
   file_name_temp[strlen(fileName)-4] = '\0';
   //处理分割分支 lyp
   for(int32_t id = 4; id < 5; id++) //for mulTask_yolov5_seg
   {
 //   int32_t ids_to_process[] = {0, 4};
 //   int32_t num_ids = 2;
   
 //   for(int32_t idx = 0; idx < num_ids; idx++)
 //   {
 //     int32_t id = ids_to_process[idx];
     // snprintf(file_name_tensor, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_tensor", id, ".bmp");
     // snprintf(file_name_imask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_imask", id, ".bmp");
     // snprintf(file_name_gmask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_gmask", id, ".bmp");
     output_sizes[0] = ioBufDesc->outWidth[id] + obj->ioBufDesc.outPadL[id] + obj->ioBufDesc.outPadR[id];
     output_sizes[1] = ioBufDesc->outHeight[id] + obj->ioBufDesc.outPadT[id] + obj->ioBufDesc.outPadB[id];
     output_sizes[2] = 1;//ioBufDesc->outNumChannels[id];
     // input_sizes[0] = ioBufDesc->inWidth[0] + obj->ioBufDesc.inPadL[0] + obj->ioBufDesc.inPadR[0];
     // input_sizes[1] = ioBufDesc->inHeight[0] + obj->ioBufDesc.inPadT[0] + obj->ioBufDesc.inPadB[0];
     // input_sizes[2] = ioBufDesc->inNumChannels[0];
     width = ioBufDesc->outWidth[id];
     height = ioBufDesc->outHeight[id];
     outPadL = obj->ioBufDesc.outPadL[id];
     outPadT = obj->ioBufDesc.outPadT[id];
     // inPadL = obj->ioBufDesc.inPadL[0];
     // inPadT = obj->ioBufDesc.inPadT[0];
 
     status = vxGetStatus((vx_reference)output_tensors[id]);
 
     if(VX_SUCCESS == status)
     {
       void *output_buffer;
       vx_map_id map_id_output, map_id_input;
       vx_size output_strides[APP_MAX_TENSOR_DIMS];
       vx_size output_start[APP_MAX_TENSOR_DIMS];
 
       output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
     //   input_start[0] = input_start[1] = input_start[2] = input_start[3] = 0;
 
       output_strides[0] = 1;
       output_strides[1] = output_sizes[0];
       output_strides[2] = output_sizes[1] * output_strides[1];
     //   input_strides[0] = 1;
     //   input_strides[1] = input_sizes[0];
     //   input_strides[2] = input_sizes[1] * input_strides[1];
     vx_char new_name[256];
     // snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%d%s", obj->output_file_path, "/_tensor", id, ".bin");
     snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%s%s%d%s", obj->output_file_path,"/bin/",file_name_temp,"_", id, ".bin");
     FILE *fp = fopen(new_name, "wb");
     if(NULL == fp)
     {
         printf("Unable to open file %s for writing!\n", new_name);
         break;
     }
 
       tivxMapTensorPatch(output_tensors[id], 3, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
     //   tivxMapTensorPatch(input_tensors[0], 3, input_start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
       {
         uint8_t *pOut, *pIn;
         pOut = (uint8_t *)output_buffer;
         // pIn = (uint8_t *)input_buffer;
         // printf("模型没加argmax \n");
 
         for(int32_t c = 0; c < ioBufDesc->outNumChannels[id]; c++){
            for(int32_t i = 0; i < height; i++){
             uint8_t *pOut_1 = pOut + c * output_sizes[0] * output_sizes[1] + output_sizes[0]*(outPadT + i) + outPadL;
             uint8_t zh = pOut[c * output_sizes[0] * output_sizes[1] + output_sizes[0]*(outPadT + i) + outPadL];
             fwrite(pOut_1, 1, ioBufDesc->outWidth[id], fp);
            }
         }
         fclose(fp);
       }
       tivxUnmapTensorPatch(output_tensors[id], map_id_output);
     //   tivxUnmapTensorPatch(input_tensors[0], map_id_input);
     }
   }
 }
 static void saveOutput_16bit(AppObj *obj, vx_tensor *output_tensors, vx_tensor *input_tensors, char *fileName)
 {
   uint8_t color[14][3]={{0,0,0},{255,0,0},{0,255,0},{0,0,255},{255,255,0},
                         {255,0,255},{0,255,255},{127,0,0},{0,127,0},{0,0,127},
                         {127,127,0},{127,0,127},{0,127,127},{127,127,127},};
   vx_status status = VX_SUCCESS;
   vx_size output_sizes[APP_MAX_TENSOR_DIMS], input_sizes[APP_MAX_TENSOR_DIMS];
   sTIDL_IOBufDesc_t *ioBufDesc;
   ioBufDesc = &obj->ioBufDesc;
   uint8_t *color_mask = obj->temp;
   uint8_t *gray_mask = obj->temp + MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3;
   uint8_t *image_mask= obj->temp + MAX_IMG_WIDTH * MAX_IMG_HEIGHT * 3 * 2;
   int32_t width, height, outPadL, outPadT, inPadL, inPadT;//, padB, padR
   char output_file[APP_MAX_FILE_PATH];
   char file_name_temp[APP_MAX_FILE_PATH];
   char file_name_gmask[APP_MAX_FILE_PATH];
   char file_name_cmask[APP_MAX_FILE_PATH];
   char file_name_imask[APP_MAX_FILE_PATH];
   strcpy(file_name_temp, fileName);
   file_name_temp[strlen(fileName)-4] = '\0';
   //yolov5参数
   vx_int32 head_idx, h_idx, w_idx, anchor_idx;
   vx_int32 yolov5_class_stride = 3 + 5 + 8 + 6; //cls confxywh points slot
   ttevxYoloV5BBox *bbox_obj_arr = vxCreateUserDataObject(obj->context, "", sizeof(ttevxYoloV5BBox), NULL);
   ttevxYoloV5PostProcParams *yolov5params = vxCreateUserDataObject(obj->context, "", sizeof(ttevxYoloV5PostProcParams), NULL);
   vx_int32 bbox_num = 0;
   TIDL_outArgs* out_args = obj->outArgs;
   ttevxYoloV5Detections *detections = vxCreateUserDataObject(obj->context, "", sizeof(ttevxYoloV5Detections), NULL);
   //init
   {
     for(int i = 0;i<5;i++)
     {
         out_args->scale[i] = obj->ioBufDesc.outTensorScale[i];
     }
     yolov5params->num_anchors = TTE_YOLOV5_NUM_ANCHOR;
     yolov5params->num_classes = TTE_YOLOV5_NUM_CLASSES;
     yolov5params->conf_thresh = 0.45;
     yolov5params->nms_thresh = 0.25;   
     int i, j, k;
     for (i = 0; i < TTE_YOLOV5_NUM_HEAD; ++i) {
         for (j = 0; j < TTE_YOLOV5_NUM_ANCHOR; ++j) {
             for (k = 0; k < 2; ++k) {
                 yolov5params->anchors[i][j][k] = ANCHORS[i][j][k];
             }
         }
     }
   }
 //   for(int32_t head_idx = 2; head_idx < obj->num_output_tensors; head_idx++) // for mulTask_yolov5_det
 //   {
 //     output_sizes[0] = ioBufDesc->outWidth[head_idx] + obj->ioBufDesc.outPadL[head_idx] + obj->ioBufDesc.outPadR[head_idx];
 //     output_sizes[1] = ioBufDesc->outHeight[head_idx] + obj->ioBufDesc.outPadT[head_idx] + obj->ioBufDesc.outPadB[head_idx];
 //     output_sizes[2] = 1;//ioBufDesc->outNumChannels[id];
 //     input_sizes[0] = ioBufDesc->inWidth[0] + obj->ioBufDesc.inPadL[0] + obj->ioBufDesc.inPadR[0];
 //     input_sizes[1] = ioBufDesc->inHeight[0] + obj->ioBufDesc.inPadT[0] + obj->ioBufDesc.inPadB[0];
 //     input_sizes[2] = ioBufDesc->inNumChannels[0];
 //     width = ioBufDesc->outWidth[head_idx];
 //     height = ioBufDesc->outHeight[head_idx];
 //     outPadL = obj->ioBufDesc.outPadL[head_idx];
 //     outPadT = obj->ioBufDesc.outPadT[head_idx];
 //     inPadL = obj->ioBufDesc.inPadL[0];
 //     inPadT = obj->ioBufDesc.inPadT[0];
 //     //获取内存块
 //     void *output_buffer, *input_buffer;
 //     vx_map_id map_id_output, map_id_input;
 //     vx_size output_strides[APP_MAX_TENSOR_DIMS], input_strides[APP_MAX_TENSOR_DIMS];
 //     vx_size output_start[APP_MAX_TENSOR_DIMS], input_start[APP_MAX_TENSOR_DIMS];
 //     output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
 //     input_start[0] = input_start[1] = input_start[2] = input_start[3] = 0;
 //     output_strides[0] = 1;
 //     output_strides[1] = output_sizes[0];
 //     output_strides[2] = output_sizes[1] * output_strides[1];
 //     input_strides[0] = 1;
 //     input_strides[1] = input_sizes[0];
 //     input_strides[2] = input_sizes[1] * input_strides[1];
 //     tivxMapTensorPatch(output_tensors[head_idx], 3, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 //     tivxMapTensorPatch(input_tensors[0], 3, input_start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 //     //
 //     status = vxGetStatus((vx_reference)output_tensors[head_idx]);
 //     if(VX_SUCCESS == status)
 //     {
 //         /* 初始时将指针定位到第一个有效内存位置 */
 //         uint8_t *pOut, *pIn;
 //         pOut = (uint8_t *)output_buffer;
 //         pIn = (uint8_t *)input_buffer;
 //         vx_uint8 *raw_data = (vx_uint8*)(pOut) + (output_sizes[0] * outPadT + outPadL);
 //         /* TIDL中内存实际是CHW排布,但是以WHC记录,所以查找内存时首先定位正确的通道,再进行WH的偏移 */
 //         for (h_idx = 0; h_idx < ioBufDesc->outHeight[head_idx]; ++h_idx) {
 //             for (w_idx = 0; w_idx < ioBufDesc->outWidth[head_idx]; ++w_idx) {
 //                 for (anchor_idx = 0; anchor_idx < yolov5params->num_anchors; ++anchor_idx) {
 //                     /* 在内存片中对应[h,w]的线性位置 */
 //                     int32_t obj_hw_idx = h_idx * output_sizes[0] + w_idx;
 //                     /* 找到正确conf内存通道位置,并做hw偏移 */
 //                     int32_t obj_conf_idx = (anchor_idx * yolov5_class_stride + 4) * ioBufDesc->outChannelPitch[head_idx] + obj_hw_idx;
 //                     /* 获得conf并将conf缩放回输入尺度 */
 //                     float conf = (float)raw_data[obj_conf_idx] / out_args->scale[head_idx];
 
 //                     if (conf >= yolov5params->conf_thresh) {
 //                         /* 计算xywh和cls内存位置并转化为输出值 */
 //                         int32_t obj_x_idx = (anchor_idx * yolov5_class_stride + 0) * ioBufDesc->outChannelPitch[head_idx] + obj_hw_idx;
 //                         int32_t obj_y_idx = (anchor_idx * yolov5_class_stride + 1) * ioBufDesc->outChannelPitch[head_idx] + obj_hw_idx;
 //                         int32_t obj_w_idx = (anchor_idx * yolov5_class_stride + 2) * ioBufDesc->outChannelPitch[head_idx] + obj_hw_idx;
 //                         int32_t obj_h_idx = (anchor_idx * yolov5_class_stride + 3) * ioBufDesc->outChannelPitch[head_idx] + obj_hw_idx;
 
 //                         float x = (float)raw_data[obj_x_idx] / out_args->scale[head_idx];
 //                         x = (x * 2.0 - 0.5 + w_idx) * (float)(ioBufDesc->inWidth[0]) / (float)(ioBufDesc->outWidth[head_idx]);
 //                         float y = (float)raw_data[obj_y_idx] / out_args->scale[head_idx];
 //                         y = (y * 2.0 - 0.5 + h_idx) * (float)(ioBufDesc->inHeight[0]) / (float)(ioBufDesc->outHeight[head_idx]);
 //                         float w = (float)raw_data[obj_w_idx] / out_args->scale[head_idx];
 //                         w = (w * 2.0) * (w * 2.0) * yolov5params->anchors[head_idx][anchor_idx][0];
 //                         float h = (float)raw_data[obj_h_idx] / out_args->scale[head_idx];
 //                         h = (h * 2.0) * (h * 2.0) * yolov5params->anchors[head_idx][anchor_idx][1];
 
 //                         int32_t cls = 0;
 //                         float cls_prob = 0.0;
 //                         /* 遍历查找类别内存,找到最大的置信度进行赋值 */
 //                         {
 //                             int32_t obj_cls_idx;
 //                             int32_t max_prob_idx;
 //                             float p;
 //                             for (max_prob_idx = 0; max_prob_idx < yolov5params->num_classes; ++max_prob_idx) {
 //                                 obj_cls_idx = (anchor_idx * yolov5_class_stride + 5 + 8 + 5 + max_prob_idx) * ioBufDesc->outChannelPitch[head_idx] + obj_hw_idx;
 //                                 p = (float)raw_data[obj_cls_idx] / out_args->scale[head_idx];
 //                                 if (p > cls_prob) {
 //                                     cls_prob = p;
 //                                     cls = max_prob_idx;
 //                                 }
 //                             }
 //                         }
 //                         /* 输出概率 = 存在目标概率 * 类别置信概率 */
 //                         bbox_obj_arr[bbox_num].confidence = conf * cls_prob;
 //                         if (bbox_obj_arr[bbox_num].confidence >= yolov5params->conf_thresh) {
 //                             bbox_obj_arr[bbox_num].x1 = (int)(x - w / 2 + 0.5);
 //                             bbox_obj_arr[bbox_num].y1 = (int)(y - h / 2 + 0.5);
 //                             bbox_obj_arr[bbox_num].x2 = (int)(x + w / 2 + 0.5);
 //                             bbox_obj_arr[bbox_num].y2 = (int)(y + h / 2 + 0.5);
 //                             bbox_obj_arr[bbox_num].cls = cls;
 //                             bbox_num++;
 //                         }
 //                     }
 //                 }
 //             }
 //         } 
 //     }
 //   }
 //   //NMS
 //   bbox_num = NMS(bbox_obj_arr, bbox_num, yolov5params->nms_thresh);
 //   detections->num_bbox = bbox_num;
   //处理分割分支
   for(int32_t id = 4; id < 5; id++) //for mulTask_yolov5_seg
   {
     snprintf(file_name_cmask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_cmask", id, ".bmp");
     snprintf(file_name_imask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_imask", id, ".bmp");
     snprintf(file_name_gmask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_gmask", id, ".bmp");
     output_sizes[0] = ioBufDesc->outWidth[id] + obj->ioBufDesc.outPadL[id] + obj->ioBufDesc.outPadR[id];
     output_sizes[1] = ioBufDesc->outHeight[id] + obj->ioBufDesc.outPadT[id] + obj->ioBufDesc.outPadB[id];
     output_sizes[2] = 1;//ioBufDesc->outNumChannels[id];
     input_sizes[0] = ioBufDesc->inWidth[0] + obj->ioBufDesc.inPadL[0] + obj->ioBufDesc.inPadR[0];
     input_sizes[1] = ioBufDesc->inHeight[0] + obj->ioBufDesc.inPadT[0] + obj->ioBufDesc.inPadB[0];
     input_sizes[2] = ioBufDesc->inNumChannels[0];
     width = ioBufDesc->outWidth[id];
     height = ioBufDesc->outHeight[id];
     outPadL = obj->ioBufDesc.outPadL[id];
     outPadT = obj->ioBufDesc.outPadT[id];
     inPadL = obj->ioBufDesc.inPadL[0];
     inPadT = obj->ioBufDesc.inPadT[0];
 
     status = vxGetStatus((vx_reference)output_tensors[id]);
 
     if(VX_SUCCESS == status)
     {
       void *output_buffer, *input_buffer;
       vx_map_id map_id_output, map_id_input;
       vx_size output_strides[APP_MAX_TENSOR_DIMS], input_strides[APP_MAX_TENSOR_DIMS];
       vx_size output_start[APP_MAX_TENSOR_DIMS], input_start[APP_MAX_TENSOR_DIMS];
 
       output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
       input_start[0] = input_start[1] = input_start[2] = input_start[3] = 0;
 
       output_strides[0] = 1;
       output_strides[1] = output_sizes[0];
       output_strides[2] = output_sizes[1] * output_strides[1];
       input_strides[0] = 1;
       input_strides[1] = input_sizes[0];
       input_strides[2] = input_sizes[1] * input_strides[1];
 
       tivxMapTensorPatch(output_tensors[id], 3, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
       tivxMapTensorPatch(input_tensors[0], 3, input_start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
       {
         uint8_t *pOut, *pIn;
         pOut = (uint16_t *)output_buffer;
         pIn = (uint8_t *)input_buffer;
         // printf("模型没加argmax \n");
 
         for(int32_t i = 0; i < height; i++){
           for(int32_t j = 0; j < width; j++){
             uint8_t seg_type = (uint8_t)((uint16_t)pOut[output_sizes[0]*(outPadT + i) + outPadL + j]);
             // if(seg_type>15)
             // {
             //     printf("seg_type 大于15,出错");
             // }
             uint8_t img_value_R = pIn[0 * input_strides[2] + input_strides[1]*(inPadT + i) + inPadL + j];
             uint8_t img_value_G = pIn[1 * input_strides[2] + input_strides[1]*(inPadT + i) + inPadL + j];
             uint8_t img_value_B = pIn[2 * input_strides[2] + input_strides[1]*(inPadT + i) + inPadL + j];
             int32_t color_chosen = seg_type < 14 ? seg_type : 13;
             color_mask[width * 3 * i + j * 3 + 0] = color[color_chosen][0];
             color_mask[width * 3 * i + j * 3 + 1] = color[color_chosen][1];
             color_mask[width * 3 * i + j * 3 + 2] = color[color_chosen][2];
             gray_mask[width * 3 * i + j * 3 + 0] = seg_type;
             gray_mask[width * 3 * i + j * 3 + 1] = seg_type;
             gray_mask[width * 3 * i + j * 3 + 2] = seg_type;
             if(seg_type > 0){
               image_mask[width * 3 * i + j * 3 + 0] = img_value_R/2 + color[color_chosen][0]/2;
               image_mask[width * 3 * i + j * 3 + 1] = img_value_G/2 + color[color_chosen][1]/2;
               image_mask[width * 3 * i + j * 3 + 2] = img_value_B/2 + color[color_chosen][2]/2;
             }else{
               image_mask[width * 3 * i + j * 3 + 0] = img_value_R;
               image_mask[width * 3 * i + j * 3 + 1] = img_value_G;
               image_mask[width * 3 * i + j * 3 + 2] = img_value_B;
             }
             
           }
         }
         
         // if(obj->save_type == '0' || obj->save_type == '1'){
         //   snprintf(output_file, APP_MAX_FILE_PATH-1, "%s/%s", obj->output_file_path, file_name_cmask);
         //   printf("save to %s!!!\n", output_file);
         //   tivx_utils_bmp_write(output_file, color_mask, width, height, (width * 3), obj->df_image);
         // }
         // if(obj->save_type == '0' || obj->save_type == '2'){
         //   snprintf(output_file, APP_MAX_FILE_PATH-1, "%s/%s", obj->output_file_path, file_name_gmask);
         //   printf("save to %s!!!\n", output_file);
         //   tivx_utils_bmp_write(output_file, gray_mask, width, height, (width * 3), obj->df_image);
         // }
         // if(obj->save_type == '0' || obj->save_type == '3'){
         //   snprintf(output_file, APP_MAX_FILE_PATH-1, "%s/%s", obj->output_file_path, file_name_imask);
         //   printf("save to %s!!!\n", output_file);
         //   tivx_utils_bmp_write(output_file, image_mask, width, height, (width * 3), obj->df_image);
         // }
       }
       tivxUnmapTensorPatch(output_tensors[id], map_id_output);
       tivxUnmapTensorPatch(input_tensors[0], map_id_input);
     }
   }
 }
 static void saveOutput_tensor_16bit(AppObj *obj, vx_tensor *output_tensors, vx_tensor *input_tensors, char *fileName)
 {
   vx_status status = VX_SUCCESS;
   vx_size output_sizes[APP_MAX_TENSOR_DIMS];
   sTIDL_IOBufDesc_t *ioBufDesc;
   ioBufDesc = &obj->ioBufDesc;
   int32_t width, height, outPadL, outPadT, inPadL, inPadT;//, padB, padR
   char output_file[APP_MAX_FILE_PATH];
   char file_name_temp[APP_MAX_FILE_PATH];
   char file_name_tensor[APP_MAX_FILE_PATH];
 //   char file_name_cmask[APP_MAX_FILE_PATH];
 //   char file_name_imask[APP_MAX_FILE_PATH];
   strcpy(file_name_temp, fileName);
   file_name_temp[strlen(fileName)-4] = '\0';
   //处理分割分支
   for(int32_t id = 4; id < 5; id++) //for mulTask_yolov5_seg
   {
     // snprintf(file_name_tensor, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_tensor", id, ".bmp");
     // snprintf(file_name_imask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_imask", id, ".bmp");
     // snprintf(file_name_gmask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_gmask", id, ".bmp");
     output_sizes[0] = ioBufDesc->outWidth[id] + obj->ioBufDesc.outPadL[id] + obj->ioBufDesc.outPadR[id];
     output_sizes[1] = ioBufDesc->outHeight[id] + obj->ioBufDesc.outPadT[id] + obj->ioBufDesc.outPadB[id];
     output_sizes[2] = 1;//ioBufDesc->outNumChannels[id];
     // input_sizes[0] = ioBufDesc->inWidth[0] + obj->ioBufDesc.inPadL[0] + obj->ioBufDesc.inPadR[0];
     // input_sizes[1] = ioBufDesc->inHeight[0] + obj->ioBufDesc.inPadT[0] + obj->ioBufDesc.inPadB[0];
     // input_sizes[2] = ioBufDesc->inNumChannels[0];
     width = ioBufDesc->outWidth[id];
     height = ioBufDesc->outHeight[id];
     outPadL = obj->ioBufDesc.outPadL[id];
     outPadT = obj->ioBufDesc.outPadT[id];
     // inPadL = obj->ioBufDesc.inPadL[0];
     // inPadT = obj->ioBufDesc.inPadT[0];
 
     status = vxGetStatus((vx_reference)output_tensors[id]);
 
     if(VX_SUCCESS == status)
     {
       void *output_buffer;
       vx_map_id map_id_output, map_id_input;
       vx_size output_strides[APP_MAX_TENSOR_DIMS];
       vx_size output_start[APP_MAX_TENSOR_DIMS];
 
       output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
     //   input_start[0] = input_start[1] = input_start[2] = input_start[3] = 0;
 
       output_strides[0] = 1;
       output_strides[1] = output_sizes[0];
       output_strides[2] = output_sizes[1] * output_strides[1];
     //   input_strides[0] = 1;
     //   input_strides[1] = input_sizes[0];
     //   input_strides[2] = input_sizes[1] * input_strides[1];
     vx_char new_name[256];
     // snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%d%s", obj->output_file_path, "/_tensor", id, ".bin");
     snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%s%s%d%s", obj->output_file_path,"/bin/",file_name_temp,"_", id, ".bin");
     FILE *fp = fopen(new_name, "wb");
     if(NULL == fp)
     {
         printf("Unable to open file %s for writing!\n", new_name);
         break;
     }
 
       tivxMapTensorPatch(output_tensors[id], 3, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
     //   tivxMapTensorPatch(input_tensors[0], 3, input_start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
       {
         uint16_t *pOut;
         pOut = (uint16_t *)output_buffer;
         // pIn = (uint8_t *)input_buffer;
         // printf("模型没加argmax \n");
 
         for(int32_t c = 0; c < ioBufDesc->outNumChannels[id]; c++){
            for(int32_t i = 0; i < height; i++){
             uint16_t *pOut_1 = pOut + c * output_sizes[0] * output_sizes[1] + output_sizes[0]*(outPadT + i) + outPadL;
             uint16_t zh = pOut[c * output_sizes[0] * output_sizes[1] + output_sizes[0]*(outPadT + i) + outPadL];
             fwrite(pOut_1, 2, ioBufDesc->outWidth[id], fp);
            }
         }
         fclose(fp);
       }
       tivxUnmapTensorPatch(output_tensors[id], map_id_output);
     //   tivxUnmapTensorPatch(input_tensors[0], map_id_input);
     }
   }
 }
 static void saveOutput_tensor_32bit(AppObj *obj, vx_tensor *output_tensors, vx_tensor *input_tensors, char *fileName)
 {
   vx_status status = VX_SUCCESS;
   vx_size output_sizes[APP_MAX_TENSOR_DIMS];
   sTIDL_IOBufDesc_t *ioBufDesc;
   ioBufDesc = &obj->ioBufDesc;
   int32_t width, height, outPadL, outPadT, inPadL, inPadT;//, padB, padR
   char output_file[APP_MAX_FILE_PATH];
   char file_name_temp[APP_MAX_FILE_PATH];
   char file_name_tensor[APP_MAX_FILE_PATH];
 //   char file_name_cmask[APP_MAX_FILE_PATH];
 //   char file_name_imask[APP_MAX_FILE_PATH];
   strcpy(file_name_temp, fileName);
   file_name_temp[strlen(fileName)-4] = '\0';
   //处理分割分支
   for(int32_t id = 4; id < 5; id++) //for mulTask_yolov5_seg
   {
     // snprintf(file_name_tensor, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_tensor", id, ".bmp");
     // snprintf(file_name_imask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_imask", id, ".bmp");
     // snprintf(file_name_gmask, APP_MAX_FILE_PATH-1, "%s%s%d%s", file_name_temp, "_gmask", id, ".bmp");
     output_sizes[0] = ioBufDesc->outWidth[id] + obj->ioBufDesc.outPadL[id] + obj->ioBufDesc.outPadR[id];
     output_sizes[1] = ioBufDesc->outHeight[id] + obj->ioBufDesc.outPadT[id] + obj->ioBufDesc.outPadB[id];
     output_sizes[2] = 1;//ioBufDesc->outNumChannels[id];
     // input_sizes[0] = ioBufDesc->inWidth[0] + obj->ioBufDesc.inPadL[0] + obj->ioBufDesc.inPadR[0];
     // input_sizes[1] = ioBufDesc->inHeight[0] + obj->ioBufDesc.inPadT[0] + obj->ioBufDesc.inPadB[0];
     // input_sizes[2] = ioBufDesc->inNumChannels[0];
     width = ioBufDesc->outWidth[id];
     height = ioBufDesc->outHeight[id];
     outPadL = obj->ioBufDesc.outPadL[id];
     outPadT = obj->ioBufDesc.outPadT[id];
     // inPadL = obj->ioBufDesc.inPadL[0];
     // inPadT = obj->ioBufDesc.inPadT[0];
 
     status = vxGetStatus((vx_reference)output_tensors[id]);
 
     if(VX_SUCCESS == status)
     {
       void *output_buffer;
       vx_map_id map_id_output, map_id_input;
       vx_size output_strides[APP_MAX_TENSOR_DIMS];
       vx_size output_start[APP_MAX_TENSOR_DIMS];
 
       output_start[0] = output_start[1] = output_start[2] = output_start[3] = 0;
     //   input_start[0] = input_start[1] = input_start[2] = input_start[3] = 0;
 
       output_strides[0] = 1;
       output_strides[1] = output_sizes[0];
       output_strides[2] = output_sizes[1] * output_strides[1];
     //   input_strides[0] = 1;
     //   input_strides[1] = input_sizes[0];
     //   input_strides[2] = input_sizes[1] * input_strides[1];
     vx_char new_name[256];
     // snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%d%s", obj->output_file_path, "/_tensor", id, ".bin");
     snprintf(new_name, APP_MAX_FILE_PATH-1, "%s%s%s%s%d%s", obj->output_file_path,"/bin/",file_name_temp,"_", id, ".bin");
     FILE *fp = fopen(new_name, "wb");
     if(NULL == fp)
     {
         printf("Unable to open file %s for writing!\n", new_name);
         break;
     }
 
       tivxMapTensorPatch(output_tensors[id], 3, output_start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
     //   tivxMapTensorPatch(input_tensors[0], 3, input_start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
       {
         vx_float32 *pOut;
         pOut = (vx_float32 *)output_buffer;
         // pIn = (uint8_t *)input_buffer;
         // printf("模型没加argmax \n");
 
         for(int32_t c = 0; c < ioBufDesc->outNumChannels[id]; c++){
            for(int32_t i = 0; i < height; i++){
             vx_float32 *pOut_1 = pOut + c * output_sizes[0] * output_sizes[1] + output_sizes[0]*(outPadT + i) + outPadL;
             vx_float32 zh = pOut[c * output_sizes[0] * output_sizes[1] + output_sizes[0]*(outPadT + i) + outPadL];
             fwrite(pOut_1, 4, ioBufDesc->outWidth[id], fp);
            }
         }
         fclose(fp);
       }
       tivxUnmapTensorPatch(output_tensors[id], map_id_output);
     //   tivxUnmapTensorPatch(input_tensors[0], map_id_input);
     }
   }
 }
 
 static vx_status app_run_graph_for_one_frame(AppObj *obj, char *curFileName, vx_uint32 counter)
 {
     vx_status status = VX_SUCCESS;
 
     vx_char input_file_name[APP_MAX_FILE_PATH];
     vx_char output_file_name[APP_MAX_FILE_PATH];
     vx_uint32 tensor_actual_checksum = 0, display_actual_checksum = 0;
 
     appPerfPointBegin(&obj->total_perf);
 
     snprintf(input_file_name, APP_MAX_FILE_PATH-1, "%s/%s",
           obj->input_file_path,
           curFileName
           );
 
     APP_PRINTF("app_tidl: Reading input file %s ... ", input_file_name);
 
     appPerfPointBegin(&obj->fileio_perf);
 
     /* Read input from file and poplulate the input tensors */
     if(status == VX_SUCCESS)
     {
         status = readInput(obj, obj->context, obj->config, obj->input_tensors, &input_file_name[0]);
     }
     appPerfPointEnd(&obj->fileio_perf);
 
     APP_PRINTF("Done!\n");
 
 #ifdef APP_WRITE_PRE_PROC_OUTPUT
     vx_char pre_proc_file_name[APP_MAX_FILE_PATH];
 
     snprintf(pre_proc_file_name, APP_MAX_FILE_PATH-1, "%s/%s",
           obj->output_file_path,
           "pre_proc_out"
           );
 
     writePreProcOutput(pre_proc_file_name, obj->input_tensors[0]);
 #endif
 
     uint64_t cur_time;
     APP_PRINTF("app_tidl: Running Graph ... ");
     /* Execute the network */
     if(status == VX_SUCCESS)
     {
           cur_time = tivxPlatformGetTimeInUsecs();
         status = vxProcessGraph(obj->graph);
           cur_time = tivxPlatformGetTimeInUsecs() - cur_time;
     }
     APP_PRINTF("Done!\n");
     APP_PRINTF("20260122==============!\n");
     printf("time of rmfsd: %d us \n", (int)cur_time);
     if(status == VX_SUCCESS)
     {
         saveOutput_tensor(obj, obj->output_tensors, obj->input_tensors,curFileName);
         // saveOutput_tensor_16bit(obj, obj->output_tensors, obj->input_tensors,curFileName);
     }
     if(status == VX_SUCCESS)
     {
         saveOutput(obj, obj->output_tensors, obj->input_tensors,curFileName);
         appPerfPointBegin(&obj->draw_perf);
         appPerfPointEnd(&obj->draw_perf);
     }
     tivx_utils_bmp_read_release(&obj->imgParams);
 //    if(status == VX_SUCCESS)
 //    {
 //        snprintf(output_file_name, APP_MAX_FILE_PATH-1, "%s/%s",
 //              obj->output_file_path,
 //              curFileName
 //              );
 //        
 //    saveOutput(obj, obj->output_tensors, obj->input_tensors,curFileName);
    
 //        appPerfPointBegin(&obj->draw_perf);
    
 //        /* Display the output */
 //        displayOutput(obj, obj->config, obj->output_tensors, &output_file_name[0]);
    
    
 //        appPerfPointEnd(&obj->draw_perf);
    
 //        if ((vx_true_e == tivxIsTargetEnabled(TIVX_TARGET_DISPLAY1)) && (obj->display_option == 1)) {
 //            /* At this point, the output is ready to copy the updated buffer */
 //            if(status == VX_SUCCESS)
 //            {
 //                status = vxCopyImagePatch(obj->disp_image,
 //                                        &obj->disp_rect,
 //                                        0,
 //                                        &obj->image_addr,
 //                                        (void *)obj->pDisplayBuf888,
 //                                        VX_WRITE_ONLY,
 //                                        VX_MEMORY_TYPE_HOST
 //                                        );
 //            }
 //            APP_PRINTF("app_tidl: Running display graph ... \n");
 //            /* Execute the display graph */
 //            if(status == VX_SUCCESS)
 //            {
 //                status = vxProcessGraph(obj->disp_graph);
 //            }
 //            APP_PRINTF("app_tidl: Running display graph ... Done.\n");
 //        }
 //        /* Check that you are within the first n frames, where n is the number
 //            of samples in the checksums_expected */
 //        if ((obj->test_mode == 1) &&
 //            (counter < ((sizeof(checksums_expected[0])/sizeof(checksums_expected[0][0]))-1)))
 //        {
 //            /* numOutputbuf is 1 here, but for the sake of generalizing
 //                in the future, the loop will remain */
 //            if(vx_false_e == app_test_check_tensor(obj->output_tensors[0],
 //                                                    getTensorDataType(obj->ioBufDesc.outElementType[0]),
 //                                                    obj->ioBufDesc.outWidth[0], obj->ioBufDesc.outPadL[0],
 //                                                    obj->ioBufDesc.outPadR[0], obj->ioBufDesc.outHeight[0],
 //                                                    obj->ioBufDesc.outPadT[0], obj->ioBufDesc.outPadB[0],
 //                                                    obj->ioBufDesc.outNumChannels[0], checksums_expected[obj->test_case*2][counter],
 //                                                    &tensor_actual_checksum))
 //            {
 //                test_result = vx_false_e;
 //            }
 //            /* in case test fails and needs to change */
 //            populate_gatherer(0 + (obj->test_case*2), counter, tensor_actual_checksum);
 //#ifndef x86_64
 //            if(vx_false_e == app_test_check_image(obj->disp_image, checksums_expected[1+(2*obj->test_case)][counter],
 //                                                    &display_actual_checksum))
 //            {
 //                test_result = vx_false_e;
 //            }
 //            populate_gatherer(1 + (obj->test_case*2), counter, display_actual_checksum);
 //#endif
    
 //        }
 //    }
 
     appPerfPointEnd(&obj->total_perf);
 
 #ifdef APP_TIDL_TRACE_DUMP
     tivx_utils_tidl_trace_write(obj->traceData, curFileName);
 #endif
     // printf("exit_begin\n");
     // exit(0);
     // printf("exit_end\n");
 
     return status;
 }
 
 static vx_status app_run_graph(AppObj *obj)
 {
     vx_status status = VX_SUCCESS;
     vx_char curFileName[APP_MAX_FILE_PATH];
     uint64_t cur_time;
     uint64_t max_frames = INT64_MAX;
     vx_uint32 counter = 0;
     FILE* test_case_file;
     uint32_t cur_iteration;
 
     printf("network file: %s\n", obj->tidl_network_file_path);
     printf("config  file: %s\n", obj->tidl_config_file_path);
 
     if (obj->test_mode == 1)
     {
         /* This way if the application failed at any point up to here
             the test mode would return a failure */
         test_result = vx_true_e;
         obj->num_iterations = 1;
         max_frames = (sizeof(checksums_expected[0])/sizeof(checksums_expected[0][0])) + TEST_BUFFER;
     }
     for(cur_iteration=0; cur_iteration<obj->num_iterations; cur_iteration++)
     {
         printf("Iteration %d of %d ... \n", cur_iteration, obj->num_iterations);
 
         test_case_file =  fopen(obj->input_file_list,"r");
         if(test_case_file==NULL)
         {
             break;
         }
         int zh = 0;
         while (fgets(curFileName, sizeof(curFileName), test_case_file) && (counter < max_frames))
         {
             curFileName[strlen(curFileName) - 1] = 0;
             zh ++;
 
             cur_time = tivxPlatformGetTimeInUsecs();
 
             APP_PRINTF("Classifying input %s ...\n", curFileName);
             if(status == VX_SUCCESS)
             {
                 status = app_run_graph_for_one_frame(obj, curFileName, counter++);
             }
             printf("完成图像张数: %d\n", zh);
             APP_PRINTF("Classifying input %s ...Done!\n", curFileName);
 
             cur_time = tivxPlatformGetTimeInUsecs() - cur_time;
             /* convert to msecs */
             cur_time = cur_time;
 
             printf("cur_time:==%ld\n",cur_time);
 
             if(cur_time < obj->delay_in_msecs)
             {
                 tivxTaskWaitMsecs(obj->delay_in_msecs - cur_time);
             }
             
             /* user asked to stop processing */
             if(obj->stop_task || (status != VX_SUCCESS))
             {
                 break;
             }
        }
        fclose(test_case_file);
 
        if(obj->stop_task || (status != VX_SUCCESS))
        {
            break;
        }
     }
     printf("exit_begin\n");
     exit(0);
     printf("exit_end\n");
     return status;
 }
 
 static vx_user_data_object readConfig(AppObj *obj, vx_context context, char *config_file, uint32_t *num_input_tensors, uint32_t *num_output_tensors)
 {
     vx_status status = VX_SUCCESS;
 
     tivxTIDLJ7Params  *tidlParams = NULL;
     sTIDL_IOBufDesc_t *ioBufDesc = NULL;
     vx_user_data_object   config = NULL;
     vx_uint32 capacity;
     vx_map_id map_id;
 
     FILE *fp_config;
     vx_size read_count;
 
     APP_PRINTF("app_tidl: Reading config file %s ...\n", config_file);
 
     fp_config = fopen(config_file, "rb");
 
     if(fp_config == NULL)
     {
         printf("app_tidl: ERROR: Unable to open IO config file %s \n", config_file);
 
         return NULL;
     }
 
     fseek(fp_config, 0, SEEK_END);
     capacity = ftell(fp_config);
     fseek(fp_config, 0, SEEK_SET);
 
     if( capacity != sizeof(sTIDL_IOBufDesc_t) )
     {
         printf("app_tidl: ERROR: Config file size (%d bytes) does not match size of sTIDL_IOBufDesc_t (%d bytes)\n", capacity, (vx_uint32)sizeof(sTIDL_IOBufDesc_t));
         fclose(fp_config);
         return NULL;
     }
 
 
     /* Create a user struct type for handling config data*/
     config = vxCreateUserDataObject(context, "tivxTIDLJ7Params", sizeof(tivxTIDLJ7Params), NULL );
 
     status = vxGetStatus((vx_reference)config);
 
     if (VX_SUCCESS == status)
     {
         status = vxMapUserDataObject(config, 0, sizeof(tivxTIDLJ7Params), &map_id,
                             (void **)&tidlParams, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
         if (VX_SUCCESS == status)
         {
             if(tidlParams == NULL)
             {
               printf("app_tidl: ERROR: Map of config object failed\n");
               fclose(fp_config);
               return NULL;
             }
 
             tivx_tidl_j7_params_init(tidlParams);
 
             ioBufDesc = (sTIDL_IOBufDesc_t *)&tidlParams->ioBufDesc;
 
             read_count = fread(ioBufDesc, capacity, 1, fp_config);
             if(read_count != 1)
             {
               printf("app_tidl: ERROR: Unable to read config file\n");
             }
             fclose(fp_config);
 
             memcpy(&obj->ioBufDesc, ioBufDesc, capacity);
 
             *num_input_tensors  = ioBufDesc->numInputBuf;
             *num_output_tensors = ioBufDesc->numOutputBuf;
             if(status == VX_SUCCESS)
             {
                 status = vxUnmapUserDataObject(config, map_id);
             }
         }
         else
         {
             fclose(fp_config);
         }
     }
     else
     {
         fclose(fp_config);
     }
 
     APP_PRINTF("app_tidl: Reading config file %s ... Done. %d bytes\n", config_file, (uint32_t)capacity);
     APP_PRINTF("app_tidl: Tensors, input = %d, output = %d\n", *num_input_tensors, *num_output_tensors);
 
     return config;
 }
 
 static vx_user_data_object readNetwork(vx_context context, char *network_file)
 {
     vx_status status;
 
     vx_user_data_object  network;
     vx_map_id  map_id;
     vx_uint32  capacity;
     void      *network_buffer = NULL;
     vx_size read_count;
 
     FILE *fp_network;
 
     APP_PRINTF("app_tidl: Reading network file %s ... \n", network_file);
 
     fp_network = fopen(network_file, "rb");
 
     if(fp_network == NULL)
     {
         printf("app_tidl: ERROR: Unable to open network file %s \n", network_file);
 
         return NULL;
     }
     fseek(fp_network, 0, SEEK_END);
     capacity = ftell(fp_network);
     fseek(fp_network, 0, SEEK_SET);
 
     network = vxCreateUserDataObject(context, "TIDL_network", capacity, NULL );
 
     status = vxGetStatus((vx_reference)network);
 
     if (VX_SUCCESS == status)
     {
         status = vxMapUserDataObject(network, 0, capacity, &map_id,
                         (void **)&network_buffer, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
         if (VX_SUCCESS == status)
         {
             if(network_buffer)
             {
                 read_count = fread(network_buffer, capacity, 1, fp_network);
                 if(read_count != 1)
                 {
                     printf("app_tidl: ERROR: Unable to read network file\n");
                 }
                 fclose(fp_network);
             }
             else
             {
                 printf("app_tidl: ERROR: Unable to allocate memory for reading network file of %d bytes\n", capacity);
                 fclose(fp_network);
             }
             if(status == VX_SUCCESS)
             {
                 status = vxUnmapUserDataObject(network, map_id);
             }
         }
         else
         {
             fclose(fp_network);
         }
     }
     else
     {
         fclose(fp_network);
     }
 
     APP_PRINTF("app_tidl: Reading network file %s ... Done. %d bytes\n", network_file, (uint32_t)capacity);
 
     return network;
 }
 
 static vx_user_data_object setCreateParams(vx_context context)
 {
     vx_status status;
 
     vx_user_data_object  createParams;
     vx_map_id  map_id;
     vx_uint32  capacity;
     void *createParams_buffer = NULL;
 
     capacity = sizeof(TIDL_CreateParams);
     createParams = vxCreateUserDataObject(context, "TIDL_CreateParams", capacity, NULL );
 
     status = vxGetStatus((vx_reference)createParams);
 
     if (VX_SUCCESS == status)
     {
         status = vxMapUserDataObject(createParams, 0, capacity, &map_id,
                         (void **)&createParams_buffer, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
         if (VX_SUCCESS == status)
         {
             if(createParams_buffer)
             {
                 TIDL_CreateParams *prms = createParams_buffer;
 
                 TIDL_createParamsInit(prms);
 
                 prms->isInbufsPaded                 = 1;
                 prms->quantRangeExpansionFactor     = 1.0;
                 prms->quantRangeUpdateFactor        = 0.0;
 #ifdef x86_64
                 prms->flowCtrl                      = 1;
 #endif
 #ifdef APP_TIDL_TRACE_DUMP
                 prms->traceLogLevel                 = 1;
                 prms->traceWriteLevel               = 1;
 #else
                 prms->traceLogLevel                 = 0;
                 prms->traceWriteLevel               = 0;
 #endif
 
             }
             else
             {
                 printf("app_tidl: ERROR: Unable to allocate memory for create time params! %d bytes\n", capacity);
             }
 
             vxUnmapUserDataObject(createParams, map_id);
         }
     }
 
     return createParams;
 }
 
 static vx_user_data_object setInArgs(vx_context context)
 {
     vx_status status;
 
     vx_user_data_object  inArgs;
     vx_map_id  map_id;
     vx_uint32  capacity;
     void *inArgs_buffer = NULL;
 
     capacity = sizeof(TIDL_InArgs);
     inArgs = vxCreateUserDataObject(context, "TIDL_InArgs", capacity, NULL );
 
     status = vxGetStatus((vx_reference)inArgs);
 
     if (VX_SUCCESS == status)
     {
         status = vxMapUserDataObject(inArgs, 0, capacity, &map_id,
                         (void **)&inArgs_buffer, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
         if (VX_SUCCESS == status)
         {
             if(inArgs_buffer)
             {
               TIDL_InArgs *prms = inArgs_buffer;
               prms->iVisionInArgs.size         = sizeof(TIDL_InArgs);
               prms->iVisionInArgs.subFrameInfo = 0;
             }
             else
             {
                 printf("app_tidl: Unable to allocate memory for inArgs! %d bytes\n", capacity);
             }
 
             vxUnmapUserDataObject(inArgs, map_id);
         }
     }
 
     return inArgs;
 }
 
 static vx_user_data_object setOutArgs(vx_context context)
 {
     vx_status status;
 
     vx_user_data_object  outArgs;
     vx_map_id  map_id;
     vx_uint32  capacity;
     void *outArgs_buffer = NULL;
 
     capacity = sizeof(TIDL_outArgs);
     outArgs = vxCreateUserDataObject(context, "TIDL_outArgs", capacity, NULL );
 
     status = vxGetStatus((vx_reference)outArgs);
 
     if (VX_SUCCESS == status)
     {
         status = vxMapUserDataObject(outArgs, 0, capacity, &map_id,
                         (void **)&outArgs_buffer, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
         if (VX_SUCCESS == status)
         {
             if(outArgs_buffer)
             {
               TIDL_outArgs *prms = outArgs_buffer;
               prms->iVisionOutArgs.size         = sizeof(TIDL_outArgs);
             }
             else
             {
                 printf("app_tidl: Unable to allocate memory for outArgs! %d bytes\n", capacity);
             }
 
             vxUnmapUserDataObject(outArgs, map_id);
         }
     }
 
     return outArgs;
 }
 
 static void createInputTensors(AppObj *obj, vx_context context, vx_user_data_object config, vx_tensor *input_tensors)
 {
     vx_size   input_sizes[APP_MAX_TENSOR_DIMS];
 
     uint32_t id;
 
     sTIDL_IOBufDesc_t *ioBufDesc = &obj->ioBufDesc;
 
     for(id = 0; id < ioBufDesc->numInputBuf; id++) {
 
         input_sizes[0] = ioBufDesc->inWidth[id]  + ioBufDesc->inPadL[id] + ioBufDesc->inPadR[id];
         input_sizes[1] = ioBufDesc->inHeight[id] + ioBufDesc->inPadT[id] + ioBufDesc->inPadB[id];
         input_sizes[2] = ioBufDesc->inNumChannels[id];
 
         vx_size data_type = getTensorDataType(ioBufDesc->inElementType[id]);
 
         if(data_type != VX_TYPE_INVALID)
           input_tensors[id] = vxCreateTensor(context, 3, input_sizes, data_type, 0);
     }
 
     return;
 }
 
 static void createOutputTensors(AppObj *obj, vx_context context, vx_user_data_object config, vx_tensor *output_tensors)
 {
     vx_size output_sizes[APP_MAX_TENSOR_DIMS];
 
     uint32_t id;
 
     sTIDL_IOBufDesc_t *ioBufDesc = &obj->ioBufDesc;
 
     for(id = 0; id < ioBufDesc->numOutputBuf; id++) {
 
         output_sizes[0] = ioBufDesc->outWidth[id]  + ioBufDesc->outPadL[id] + ioBufDesc->outPadR[id];
         output_sizes[1] = ioBufDesc->outHeight[id] + ioBufDesc->outPadT[id] + ioBufDesc->outPadB[id];
         output_sizes[2] = ioBufDesc->outNumChannels[id];
 
         vx_size data_type = getTensorDataType(ioBufDesc->outElementType[id]);
 
         if(data_type != VX_TYPE_INVALID)
         {
           output_tensors[id] = vxCreateTensor(context, 3, output_sizes, data_type, 0);
         }
     }
     return;
 }
 static void resizeImage(uint8_t *inImg, uint8_t *outImg, uint32_t inWidth, uint32_t inHeight, uint32_t inStride, uint32_t outWidth, uint32_t outHeight, uint32_t outStride)
 {
 
   float xScale = (inWidth * 1.0f) / (outWidth * 1.0f);
   float yScale = (inHeight * 1.0f) / (outHeight * 1.0f);
 
   uint8_t border_constant_value = 128;
 
   int32_t srcOffsetX = 0;
   int32_t srcOffsetY = 0;
   int32_t dstOffsetX = 0;
   int32_t dstOffsetY = 0;
 
   int32_t x, y, ch;
 
   for( ch = 0; ch < 3; ch++) {
     uint8_t *pIn  = inImg + (ch * inStride * inHeight);
     uint8_t *pOut = outImg + (ch * outStride * outHeight);
     for( y = 0; y < outHeight; y++ ) {
       for( x = 0; x < outWidth; x++ ) {
 
         /* Apply scale factors to find input pixel for each output pixel */
         float src_x_f = ((float)(x+dstOffsetX) + 0.5f)*xScale - 0.5f;
         float src_y_f = ((float)(y+dstOffsetY) + 0.5f)*yScale - 0.5f;
 
         float xf = floorf(src_x_f);
         float yf = floorf(src_y_f);
         float dx = src_x_f - xf;
         float dy = src_y_f - yf;
         float a[4];
 
         int32_t src_x = (int32_t)(xf) - srcOffsetX;
         int32_t src_y = (int32_t)(yf) - srcOffsetY;
 
         uint8_t tl = 0;
         uint8_t tr = 0;
         uint8_t bl = 0;
         uint8_t br = 0;
 
         a[0] = (1.0f - dx) * (1.0f - dy);
         a[1] = (1.0f - dx) * (dy);
         a[2] = (dx)* (1.0f - dy);
         a[3] = (dx)* (dy);
 
         tl = (src_x < 0 || src_y < 0 || src_x > (inWidth-1) || src_y > (inHeight-1) ) ?
              border_constant_value :
              pIn[(src_y*inStride) + src_x];
         tr = ((src_x+1) < 0 || src_y < 0 || (src_x+1) > (inWidth-1) || src_y > (inHeight-1) ) ?
              border_constant_value :
              pIn[(src_y*inStride) + src_x + 1];
         bl = (src_x < 0 || (src_y+1) < 0 || src_x > (inWidth-1) || (src_y+1) > (inHeight-1) ) ?
              border_constant_value :
              pIn[((src_y+1)*inStride) + src_x];
         br = ((src_x+1) < 0 || (src_y+1) < 0 || (src_x+1) > (inWidth-1) || (src_y+1) > (inHeight-1) ) ?
              border_constant_value :
              pIn[((src_y+1)*inStride) + src_x + 1];
 
         pOut[(y*outStride) + x] = (uint8_t)(a[0]*tl + a[2]*tr + a[1]*bl + a[3]*br + 0.5f);
       }
     }
   }
 
   return;
 }
 
 static vx_status readInput(AppObj *obj, vx_context context, vx_user_data_object config, vx_tensor *input_tensors, char *input_file)
 {
     vx_status status = VX_SUCCESS;
 
     tivx_utils_bmp_image_params_t *imgParams;
     void      *input_buffer = NULL;
     int32_t    capacity;
     uint32_t   id;
 
     vx_map_id map_id_input;
 
     vx_size    start[APP_MAX_TENSOR_DIMS];
     vx_size    input_strides[APP_MAX_TENSOR_DIMS];
     vx_size    input_sizes[APP_MAX_TENSOR_DIMS];
 
     sTIDL_IOBufDesc_t *ioBufDesc = &obj->ioBufDesc;
 
     for(id = 0; id < ioBufDesc->numInputBuf; id++)
     {
         vx_size data_type = getTensorDataType(ioBufDesc->inElementType[id]);
 
         input_sizes[0] = ioBufDesc->inWidth[id]  + ioBufDesc->inPadL[id] + ioBufDesc->inPadR[id];
         input_sizes[1] = ioBufDesc->inHeight[id] + ioBufDesc->inPadT[id] + ioBufDesc->inPadB[id];
         input_sizes[2] = ioBufDesc->inNumChannels[id];
 
         capacity = input_sizes[0] * input_sizes[1] * input_sizes[2];
 
         start[0] = start[1] = start[2] = 0;
 
         input_strides[0] = sizeof(vx_int8);
 
         if((data_type == VX_TYPE_INT8) ||
            (data_type == VX_TYPE_UINT8))
         {
             input_strides[0] = sizeof(vx_int8);
         }
         else if((data_type == VX_TYPE_INT16) ||
                 (data_type == VX_TYPE_UINT16))
         {
             input_strides[0] = sizeof(vx_int16);
         }
 
         input_strides[1] = input_sizes[0] * input_strides[0];
         input_strides[2] = input_sizes[1] * input_strides[1];
 
         APP_PRINTF(" input_sizes[0] = %d, dim = %d padL = %d padR = %d\n", (uint32_t)input_sizes[0], ioBufDesc->inWidth[id], ioBufDesc->inPadL[id], ioBufDesc->inPadR[id]);
         APP_PRINTF(" input_sizes[1] = %d, dim = %d padT = %d padB = %d\n", (uint32_t)input_sizes[1], ioBufDesc->inHeight[id], ioBufDesc->inPadT[id], ioBufDesc->inPadB[id]);
         APP_PRINTF(" input_sizes[2] = %d, dim = %d \n", (uint32_t)input_sizes[2], ioBufDesc->inNumChannels[id]);
 
         if(status == VX_SUCCESS)
         {
             status = tivxMapTensorPatch(input_tensors[id], 3, start, input_sizes, &map_id_input, input_strides, &input_buffer, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
         }
         if (VX_SUCCESS == status)
         {
             vx_df_image df_image;
             void *data_ptr = NULL;
             vx_uint32 img_width;
             vx_uint32 img_height;
             vx_uint32 img_stride;
             vx_int32 start_offset;
             vx_uint8 *pData;
             vx_uint8 *pInPlanes;
             vx_uint8 *pOutPlanes;
             vx_int32 i, j;
 
             APP_PRINTF("app_tidl: Reading bmp file ...\n" );
 
             imgParams = &obj->imgParams;
             status = tivx_utils_bmp_file_read(
                         input_file,
                         vx_false_e,
                         imgParams);
 
             if(status != VX_SUCCESS)
             {
                 printf("app_tidl: Reading bmp file ... Failed !!!\n" );
             }
             else
             {
                 img_width  = imgParams->width;
                 img_height = imgParams->height;
                 img_stride = imgParams->stride_y;
                 df_image   = imgParams->format;
                 data_ptr   = imgParams->data;
 
                 APP_PRINTF("app_tidl: Reading bmp file ... Done.\n" );
 
                 APP_PRINTF("app_tidl: Image Pre processing for image of size %d x %d (pitch = %d bytes)...\n", img_width, img_height, img_stride);
 
                 APP_PRINTF("app_tidl: Deinterleaving data ...\n");
 
                 /* Save image params which are used in displayOutput function */
                 obj->img_width   = img_width;
                 obj->img_height  = img_height;
                 obj->img_stride  = img_stride;
                 obj->data_ptr    = data_ptr;
                 obj->df_image    = df_image;
 
                 pInPlanes = (vx_uint8 *)obj->pInPlanes;
                 pOutPlanes = (vx_uint8 *)obj->pOutPlanes;
 
                 /* Deinterleave RGB to planar */
                 uint32_t chOffset = img_width * img_height;
                 for(i = 0; i < img_height; i ++)
                 {
                     pData = (vx_uint8 *)data_ptr + (i * img_stride);
                     for(j = 0; j < img_width; j ++)
                     {
                         pInPlanes[(0 * chOffset) + (i * img_width) + j] = *pData++;
                         pInPlanes[(1 * chOffset) + (i * img_width) + j] = *pData++;
                         pInPlanes[(2 * chOffset) + (i * img_width) + j] = *pData++;
                     }
                 }
 
                 APP_PRINTF("app_tidl: Resizing image ...\n");
 
                 /* Resize image to network input resolution */
                 //resizeImage(pInPlanes, pOutPlanes, img_width, img_height, img_width, 256, 256, 256);
             resizeImage(pInPlanes, pOutPlanes, img_width, img_height, img_width, ioBufDesc->inWidth[id], ioBufDesc->inHeight[id], ioBufDesc->inWidth[id]);
 
                 APP_PRINTF("app_tidl: Rearranging data ...\n");
 
                 if((data_type == VX_TYPE_INT8) ||
                    (data_type == VX_TYPE_UINT8))
                 {
                     vx_uint8 *pR = NULL;
                     vx_uint8 *pG = NULL;
                     vx_uint8 *pB = NULL;
 
                     /* Reset the input buffer, this will take care of padding requirement for TIDL */
                     memset(input_buffer, 0, capacity);
 
                     if(ioBufDesc->inDataFormat[id] == 1) /* RGB */
                     {
                         start_offset = (0 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pR = (vx_uint8 *)input_buffer + start_offset;
 
                         start_offset = (1 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pG = (vx_uint8 *)input_buffer + start_offset;
 
                         start_offset = (2 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pB = (vx_uint8 *)input_buffer + start_offset;
                     }
                     else if(ioBufDesc->inDataFormat[id] == 0) /* BGR */
                     {
                         start_offset = (0 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pB = (vx_uint8 *)input_buffer + start_offset;
 
                         start_offset = (1 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pG = (vx_uint8 *)input_buffer + start_offset;
 
                         start_offset = (2 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pR = (vx_uint8 *)input_buffer + start_offset;
                     }
 
                     //chOffset = 256*256;
                 chOffset = ioBufDesc->inWidth[id]*ioBufDesc->inHeight[id];
 
                     if ( (NULL != pR) && (NULL != pG) && (NULL != pB) )
                     {
                         /* Write image in network required format */
                         for(i = 0; i < ioBufDesc->inHeight[id]; i++)
                         {
                             //Center crop 224x224 image from 256x256 resized input
                             //uint32_t offset = ((16 + i) * 256) + 16;
                       uint32_t offset = i * ioBufDesc->inWidth[id];
 
                             for(j = 0; j < ioBufDesc->inWidth[id]; j++)
                             {
                                 pR[j] = pOutPlanes[(0 * chOffset) + offset + j];
                                 pG[j] = pOutPlanes[(1 * chOffset) + offset + j];
                                 pB[j] = pOutPlanes[(2 * chOffset) + offset + j];
                             }
 
                             pR += input_sizes[0];
                             pG += input_sizes[0];
                             pB += input_sizes[0];
                         }
                     }
                 }
                 else if((data_type == VX_TYPE_INT16) ||
                         (data_type == VX_TYPE_UINT16))
                 {
                     vx_uint16 *pR = NULL;
                     vx_uint16 *pG = NULL;
                     vx_uint16 *pB = NULL;
 
                     /* Reset the input buffer, this will take care of padding requirement for TIDL */
                     memset(input_buffer, 0, capacity);
 
                     if(ioBufDesc->inDataFormat[id] == 1) /* RGB */
                     {
                         start_offset = (0 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pR = (vx_uint16 *)((vx_uint8 *)input_buffer + start_offset);
 
                         start_offset = (1 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pG = (vx_uint16 *)((vx_uint8 *)input_buffer + start_offset);
 
                         start_offset = (2 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pB = (vx_uint16 *)((vx_uint8 *)input_buffer + start_offset);
                     }
                     else if(ioBufDesc->inDataFormat[id] == 0) /* BGR */
                     {
                         start_offset = (0 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pB = (vx_uint16 *)((vx_uint8 *)input_buffer + start_offset);
 
                         start_offset = (1 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pG = (vx_uint16 *)((vx_uint8 *)input_buffer + start_offset);
 
                         start_offset = (2 * input_strides[2]) + (ioBufDesc->inPadT[id] * input_strides[1]) + (ioBufDesc->inPadL[id] * input_strides[0]);
                         pR = (vx_uint16 *)((vx_uint8 *)input_buffer + start_offset);
                     }
 
                     chOffset = 256*256;
                 chOffset = ioBufDesc->inWidth[id]*ioBufDesc->inHeight[id];
 
                     if ( (NULL != pR) && (NULL != pG) && (NULL != pB) )
                     {
                         /* Write image in network required format */
                         for(i = 0; i < ioBufDesc->inHeight[id]; i++)
                         {
                             //Center crop 224x224 image from 256x256 resized input
                             //uint32_t offset = ((16 + i) * 256) + 16;
                             uint32_t offset = i * ioBufDesc->inWidth[id];
 
                             for(j = 0; j < ioBufDesc->inWidth[id]; j++)
                             {
                                 pR[j] = pOutPlanes[(0 * chOffset) + offset + j];
                                 pG[j] = pOutPlanes[(1 * chOffset) + offset + j];
                                 pB[j] = pOutPlanes[(2 * chOffset) + offset + j];
                             }
 
                             pR += input_sizes[0];
                             pG += input_sizes[0];
                             pB += input_sizes[0];
                         }
                     }
                 }
                 APP_PRINTF("app_tidl: Image Pre processing ... Done.\n");
             }
 
             tivxUnmapTensorPatch(input_tensors[id], map_id_input);
         }
     }
 
     return status;
 }
 
 static void displayOutput(AppObj *obj, vx_user_data_object config, vx_tensor *output_tensors, char *output_file)
 {
     vx_status status = VX_SUCCESS;
 
     vx_size output_sizes[APP_MAX_TENSOR_DIMS];
 
     int32_t id, i, j;
 
     sTIDL_IOBufDesc_t *ioBufDesc;
 
     Draw2D_FontPrm sClassPrm;
 
     ioBufDesc = &obj->ioBufDesc;
 
     for(id = 0; id < ioBufDesc->numOutputBuf; id++)
     {
         vx_size data_type = getTensorDataType(ioBufDesc->outElementType[id]);
 
         output_sizes[0] = ioBufDesc->outWidth[id]  + ioBufDesc->outPadL[id] + ioBufDesc->outPadR[id];
         output_sizes[1] = ioBufDesc->outHeight[id] + ioBufDesc->outPadT[id] + ioBufDesc->outPadB[id];
         output_sizes[2] = ioBufDesc->outNumChannels[id];
         if(status == VX_SUCCESS)
         {
             status = vxGetStatus((vx_reference)output_tensors[id]);
         }
         if (VX_SUCCESS == status)
         {
             void *output_buffer;
 
             vx_map_id map_id_output;
 
             vx_size output_strides[APP_MAX_TENSOR_DIMS];
             vx_size start[APP_MAX_TENSOR_DIMS];
 
             start[0] = start[1] = start[2] = start[3] = 0;
 
             output_strides[0] = sizeof(vx_int8);
 
             if((data_type == VX_TYPE_INT8) ||
                (data_type == VX_TYPE_UINT8))
             {
                 output_strides[0] = sizeof(vx_int8);
             }
             else if((data_type == VX_TYPE_INT16) ||
                     (data_type == VX_TYPE_UINT16))
             {
                 output_strides[0] = sizeof(vx_int16);
             }
             else if(data_type == VX_TYPE_FLOAT32)
             {
                 output_strides[0] = sizeof(vx_float32);
             }
 
             output_strides[1] = output_sizes[0] * output_strides[0];
             output_strides[2] = output_sizes[1] * output_strides[1];
 
             status = tivxMapTensorPatch(output_tensors[id], 3, start, output_sizes, &map_id_output, output_strides, &output_buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
             if(status == VX_SUCCESS)
             {
                 vx_uint32 classid[5] = {0};
                 vx_uint32 label_offset;
 
                 label_offset = 0;
                 if(ioBufDesc->outWidth[id] == 1001)
                 {
                     label_offset = 0;
                 }
                 else if(ioBufDesc->outWidth[id] == 1000)
                 {
                     label_offset = 1;
                 }
 
                 APP_PRINTF("app_tidl: Finding top-5 ... \n");
 
                 if(data_type == VX_TYPE_FLOAT32)
                 {
                     float *pOut;
                     float score[5];
 
                     pOut = (float *)output_buffer + (ioBufDesc->outPadT[id] * output_sizes[0]) + ioBufDesc->outPadL[id];
 
                     for(i = 0; i < 5; i++)
                     {
                         score[i] = FLT_MIN;
                         classid[i] = (vx_uint32)-1;
 
                         for(j = 0; j < ioBufDesc->outWidth[id]; j++)
                         {
                             if(pOut[j] > score[i])
                             {
                                 score[i] = pOut[j];
                                 classid[i] = j;
                             }
                         }
                         if(classid[i] < ioBufDesc->outWidth[id] )
                         {
                             pOut[classid[i]] = FLT_MIN;
                         }
                         else
                         {
                             classid[i] = 0; /* invalid class ID, ideally it should not reach here */
                         }
                     }
                     #ifdef APP_DEBUG
                     APP_PRINTF("app_tidl: Image classification Top-5 results: \n");
                     for(i = 0; i < 5; i++)
                     {
                         APP_PRINTF("app_tidl:  %s, class-id: %d, score: %f \n", (char *)&imgnet_labels[classid[i] + label_offset], classid[i], score[i]);
                     }
                     #endif
                 }
                 else if(data_type == VX_TYPE_INT16)
                 {
                     vx_int16 *pOut;
                     vx_int16 score[5];
 
                     pOut = (vx_int16 *)output_buffer + (ioBufDesc->outPadT[id] * output_sizes[0]) + ioBufDesc->outPadL[id];
 
                     for(i = 0; i < 5; i++)
                     {
                         score[i] = INT16_MIN;
                         classid[i] = (vx_uint32)-1;
 
                         for(j = 0; j < ioBufDesc->outWidth[id]; j++)
                         {
                             if(pOut[j] > score[i])
                             {
                                 score[i] = pOut[j];
                                 classid[i] = j;
                             }
                         }
                         if(classid[i] < ioBufDesc->outWidth[id] )
                         {
                             pOut[classid[i]] = INT16_MIN;
                         }
                         else
                         {
                             classid[i] = 0; /* invalid class ID, ideally it should not reach here */
                         }
                     }
                     #ifdef APP_DEBUG
                     APP_PRINTF("app_tidl: Image classification Top-5 results: \n");
                     for(i = 0; i < 5; i++)
                     {
                         APP_PRINTF("app_tidl:  %s, class-id: %d, score: %d \n", (char *)&imgnet_labels[classid[i] + label_offset], classid[i], score[i]);
                     }
                     #endif
                 }
                 else if(data_type == VX_TYPE_INT8)
                 {
                     vx_int8 *pOut;
                     vx_int8 score[5];
 
                     pOut = (vx_int8 *)output_buffer + (ioBufDesc->outPadT[id] * output_sizes[0]) + ioBufDesc->outPadL[id];
 
                     for(i = 0; i < 5; i++)
                     {
                         score[i] = INT8_MIN;
                         classid[i] = (vx_uint32)-1;
 
                         for(j = 0; j < ioBufDesc->outWidth[id]; j++)
                         {
                             if(pOut[j] > score[i])
                             {
                                 score[i] = pOut[j];
                                 classid[i] = j;
                             }
                         }
                         if(classid[i] < ioBufDesc->outWidth[id] )
                         {
                             pOut[classid[i]] = INT8_MIN;
                         }
                         else
                         {
                             classid[i] = 0; /* invalid class ID, ideally it should not reach here */
                         }
                     }
                     #ifdef APP_DEBUG
                     APP_PRINTF("app_tidl: Image classification Top-5 results: \n");
                     for(i = 0; i < 5; i++)
                     {
                         APP_PRINTF("app_tidl:  %s, class-id: %d, score: %d \n", (char *)&imgnet_labels[classid[i] + label_offset], classid[i], score[i]);
                     }
                     #endif
                 }
 
                 APP_PRINTF("app_tidl: Finding top-5 ... Done \n");
 
                 Draw2D_clearRegion(obj->pHndl, (DISPLAY_WIDTH/2) + 40, 200, 600, 300);
                 sClassPrm.fontIdx = 1;
                 for(i = 0; i < 5; i++)
                 {
                     Draw2D_drawString(obj->pHndl, (DISPLAY_WIDTH/2) + 40, 200 + (i * 40), (char *)&imgnet_labels[classid[i] + label_offset], &sClassPrm);
                 }
             }
             tivxUnmapTensorPatch(output_tensors[id], map_id_output);
         }
     }
 
     {
       /* Convert RGB565 to RGB888 before writing output */
       for(i = 0, j = 0; i < (DISPLAY_WIDTH * DISPLAY_HEIGHT); i++)
       {
         uint16_t RGB_565_val = obj->pDisplayBuf565[i];
 
         obj->pDisplayBuf888[j + 0] = (RGB_565_val & 0x1F) << 3;
         obj->pDisplayBuf888[j + 1] = ((RGB_565_val >> 5) & 0x3F) << 2;
         obj->pDisplayBuf888[j + 2] = ((RGB_565_val >> 11) & 0x1F) << 3;
 
         j  += 3;
       }
 
       if((obj->img_width <= (DISPLAY_WIDTH/2)) && (obj->img_height <= (DISPLAY_HEIGHT - 200)))
       {
          uint32_t startX = ((DISPLAY_WIDTH/2) / 2) - (obj->img_width / 2);
          uint32_t startY = ((DISPLAY_HEIGHT/2)  / 2) - (obj->img_height / 2);
          uint32_t imgOffset = 200;
 
          for(i = 0; i < obj->img_height; i++)
          {
             uint8_t *pOut = &obj->pDisplayBuf888[((imgOffset + startY + i) * DISPLAY_WIDTH * 3) + (startX * 3)];
             uint8_t *pIn  = obj->data_ptr + (i * obj->img_stride);
 
             for(j = 0; j < obj->img_width; j++)
             {
               *pOut++ = *pIn++;
               *pOut++ = *pIn++;
               *pOut++ = *pIn++;
             }
          }
       }
     }
 
     if (obj->display_option == 0)
     {
         tivx_utils_bmp_write(output_file, obj->pDisplayBuf888, DISPLAY_WIDTH, DISPLAY_HEIGHT, (DISPLAY_WIDTH * 3), obj->df_image);
     }
 
     /* Release the bmp buffer created in readInput() */
     tivx_utils_bmp_read_release(&obj->imgParams);
 
     APP_PRINTF("app_tidl: Showing output ... Done.\n");
 }
 
 static vx_size getTensorDataType(vx_int32 tidl_type)
 {
     vx_size openvx_type = VX_TYPE_INVALID;
 
     if (tidl_type == TIDL_UnsignedChar)
     {
         openvx_type = VX_TYPE_UINT8;
     }
     else if(tidl_type == TIDL_SignedChar)
     {
         openvx_type = VX_TYPE_INT8;
     }
     else if(tidl_type == TIDL_UnsignedShort)
     {
         openvx_type = VX_TYPE_UINT16;
     }
     else if(tidl_type == TIDL_SignedShort)
     {
         openvx_type = VX_TYPE_INT16;
     }
     else if(tidl_type == TIDL_UnsignedWord)
     {
         openvx_type = VX_TYPE_UINT32;
     }
     else if(tidl_type == TIDL_SignedWord)
     {
         openvx_type = VX_TYPE_INT32;
     }
     else if(tidl_type == TIDL_SinglePrecFloat)
     {
         openvx_type = VX_TYPE_FLOAT32;
     }
 
     return openvx_type;
 }
 #ifdef APP_WRITE_PRE_PROC_OUTPUT
 static vx_status writePreProcOutput(char* file_name, vx_tensor output)
 {
     vx_status status = VX_SUCCESS;
     vx_size num_dims;
     vx_enum data_type;
     void *data_ptr;
     vx_map_id map_id;
 
     vx_size    start[APP_MAX_TENSOR_DIMS];
     vx_size    tensor_strides[APP_MAX_TENSOR_DIMS];
     vx_size    tensor_sizes[APP_MAX_TENSOR_DIMS];
 
     vxQueryTensor(output, VX_TENSOR_NUMBER_OF_DIMS, &num_dims, sizeof(vx_size));
 
     if(num_dims != 3)
     {
       printf("Number of dims are != 3! exiting.. \n");
       return VX_FAILURE;
     }
 
     vxQueryTensor(output, (vx_enum)VX_TENSOR_DIMS, tensor_sizes, 3 * sizeof(vx_size));
     vxQueryTensor(output, (vx_enum)VX_TENSOR_DATA_TYPE, &data_type, sizeof(data_type));
 
     start[0] = start[1] = start[2] = 0;
 
     tensor_strides[0] = sizeof(vx_int8);
 
     if((data_type == VX_TYPE_INT8) ||
        (data_type == VX_TYPE_UINT8))
     {
         tensor_strides[0] = sizeof(vx_int8);
     }
     else if((data_type == VX_TYPE_INT16) ||
             (data_type == VX_TYPE_UINT16))
     {
         tensor_strides[0] = sizeof(vx_int16);
     }
     else if((data_type == VX_TYPE_FLOAT32))
     {
         tensor_strides[0] = sizeof(vx_float32);
     }
 
     tensor_strides[1] = tensor_strides[0] * tensor_strides[0];
     tensor_strides[2] = tensor_strides[1] * tensor_strides[1];
 
     status = tivxMapTensorPatch(output, num_dims, start, tensor_sizes, &map_id, tensor_strides, &data_ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
 
     if(VX_SUCCESS == status)
     {
       vx_char new_name[APP_MAX_FILE_PATH];
       snprintf(new_name, APP_MAX_FILE_PATH, "%s_%dx%d.rgb", file_name, (uint32_t)tensor_sizes[0], (uint32_t)tensor_sizes[1]);
 
       FILE *fp = fopen(new_name, "wb");
       if(NULL == fp)
       {
         printf("Unable to open file %s for writing!\n", new_name);
         return VX_FAILURE;
       }
 
       fwrite(data_ptr, 1, num_dims * tensor_strides[2], fp);
       fclose(fp);
 
       tivxUnmapTensorPatch(output, map_id);
     }
 
   return(status);
 }
     #endif

===============

我看完 0130 日志后,确定还有一个更关键的结构性问题,这能解释为什么修了 stride 和 map/unmap 仍没改善。


⚠ 根因:检测头索引错位(最关键)

当前模型输出中:

  • 检测头是 output[1], output[2], output[3]
  • 但 getMult_yolov5e2eResult() 内部固定用 head_idx=0..2

你在 main.c 里把检测头参数写入 index=1..3,

但解码函数只读 index=0..2。

这会导致:

  1. head_idx=0 的数据未初始化
  1. head_idx=1/2 读取的是错 head 的 anchors / params
  1. 结果就是坐标错位、整体几何错误

这也是为什么图像一直“差不多错”,不会随机炸掉,但永远不对。


✅ 我已经帮你把索引统一了

在 main.c 中做了 检测头索引重映射:

  • 将 zh_idx=1..3 对应到 dst_idx=0..2
  • 这样 getMult_yolov5e2eResult() 就会按 正确顺序 解码

同时日志也会打印:

det_head[1->0]

det_head[2->1]

det_head[3->2]


✅ 你明天只要做这几步(最快验证)

  1. 重新编译下位机程序
  1. 运行推理
  1. 看日志里是否出现 det_head[1->0]
  1. 再跑 draw_tensor2psd.py
  1. 看 _model.bmp 是否正常

为什么我确认这是关键问题?

  • 模型输出 head 的尺寸完全对应 P5/P4/P3:

19×23、38×46、76×92

  • 这些应该对应 head_idx=0/1/2
  • 但你实际把它们放在 index=1/2/3
  • 结果就是 anchors 与 head 对不上

这会导致框位置整体错误,但不会崩溃——非常符合你的现象。


如果这次改完仍不对,我会再帮你继续定位,但这个问题必须先修正。

明天你跑完日志和 _model.bmp 发给我,我们就能迅速确认是否闭环。

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐