#include #include "armnn/IRuntime.hpp" #include "armnn/INetwork.hpp" #include "armnn/Types.hpp" #include "armnnDeserializer/IDeserializer.hpp" #include "armnnTfLiteParser/ITfLiteParser.hpp" #include "armnnOnnxParser/IOnnxParser.hpp" using namespace armnn; class Ann { public: int load(const char *modelPath, const char *inputName, const char *outputName, bool fastMath, bool saveCachedNetwork, const char *cachedNetworkPath) { BindingPointInfo inputInfo; BindingPointInfo outputInfo; INetworkPtr network = loadModel(modelPath, inputName, outputName, inputInfo, outputInfo); auto n = network.get(); IOptimizedNetworkPtr optNet = OptimizeNetwork(n, fastMath, saveCachedNetwork, cachedNetworkPath); NetworkId netId; Status status = runtime->LoadNetwork(netId, std::move(optNet)); inputInfos[netId] = inputInfo; outputInfos[netId] = outputInfo; return netId; } void embed(NetworkId netId, const void *inputData, void *outputData) { const BindingPointInfo *inputInfo = &inputInfos[netId]; const BindingPointInfo *outputInfo = &outputInfos[netId]; InputTensors inputTensors = {{inputInfo->first, ConstTensor{inputInfo->second, inputData}}}; OutputTensors outputTensors = {{outputInfo->first, armnn::Tensor{outputInfo->second, outputData}}}; runtime->EnqueueWorkload(netId, inputTensors, outputTensors); } void unload(NetworkId netId) { runtime->UnloadNetwork(netId); } unsigned long shape(NetworkId netId, bool isInput) { const TensorShape shape = (isInput ? inputInfos : outputInfos)[netId].second.GetShape(); unsigned long s = 0; for (unsigned int d = 0; d < shape.GetNumDimensions(); d++) s |= ((unsigned long)shape[d]) << (d * 16); // stores up to 4 16-bit values in a 64-bit value return s; } Ann(int tuningLevel, const char *tuningFile) { IRuntime::CreationOptions runtimeOptions; BackendOptions backendOptions{"GpuAcc", { {"TuningLevel", tuningLevel}, {"MemoryOptimizerStrategy", "ConstantMemoryStrategy"}, // SingleAxisPriorityList or ConstantMemoryStrategy }}; if (tuningFile) backendOptions.AddOption({"TuningFile", tuningFile}); runtimeOptions.m_BackendOptions.emplace_back(backendOptions); runtime = IRuntime::CreateRaw(runtimeOptions); }; ~Ann() { IRuntime::Destroy(runtime); }; private: INetworkPtr loadModel(const char *modelPath, const char *inputName, const char *outputName, BindingPointInfo &inputInfo, BindingPointInfo &outputInfo) { const auto path = std::string(modelPath); if (path.rfind(".tflite") == path.length() - 7) // endsWith() { auto parser = armnnTfLiteParser::ITfLiteParser::CreateRaw(); INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath); auto inputBinding = parser->GetNetworkInputBindingInfo(0, inputName); inputInfo = getInputTensorInfo(inputBinding.first, inputBinding.second); outputInfo = parser->GetNetworkOutputBindingInfo(0, outputName); return network; } else if (path.rfind(".onnx") == path.length() - 5) // endsWith() { auto parser = armnnOnnxParser::IOnnxParser::CreateRaw(); INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath); auto inputBinding = parser->GetNetworkInputBindingInfo(inputName); inputInfo = getInputTensorInfo(inputBinding.first, inputBinding.second); outputInfo = parser->GetNetworkOutputBindingInfo(outputName); return network; } else { std::ifstream ifs(path, std::ifstream::in | std::ifstream::binary); auto parser = armnnDeserializer::IDeserializer::CreateRaw(); INetworkPtr network = parser->CreateNetworkFromBinary(ifs); auto inputBinding = parser->GetNetworkInputBindingInfo(0, inputName); inputInfo = getInputTensorInfo(inputBinding.m_BindingId, inputBinding.m_TensorInfo); auto outputBinding = parser->GetNetworkOutputBindingInfo(0, outputName); outputInfo = {outputBinding.m_BindingId, outputBinding.m_TensorInfo}; return network; } } BindingPointInfo getInputTensorInfo(LayerBindingId inputBindingId, TensorInfo &info) { const auto newInfo = TensorInfo{info.GetShape(), info.GetDataType(), info.GetQuantizationScale(), info.GetQuantizationOffset(), true}; return {inputBindingId, newInfo}; } IOptimizedNetworkPtr OptimizeNetwork(INetwork *network, bool fastMath, bool saveCachedNetwork, const char *cachedNetworkPath) { const bool allowExpandedDims = false; const ShapeInferenceMethod shapeInferenceMethod = ShapeInferenceMethod::ValidateOnly; OptimizerOptionsOpaque options; options.SetReduceFp32ToFp16(false); options.SetShapeInferenceMethod(shapeInferenceMethod); options.SetAllowExpandedDims(allowExpandedDims); BackendOptions gpuAcc("GpuAcc", {{"FastMathEnabled", fastMath}}); if (cachedNetworkPath) { gpuAcc.AddOption({"SaveCachedNetwork", saveCachedNetwork}); gpuAcc.AddOption({"CachedNetworkFilePath", cachedNetworkPath}); } options.AddModelOption(gpuAcc); // No point in using ARMNN for CPU, use ONNX instead. // BackendOptions cpuAcc("CpuAcc", // { // {"FastMathEnabled", true}, // {"NumberOfThreads", 0}, // }); // options.AddModelOption(cpuAcc); BackendOptions allowExDimOpt("AllowExpandedDims", {{"AllowExpandedDims", allowExpandedDims}}); options.AddModelOption(allowExDimOpt); BackendOptions shapeInferOpt("ShapeInferenceMethod", {{"InferAndValidate", shapeInferenceMethod == ShapeInferenceMethod::InferAndValidate}}); options.AddModelOption(shapeInferOpt); std::vector backends = {BackendId("GpuAcc")}; return Optimize(*network, backends, runtime->GetDeviceSpec(), options); } IRuntime *runtime; std::map inputInfos; std::map outputInfos; }; extern "C" void *init(int logLevel, int tuningLevel, const char *tuningFile) { LogSeverity level = static_cast(logLevel); ConfigureLogging(true, true, level); Ann *ann = new Ann(tuningLevel, tuningFile); return ann; } extern "C" void destroy(void *ann) { delete ((Ann *)ann); } extern "C" int load(void *ann, const char *path, const char *inputName, const char *ouputName, bool fastMath, bool saveCachedNetwork, const char *cachedNetworkPath) { return ((Ann *)ann)->load(path, inputName, ouputName, fastMath, saveCachedNetwork, cachedNetworkPath); } extern "C" void unload(void *ann, NetworkId netId) { ((Ann *)ann)->unload(netId); } extern "C" void embed(void *ann, NetworkId netId, void *inputData, void *outputData) { ((Ann *)ann)->embed(netId, inputData, outputData); } extern "C" unsigned long shape(void *ann, NetworkId netId, bool isInput) { return ((Ann *)ann)->shape(netId, isInput); }