Compare commits

1 commit: 2452cae043

10 changed files with 450 additions and 2 deletions

docker/mlaccel-armnn.yml (new file, 11 lines)
@@ -0,0 +1,11 @@
version: "3.8"

# ML acceleration on supported Mali ARM GPUs using ARM-NN

services:
  mlaccel:
    devices:
      - /dev/mali0:/dev/mali0
    volumes:
      - /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset
      - /usr/lib/libmali-valhall-g610-g6p0-gbm.so:/usr/lib/libmali.so:ro # Mali driver for your chipset
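
Before bringing the service up, it is worth checking that the host actually exposes the device node, firmware, and driver that the compose file maps in. A minimal sketch (plain Python; the paths are the ones mounted above, and the libmali path must match your chipset):

import os

# paths taken from docker/mlaccel-armnn.yml; adjust libmali-* for your chipset
for path in ("/dev/mali0", "/lib/firmware/mali_csffw.bin", "/usr/lib/libmali-valhall-g610-g6p0-gbm.so"):
    print(path, "->", "found" if os.path.exists(path) else "MISSING")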

@@ -13,20 +13,42 @@ ENV VIRTUAL_ENV="/opt/venv" PATH="/opt/venv/bin:${PATH}"
COPY poetry.lock pyproject.toml ./
RUN poetry install --sync --no-interaction --no-ansi --no-root --only main

FROM python:3.11-slim-bookworm@sha256:1bc6a3e9356d64ea632791653bc71a56340e8741dab66434ab2739ebf6aed29d
ARG TARGETPLATFORM
ENV ARMNN_PATH=/opt/armnn
COPY ann /opt/ann
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
        mkdir /opt/armnn && \
        curl -SL "https://github.com/ARM-software/armnn/releases/download/v23.11/ArmNN-linux-aarch64.tar.gz" | tar -zx -C /opt/armnn && \
        cd /opt/ann && \
        sh build.sh; \
    fi

FROM python:3.11-slim-bookworm@sha256:1bc6a3e9356d64ea632791653bc71a56340e8741dab66434ab2739ebf6aed29d
ARG TARGETPLATFORM
RUN apt-get update && apt-get install -y --no-install-recommends tini libmimalloc2.0 && rm -rf /var/lib/apt/lists/*

RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
        apt-get update && apt-get install -y --no-install-recommends ocl-icd-libopencl1 mesa-opencl-icd && \
        rm -rf /var/lib/apt/lists/* && \
        mkdir --parents /etc/OpenCL/vendors && \
        echo "/usr/lib/libmali.so" > /etc/OpenCL/vendors/mali.icd && \
        mkdir /opt/armnn && \
        mkdir /opt/ann; \
    fi

WORKDIR /usr/src/app
ENV NODE_ENV=production \
    TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/opt/venv/bin:$PATH" \
    PYTHONPATH=/usr/src \
    LD_LIBRARY_PATH=/opt/armnn

COPY --from=builder /opt/venv /opt/venv
COPY --from=builder /opt/armnn/libarmnn.so.?? /opt/armnn/libarmnnOnnxParser.so.?? /opt/armnn/libarmnnDeserializer.so.?? /opt/armnn/libarmnnTfLiteParser.so.?? /opt/armnn/libprotobuf.so.?.??.?.? /opt/ann/libann.s[o] /opt/armnn
COPY ann/ann.py /usr/src/ann/ann.py
COPY start.sh log_conf.json ./
COPY app .

ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]

machine-learning/ann/ann.cpp (new file, 196 lines)
@@ -0,0 +1,196 @@
#include <fstream>
#include <map>
#include <string>
#include <vector>

#include "armnn/IRuntime.hpp"
#include "armnn/INetwork.hpp"
#include "armnn/Types.hpp"
#include "armnnDeserializer/IDeserializer.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnOnnxParser/IOnnxParser.hpp"

using namespace armnn;

class Ann
{

public:
    int load(const char *modelPath, const char *inputName, const char *outputName, bool fastMath, bool saveCachedNetwork, const char *cachedNetworkPath)
    {
        BindingPointInfo inputInfo;
        BindingPointInfo outputInfo;
        INetworkPtr network = loadModel(modelPath, inputName, outputName, inputInfo, outputInfo);

        auto n = network.get();

        IOptimizedNetworkPtr optNet = OptimizeNetwork(n, fastMath, saveCachedNetwork, cachedNetworkPath);
        NetworkId netId;
        Status status = runtime->LoadNetwork(netId, std::move(optNet));
        inputInfos[netId] = inputInfo;
        outputInfos[netId] = outputInfo;
        return netId;
    }

    void embed(NetworkId netId, const void *inputData, void *outputData)
    {
        const BindingPointInfo *inputInfo = &inputInfos[netId];
        const BindingPointInfo *outputInfo = &outputInfos[netId];
        InputTensors inputTensors = {{inputInfo->first, ConstTensor{inputInfo->second, inputData}}};
        OutputTensors outputTensors = {{outputInfo->first, armnn::Tensor{outputInfo->second, outputData}}};
        runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
    }

    void unload(NetworkId netId)
    {
        runtime->UnloadNetwork(netId);
    }

    unsigned long shape(NetworkId netId, bool isInput)
    {
        const TensorShape shape = (isInput ? inputInfos : outputInfos)[netId].second.GetShape();
        unsigned long s = 0;
        for (unsigned int d = 0; d < shape.GetNumDimensions(); d++)
            s |= ((unsigned long)shape[d]) << (d * 16); // stores up to 4 16-bit values in a 64-bit value
        return s;
    }

    Ann(int tuningLevel, const char *tuningFile)
    {
        IRuntime::CreationOptions runtimeOptions;
        BackendOptions backendOptions{"GpuAcc",
                                      {
                                          {"TuningLevel", tuningLevel},
                                          {"MemoryOptimizerStrategy", "ConstantMemoryStrategy"}, // SingleAxisPriorityList or ConstantMemoryStrategy
                                      }};
        if (tuningFile)
            backendOptions.AddOption({"TuningFile", tuningFile});
        runtimeOptions.m_BackendOptions.emplace_back(backendOptions);
        runtime = IRuntime::CreateRaw(runtimeOptions);
    };
    ~Ann()
    {
        IRuntime::Destroy(runtime);
    };

private:
    INetworkPtr loadModel(const char *modelPath, const char *inputName, const char *outputName, BindingPointInfo &inputInfo, BindingPointInfo &outputInfo)
    {
        const auto path = std::string(modelPath);
        if (path.rfind(".tflite") == path.length() - 7) // endsWith()
        {
            auto parser = armnnTfLiteParser::ITfLiteParser::CreateRaw();
            INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath);
            auto inputBinding = parser->GetNetworkInputBindingInfo(0, inputName);
            inputInfo = getInputTensorInfo(inputBinding.first, inputBinding.second);
            outputInfo = parser->GetNetworkOutputBindingInfo(0, outputName);
            return network;
        }
        else if (path.rfind(".onnx") == path.length() - 5) // endsWith()
        {
            auto parser = armnnOnnxParser::IOnnxParser::CreateRaw();
            INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath);
            auto inputBinding = parser->GetNetworkInputBindingInfo(inputName);
            inputInfo = getInputTensorInfo(inputBinding.first, inputBinding.second);
            outputInfo = parser->GetNetworkOutputBindingInfo(outputName);
            return network;
        }
        else
        {
            std::ifstream ifs(path, std::ifstream::in | std::ifstream::binary);
            auto parser = armnnDeserializer::IDeserializer::CreateRaw();
            INetworkPtr network = parser->CreateNetworkFromBinary(ifs);
            auto inputBinding = parser->GetNetworkInputBindingInfo(0, inputName);
            inputInfo = getInputTensorInfo(inputBinding.m_BindingId, inputBinding.m_TensorInfo);
            auto outputBinding = parser->GetNetworkOutputBindingInfo(0, outputName);
            outputInfo = {outputBinding.m_BindingId, outputBinding.m_TensorInfo};
            return network;
        }
    }

    BindingPointInfo getInputTensorInfo(LayerBindingId inputBindingId, TensorInfo &info)
    {
        const auto newInfo = TensorInfo{info.GetShape(), info.GetDataType(),
                                        info.GetQuantizationScale(),
                                        info.GetQuantizationOffset(),
                                        true};
        return {inputBindingId, newInfo};
    }

    IOptimizedNetworkPtr OptimizeNetwork(INetwork *network, bool fastMath, bool saveCachedNetwork, const char *cachedNetworkPath)
    {
        const bool allowExpandedDims = false;
        const ShapeInferenceMethod shapeInferenceMethod = ShapeInferenceMethod::ValidateOnly;

        OptimizerOptionsOpaque options;
        options.SetReduceFp32ToFp16(false);
        options.SetShapeInferenceMethod(shapeInferenceMethod);
        options.SetAllowExpandedDims(allowExpandedDims);

        BackendOptions gpuAcc("GpuAcc", {{"FastMathEnabled", fastMath}});
        if (cachedNetworkPath)
        {
            gpuAcc.AddOption({"SaveCachedNetwork", saveCachedNetwork});
            gpuAcc.AddOption({"CachedNetworkFilePath", cachedNetworkPath});
        }
        options.AddModelOption(gpuAcc);

        // No point in using ARMNN for CPU; use ONNX instead.
        // BackendOptions cpuAcc("CpuAcc",
        //                       {
        //                           {"FastMathEnabled", true},
        //                           {"NumberOfThreads", 0},
        //                       });
        // options.AddModelOption(cpuAcc);

        BackendOptions allowExDimOpt("AllowExpandedDims",
                                     {{"AllowExpandedDims", allowExpandedDims}});
        options.AddModelOption(allowExDimOpt);
        BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                     {{"InferAndValidate", shapeInferenceMethod == ShapeInferenceMethod::InferAndValidate}});
        options.AddModelOption(shapeInferOpt);

        std::vector<BackendId> backends = {BackendId("GpuAcc")};
        return Optimize(*network, backends, runtime->GetDeviceSpec(), options);
    }

    IRuntime *runtime;
    std::map<NetworkId, BindingPointInfo> inputInfos;
    std::map<NetworkId, BindingPointInfo> outputInfos;
};

extern "C" void *init(int logLevel, int tuningLevel, const char *tuningFile)
{
    LogSeverity level = static_cast<LogSeverity>(logLevel);
    ConfigureLogging(true, true, level);

    Ann *ann = new Ann(tuningLevel, tuningFile);
    return ann;
}

extern "C" void destroy(void *ann)
{
    delete ((Ann *)ann);
}

extern "C" int load(void *ann,
                    const char *path,
                    const char *inputName,
                    const char *outputName,
                    bool fastMath,
                    bool saveCachedNetwork,
                    const char *cachedNetworkPath)
{
    return ((Ann *)ann)->load(path, inputName, outputName, fastMath, saveCachedNetwork, cachedNetworkPath);
}

extern "C" void unload(void *ann, NetworkId netId)
{
    ((Ann *)ann)->unload(netId);
}

extern "C" void embed(void *ann, NetworkId netId, void *inputData, void *outputData)
{
    ((Ann *)ann)->embed(netId, inputData, outputData);
}

extern "C" unsigned long shape(void *ann, NetworkId netId, bool isInput)
{
    return ((Ann *)ann)->shape(netId, isInput);
}
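
The unsigned long returned by shape() packs up to four 16-bit dimensions into a single 64-bit value, least-significant dimension first; ann.py (next file) decodes it the same way. A small illustrative round trip of that encoding (function names here are ours, not part of the library):

def pack_shape(dims):
    # mirrors Ann::shape(): dimension d occupies bits [16*d, 16*(d+1))
    s = 0
    for d, dim in enumerate(dims):
        s |= dim << (d * 16)
    return s

def unpack_shape(s):
    # mirrors Ann.shape() in ann.py: peel off 16 bits until nothing is left
    dims = []
    while s:
        dims.append(s & 0xFFFF)
        s >>= 16
    return tuple(dims)

assert unpack_shape(pack_shape((1, 3, 224, 224))) == (1, 3, 224, 224)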

machine-learning/ann/ann.py (new file, 124 lines)
@@ -0,0 +1,124 @@
import time
from ctypes import CDLL, c_bool, c_char_p, c_int, c_ulong, c_void_p
from os.path import exists
from typing import Dict, Tuple

import numpy as np
from numpy.typing import NDArray

libann = CDLL("libann.so")
libann.init.argtypes = c_int, c_int, c_char_p
libann.init.restype = c_void_p
libann.load.argtypes = c_void_p, c_char_p, c_char_p, c_char_p, c_bool, c_bool, c_char_p
libann.load.restype = c_int
libann.embed.argtypes = c_void_p, c_int, c_void_p, c_void_p
libann.unload.argtypes = c_void_p, c_int
libann.destroy.argtypes = (c_void_p,)
libann.shape.argtypes = (c_void_p, c_int, c_bool)
libann.shape.restype = c_ulong


class Ann:
    def __init__(self, log_level=3, tuning_level=1, tuning_file: str | None = None) -> None:
        if tuning_file and not exists(tuning_file):
            raise ValueError("tuning_file must point to an existing (possibly empty) file!")
        if tuning_level == 0 and tuning_file is None:
            raise ValueError("tuning_level == 0 reads existing tuning information and requires a tuning_file")
        if tuning_level < 0 or tuning_level > 3:
            raise ValueError("tuning_level must be 0 (load from tuning_file), 1, 2 or 3.")
        if log_level < 0 or log_level > 5:
            raise ValueError("log_level must be 0 (trace), 1 (debug), 2 (info), 3 (warning), 4 (error) or 5 (fatal)")
        self.ann = libann.init(log_level, tuning_level, tuning_file.encode("utf-8") if tuning_file else None)
        self.output_shapes: Dict[int, Tuple[int, ...]] = {}
        self.input_shapes: Dict[int, Tuple[int, ...]] = {}

    def __del__(self) -> None:
        libann.destroy(self.ann)

    def load(
        self,
        model_path: str,
        input_name="input_tensor",
        output_name="output_tensor",
        fast_math=True,
        save_cached_network=False,
        cached_network_path: str | None = None,
    ) -> int:
        if not (exists(model_path) and model_path.endswith((".armnn", ".tflite", ".onnx"))):
            raise ValueError("model_path must be a file with extension .armnn, .tflite or .onnx")
        if cached_network_path and not exists(cached_network_path):
            raise ValueError("cached_network_path must point to an existing (possibly empty) file!")
        if save_cached_network and cached_network_path is None:
            raise ValueError("save_cached_network is True, cached_network_path must be specified!")
        net_id = libann.load(
            self.ann,
            model_path.encode("utf-8"),
            input_name.encode("utf-8"),
            output_name.encode("utf-8"),
            fast_math,
            save_cached_network,
            cached_network_path.encode("utf-8") if cached_network_path else None,
        )

        self.input_shapes[net_id] = self.shape(net_id, input=True)
        self.output_shapes[net_id] = self.shape(net_id, input=False)
        return net_id

    def unload(self, network_id: int) -> None:
        libann.unload(self.ann, network_id)
        del self.input_shapes[network_id]
        del self.output_shapes[network_id]

    def embed(self, network_id: int, input_tensor: NDArray) -> NDArray:
        net_input_shape = self.input_shapes[network_id]
        if input_tensor.shape != net_input_shape:
            raise ValueError(f"input_tensor shape {input_tensor.shape} != network input shape {net_input_shape}")
        output_tensor = np.ndarray(self.output_shapes[network_id], dtype=np.float32)
        libann.embed(
            self.ann, network_id, input_tensor.ctypes.data_as(c_void_p), output_tensor.ctypes.data_as(c_void_p)
        )
        return output_tensor

    def shape(self, network_id: int, input=False) -> Tuple[int, ...]:
        s = libann.shape(self.ann, network_id, input)
        a = []
        while s != 0:
            a.append(s & 0xFFFF)
            s >>= 16
        return tuple(a)


def test():
    iterations = 1
    start = time.perf_counter_ns()
    ann = Ann(tuning_level=0, tuning_file="gpu.tuning")
    net = ann.load("/tmp/tiny-clip-b1-fp16.armnn", save_cached_network=False, cached_network_path="cached.network")
    end = time.perf_counter_ns()
    # cached_network_path saves 1.2 seconds
    print("loading took ", (end - start) / 1000000)
    img = np.load("/tmp/img.npy")
    # img = np.repeat(img, 2, 0)

    start = time.perf_counter_ns()
    # warmup
    dummy = np.ndarray(ann.shape(net, input=True), dtype=np.float32)
    ann.embed(net, dummy)
    end = time.perf_counter_ns()
    # tuning_file saves 18 seconds for tuning level 3
    print("warmup took ", (end - start) / 1000000)

    start = time.perf_counter_ns()
    for i in range(iterations):
        embedding = ann.embed(net, img)
    end = time.perf_counter_ns()
    per_sample = (end - start) / (1000000 * iterations)

    # print(embedding)
    # np.save("/tmp/ann_fp16.npy", embedding)
    print("embedding took ", per_sample)

    ann.unload(net)
    del ann  # important to save the tuning file


if __name__ == "__main__":
    test()
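
For orientation, a minimal usage sketch of the wrapper above (the model path is a placeholder; input/output tensor names default to input_tensor and output_tensor as declared in load()):

import numpy as np

ann = Ann(log_level=3, tuning_level=1)
net_id = ann.load("/path/to/model.armnn")  # hypothetical model path
x = np.zeros(ann.input_shapes[net_id], dtype=np.float32)
embedding = ann.embed(net_id, x)  # float32 array shaped like ann.output_shapes[net_id]
ann.unload(net_id)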

machine-learning/ann/build.sh (new file, 1 line)
@@ -0,0 +1 @@
g++ -shared -O3 -o libann.so -fuse-ld=gold -std=c++17 -I$ARMNN_PATH/include -larmnn -larmnnDeserializer -larmnnTfLiteParser -larmnnOnnxParser -L$ARMNN_PATH ann.cpp

machine-learning/export/.gitignore (new file, vendored, 1 line)
@@ -0,0 +1 @@
armnn*

machine-learning/export/build-converter.sh (new executable file, 4 lines)
@@ -0,0 +1,4 @@
#!/bin/sh

cd armnn-23.11/
g++ -o ../armnnconverter -O1 -DARMNN_ONNX_PARSER -DARMNN_SERIALIZER -DARMNN_TF_LITE_PARSER -fuse-ld=gold -std=c++17 -Iinclude -Isrc/armnnUtils -Ithird-party -larmnn -larmnnDeserializer -larmnnTfLiteParser -larmnnOnnxParser -larmnnSerializer -L../armnn src/armnnConverter/ArmnnConverter.cpp

machine-learning/export/download-armnn.sh (new executable file, 8 lines)
@@ -0,0 +1,8 @@
#!/bin/sh

# binaries
mkdir armnn
curl -SL "https://github.com/ARM-software/armnn/releases/download/v23.11/ArmNN-linux-aarch64.tar.gz" | tar -zx -C armnn

# source to build ArmnnConverter
curl -SL "https://github.com/ARM-software/armnn/archive/refs/tags/v23.11.tar.gz" | tar -zx

@@ -22,4 +22,5 @@ dependencies:
  - pip:
      - multilingual-clip
      - onnx-simplifier
      - git+https://github.com/fyfrey/TinyNeuralNetwork.git
category: main

machine-learning/export/tiny.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import logging
import os
import platform
import subprocess

import open_clip
import torch
from tinynn.converter import TFLiteConverter


class Wrapper(torch.nn.Module):
    def __init__(self, device: torch.device):
        super().__init__()
        self.device = device
        self.model = open_clip.create_model(
            "ViT-B-32",
            pretrained="openai",
            precision="fp16" if device.type == "cuda" else "fp32",
            jit=False,
            require_pretrained=True,
            device=device,
        )

    def forward(self, input_tensor: torch.FloatTensor):
        embedding = self.model.encode_image(input_tensor.half() if self.device.type == "cuda" else input_tensor)
        return embedding.float()


def main():
    if platform.machine() not in ("x86_64", "AMD64"):
        raise RuntimeError(f"Can only run on x86_64 / AMD64, not {platform.machine()}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type != "cuda":
        logging.warning(
            "No CUDA available, cannot create fp16 model! Proceeding to create a fp32 model (use only for testing)"
        )

    model = Wrapper(device)
    model = model.to(device)
    for param in model.parameters():
        param.requires_grad = False
    model.eval()

    dummy_input = torch.rand((1, 3, 224, 224))
    dummy_input = dummy_input.to(device)

    dummy_out = model(dummy_input)
    print(dummy_out.dtype, dummy_out.device, dummy_out.shape)

    jit = torch.jit.trace(model, dummy_input)
    output_name = "output_tensor"
    list(jit.graph.outputs())[0].setDebugName(output_name)
    tflite_model_path = "tiny-clip.tflite"
    output_path = os.path.join("out", tflite_model_path)

    converter = TFLiteConverter(jit, dummy_input, output_path, nchw_transpose=True)
    # segfaults on ARM, must run on x86_64 / AMD64
    converter.convert()

    armnn_model_path = "tiny-clip.armnn"
    os.environ["LD_LIBRARY_PATH"] = "armnn"
    subprocess.run(
        [
            "./armnnconverter",  # built by build-converter.sh
            "-f",
            "tflite-binary",
            "-m",
            output_path,
            "-i",
            "input_tensor",
            "-o",
            output_name,
            "-p",
            armnn_model_path,
        ]
    )


if __name__ == "__main__":
    with torch.no_grad():
        main()
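
The commented-out np.save() in ann.py's test() can dump an embedding to disk, so one hedged way to validate the exported .armnn model is to compare such a dump against a reference produced by the PyTorch model. A sketch (both file names are hypothetical dumps; the loose tolerance allows for fp16 rounding):

import numpy as np

ref = np.load("/tmp/torch_fp32.npy")  # hypothetical dump from the PyTorch wrapper
out = np.load("/tmp/ann_fp16.npy")    # dump produced via ann.py's test()
print("max abs diff:", np.abs(ref - out).max())
print("match:", np.allclose(ref, out, atol=1e-2))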