diff --git a/services/ml/compilation_impl_nn.cc b/services/ml/compilation_impl_nn.cc index 2245ec10783682..143a2cba4a728b 100644 --- a/services/ml/compilation_impl_nn.cc +++ b/services/ml/compilation_impl_nn.cc @@ -33,8 +33,11 @@ CompilationImplNN::CompilationImplNN(const ModelImplNN* model, } CompilationImplNN::~CompilationImplNN() { - // ANeuralNetworksCompilation_free(nn_compilation_); - // The nn_compilation_ will be deleted in execution phase. +#if defined(OS_ANDROID) + ANeuralNetworksCompilation_free(nn_compilation_); +#else + IE(ie_compilation_free)(ie_compilation_); +#endif } void CompilationImplNN::Finish(int32_t preference, FinishCallback callback) { diff --git a/services/ml/execution_impl_nn.cc b/services/ml/execution_impl_nn.cc index 3324265c8b69ae..4bc67f78d1d0b2 100644 --- a/services/ml/execution_impl_nn.cc +++ b/services/ml/execution_impl_nn.cc @@ -14,8 +14,6 @@ namespace ml { -// TODO:: CompilationImplNN* => std::unique so that -// ie_compilation_free(ie_compilation_); can host in class CompilationImplNN. 
ExecutionImplNN::ExecutionImplNN(const CompilationImplNN* compilation, mojo::ScopedSharedBufferHandle memory) : operands_(compilation->operands_), @@ -23,14 +21,10 @@ ExecutionImplNN::ExecutionImplNN(const CompilationImplNN* compilation, inputs_(compilation->inputs_), outputs_(compilation->outputs_), memory_(std::move(memory)), -#if defined(OS_ANDROID) - nn_compilation_(compilation->nn_compilation_) { -#else - ie_compilation_(compilation->ie_compilation_) { -#endif + compilation_impl_(compilation) { #if defined(OS_LINUX) || defined(OS_WIN) // Create Execution - IE(ie_execution_create)(ie_compilation_, &ie_execution_); + IE(ie_execution_create)(compilation_impl_->ie_compilation_, &ie_execution_); #endif uint32_t total_length = 0; inputs_info_.reserve(inputs_.size()); @@ -54,9 +48,7 @@ ExecutionImplNN::ExecutionImplNN(const CompilationImplNN* compilation, ExecutionImplNN::~ExecutionImplNN() { #if defined(OS_ANDROID) - ANeuralNetworksCompilation_free(nn_compilation_); #else - IE(ie_compilation_free)(ie_compilation_); IE(ie_execution_free)(ie_execution_); #endif DLOG(INFO) << "ANeuralNetworksCompilation_free"; @@ -91,8 +83,8 @@ void ExecutionImplNN::StartCompute(mojom::UserBufferPtr user_buffer, int32_t result = 0; #if defined(OS_ANDROID) ANeuralNetworksExecution* nn_execution; - result = - ANeuralNetworksExecution_create(nn_compilation_, &nn_execution); + result = ANeuralNetworksExecution_create(compilation_impl_->nn_compilation_, + &nn_execution); #endif for (size_t i = 0; i < inputs_info_.size(); ++i) { std::unique_ptr& info = inputs_info_[i]; @@ -101,8 +93,8 @@ void ExecutionImplNN::StartCompute(mojom::UserBufferPtr user_buffer, nn_execution, i, NULL, static_cast(info->mapping.get()), info->length); #else - result = IE(ie_execution_set_input)(ie_execution_, i, - info->mapping.get(), info->length); + result = IE(ie_execution_set_input)(ie_execution_, i, info->mapping.get(), + info->length); #endif } @@ -113,8 +105,8 @@ void 
ExecutionImplNN::StartCompute(mojom::UserBufferPtr user_buffer, nn_execution, i, NULL, static_cast(info->mapping.get()), info->length); #else - result = IE(ie_execution_set_output)( - ie_execution_, i, info->mapping.get(), info->length); + result = IE(ie_execution_set_output)(ie_execution_, i, info->mapping.get(), + info->length); #endif } diff --git a/services/ml/execution_impl_nn.h b/services/ml/execution_impl_nn.h index 09e10513ba7107..4d613f3226cbbd 100644 --- a/services/ml/execution_impl_nn.h +++ b/services/ml/execution_impl_nn.h @@ -8,6 +8,7 @@ #include #include "base/macros.h" +#include "base/memory/scoped_refptr.h" #include "services/ml/common.h" #include "services/ml/compilation_impl_nn.h" #include "services/ml/model_impl_nn.h" @@ -28,8 +29,7 @@ namespace ml { class ExecutionImplNN : public mojom::Execution { public: - ExecutionImplNN(const CompilationImplNN*, - mojo::ScopedSharedBufferHandle); + ExecutionImplNN(const CompilationImplNN*, mojo::ScopedSharedBufferHandle); ~ExecutionImplNN() override; void StartCompute(mojom::UserBufferPtr user_buffer, @@ -46,13 +46,11 @@ class ExecutionImplNN : public mojom::Execution { std::vector> inputs_info_; std::vector> outputs_info_; mojo::ScopedSharedBufferHandle memory_; - + const CompilationImplNN* compilation_impl_; #if defined(OS_LINUX) || defined(OS_WIN) - ie_compilation_t* ie_compilation_; ie_execution_t* ie_execution_; -#else - ANeuralNetworksCompilation* nn_compilation_; #endif + DISALLOW_COPY_AND_ASSIGN(ExecutionImplNN); }; diff --git a/third_party/blink/renderer/modules/ml/neural_network_context.h b/third_party/blink/renderer/modules/ml/neural_network_context.h index 63a516442d4f24..00681e64dd6c67 100644 --- a/third_party/blink/renderer/modules/ml/neural_network_context.h +++ b/third_party/blink/renderer/modules/ml/neural_network_context.h @@ -34,6 +34,7 @@ class NeuralNetworkContext final : public ScriptWrappable, static const unsigned long kTensorFloat32 = 3; static const unsigned long kTensorInt32 = 4; 
static const unsigned long kTensorQuant8Asymm = 5; + static const unsigned long kBool = 6; static const unsigned long kTensorQuant8SymmPerChannel = 11; static const unsigned long kTensorQuant8AsymmSigned = 14; diff --git a/third_party/blink/renderer/modules/ml/v2/BUILD.gn b/third_party/blink/renderer/modules/ml/v2/BUILD.gn index 3e86fb92e76db7..19455ee96be378 100644 --- a/third_party/blink/renderer/modules/ml/v2/BUILD.gn +++ b/third_party/blink/renderer/modules/ml/v2/BUILD.gn @@ -20,10 +20,24 @@ blink_modules_sources("v2") { "ops/binary.h", "ops/constant.cc", "ops/constant.h", + "ops/conv.cc", + "ops/conv.h", "ops/input.cc", "ops/input.h", + "ops/matmul.cc", + "ops/matmul.h", "ops/output.cc", "ops/output.h", + "ops/pooling.cc", + "ops/pooling.h", + "ops/relu.cc", + "ops/relu.h", + "ops/reshape.cc", + "ops/reshape.h", + "ops/softmax.cc", + "ops/softmax.h", + "ops/transpose.cc", + "ops/transpose.h", ] public_deps = [ diff --git a/third_party/blink/renderer/modules/ml/v2/nn_compilation.cpp b/third_party/blink/renderer/modules/ml/v2/nn_compilation.cpp index ecd0fb2e224d41..951d83cb18e163 100644 --- a/third_party/blink/renderer/modules/ml/v2/nn_compilation.cpp +++ b/third_party/blink/renderer/modules/ml/v2/nn_compilation.cpp @@ -70,7 +70,7 @@ void NNCompilation::OnCreateExecution( if (result_code == ml::mojom::blink::NOT_ERROR) { resolver->Resolve(MakeGarbageCollected( - std::move(init_params), std::move(name_index_))); + std::move(init_params), name_index_)); } else { resolver->Reject(MakeGarbageCollected( DOMExceptionCode::kInvalidStateError, diff --git a/third_party/blink/renderer/modules/ml/v2/nn_context.cpp b/third_party/blink/renderer/modules/ml/v2/nn_context.cpp index 3024a14ffaffbc..f0601834c76c5d 100644 --- a/third_party/blink/renderer/modules/ml/v2/nn_context.cpp +++ b/third_party/blink/renderer/modules/ml/v2/nn_context.cpp @@ -16,11 +16,26 @@ #include "third_party/blink/renderer/modules/ml/v2/nn_model.h" #include 
"third_party/blink/renderer/modules/ml/v2/ops/binary.h" #include "third_party/blink/renderer/modules/ml/v2/ops/constant.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/conv.h" #include "third_party/blink/renderer/modules/ml/v2/ops/input.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/matmul.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/pooling.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/relu.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/reshape.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/softmax.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/transpose.h" #include "third_party/blink/renderer/platform/bindings/exception_code.h" namespace blink { +int32_t product(const WTF::Vector& dims) { + uint32_t prod = 1; + for (auto dim : dims) + prod *= dim; + + return prod; +} + namespace { bool InvalidData(ExceptionState& state) { @@ -104,6 +119,26 @@ bool InvalidOperandValue(const OperandDescriptor* descriptor, return invalid ? 
InvalidData(exception_state) : false; } +bool InvalidStrides(WTF::Vector& padding, + WTF::Vector& strides, + WTF::Vector& dilations, + ExceptionState& state) { + if (padding.IsEmpty()) { + padding = Vector(4, 0); + } + if (strides.IsEmpty()) { + strides = Vector(2, 1); + } + if (dilations.IsEmpty()) { + dilations = Vector(2, 1); + } + if (product(padding) < 0 || product(strides) <= 0 || + product(dilations) <= 0) { + return InvalidData(state); + } + return false; +} + } // namespace NNContext::NNContext(NavigatorML* navigator_ml) @@ -140,6 +175,73 @@ Operand* NNContext::mul(Operand* primary, Operand* secondary) { return MakeGarbageCollected(kBinaryTypeMul, primary, secondary); } +Operand* NNContext::conv2d(Operand* input, + Operand* filter, + WTF::Vector padding, + WTF::Vector strides, + WTF::Vector dilations, + int32_t groups, + String layout, + ExceptionState& state) { + if (InvalidStrides(padding, strides, dilations, state)) + return nullptr; + return MakeGarbageCollected(input, filter, std::move(padding), + std::move(strides), std::move(dilations), + groups, layout); +} + +Operand* NNContext::averagePool2d(Operand* input, + WTF::Vector window_dimensions, + WTF::Vector padding, + WTF::Vector strides, + WTF::Vector dilations, + String layout, + ExceptionState& state) { + if (InvalidStrides(padding, strides, dilations, state)) + return nullptr; + if (window_dimensions.IsEmpty()) + window_dimensions = WTF::Vector(2, 0); + return MakeGarbageCollected( + input, std::move(window_dimensions), std::move(padding), + std::move(strides), std::move(dilations), layout, kPoolingTypeAverage); +} + +Operand* NNContext::maxPool2d(Operand* input, + WTF::Vector window_dimensions, + WTF::Vector padding, + WTF::Vector strides, + WTF::Vector dilations, + String layout, + ExceptionState& state) { + if (InvalidStrides(padding, strides, dilations, state)) + return nullptr; + if (window_dimensions.IsEmpty()) + window_dimensions = WTF::Vector(2, 0); + return MakeGarbageCollected( + input, 
std::move(window_dimensions), std::move(padding), + std::move(strides), std::move(dilations), layout, kPoolingTypeMax); +} + +Operand* NNContext::reshape(Operand* input, WTF::Vector new_shape) { + return MakeGarbageCollected(input, std::move(new_shape)); +} + +Operand* NNContext::softmax(Operand* input) { + return MakeGarbageCollected(input); +} + +Operand* NNContext::relu(Operand* input) { + return MakeGarbageCollected(input); +} + +Operand* NNContext::matmul(Operand* a, Operand* b) { + return MakeGarbageCollected(a, b); +} + +Operand* NNContext::transpose(Operand* input, WTF::Vector new_shape) { + return MakeGarbageCollected(input, std::move(new_shape)); +} + ScriptPromise NNContext::createModel(ScriptState* script_state, const NamedOperandVector& outputs) { auto* resolver = MakeGarbageCollected(script_state); diff --git a/third_party/blink/renderer/modules/ml/v2/nn_context.h b/third_party/blink/renderer/modules/ml/v2/nn_context.h index 365b86ed1d9b0f..996f92b719da3e 100644 --- a/third_party/blink/renderer/modules/ml/v2/nn_context.h +++ b/third_party/blink/renderer/modules/ml/v2/nn_context.h @@ -26,6 +26,8 @@ class NavigatorML; using NamedOperandVector = HeapVector>; +int32_t product(const WTF::Vector&); + class NNContext final : public ScriptWrappable, public ExecutionContextLifecycleObserver { DEFINE_WRAPPERTYPEINFO(); @@ -42,7 +44,34 @@ class NNContext final : public ScriptWrappable, ExceptionState&); Operand* add(Operand*, Operand*); Operand* mul(Operand*, Operand*); + Operand* conv2d(Operand*, + Operand*, + WTF::Vector, + WTF::Vector, + WTF::Vector, + int32_t, + String, + ExceptionState&); + Operand* averagePool2d(Operand*, + WTF::Vector, + WTF::Vector, + WTF::Vector, + WTF::Vector, + String, + ExceptionState&); + Operand* maxPool2d(Operand*, + WTF::Vector, + WTF::Vector, + WTF::Vector, + WTF::Vector, + String, + ExceptionState&); + Operand* reshape(Operand*, WTF::Vector); + Operand* softmax(Operand*); + Operand* relu(Operand*); + Operand* matmul(Operand*, 
Operand*); ScriptPromise createModel(ScriptState*, const NamedOperandVector&); + Operand* transpose(Operand*, WTF::Vector); // ExecutionContextLifecycleObserver overrides. void ContextDestroyed() override; diff --git a/third_party/blink/renderer/modules/ml/v2/nn_context.idl b/third_party/blink/renderer/modules/ml/v2/nn_context.idl index 1aabb6594e8fae..f7432f052a4434 100644 --- a/third_party/blink/renderer/modules/ml/v2/nn_context.idl +++ b/third_party/blink/renderer/modules/ml/v2/nn_context.idl @@ -7,10 +7,29 @@ interface NNContext { [RaisesException] Operand input(DOMString name, OperandDescriptor desc); [RaisesException] Operand constant(OperandDescriptor desc, [AllowShared] ArrayBufferView data); - // element-wise binary operatioins. + // element-wise binary operations. Operand add(Operand primary, Operand secondary); Operand mul(Operand primary, Operand secondary); + [RaisesException] Operand conv2d(Operand input, Operand filter, + optional sequence padding = [], optional sequence strides = [], + optional sequence dilations = [], optional long groups = 1, + optional OperandLayout layout = "nchw"); + + // Pooling operations. 
+ [RaisesException] Operand averagePool2d(Operand input, optional sequence windowDimensions = [], + optional sequence padding = [], optional sequence strides = [], + optional sequence dilations = [], optional OperandLayout layout = "nchw"); + [RaisesException] Operand maxPool2d(Operand input, optional sequence windowDimensions = [], + optional sequence padding = [], optional sequence strides = [], + optional sequence dilations = [], optional OperandLayout layout = "nchw"); + + Operand reshape(Operand input, sequence newShape); + Operand softmax(Operand input); + Operand relu(Operand input); + Operand matmul(Operand a, Operand b); + Operand transpose(Operand input, optional sequence permutation=[]); + // Create Model [CallWith=ScriptState] Promise createModel(sequence outputs); }; diff --git a/third_party/blink/renderer/modules/ml/v2/nn_model.cpp b/third_party/blink/renderer/modules/ml/v2/nn_model.cpp index 1652303b9049c7..bb4539b06ee18c 100644 --- a/third_party/blink/renderer/modules/ml/v2/nn_model.cpp +++ b/third_party/blink/renderer/modules/ml/v2/nn_model.cpp @@ -9,6 +9,8 @@ #include "services/ml/public/mojom/constants.mojom-blink.h" #include "third_party/blink/renderer/core/dom/document.h" #include "third_party/blink/renderer/core/dom/dom_exception.h" +#include "third_party/blink/renderer/core/typed_arrays/dom_array_buffer.h" +#include "third_party/blink/renderer/core/typed_arrays/dom_typed_array.h" #include "third_party/blink/renderer/modules/ml/neural_network_context.h" #include "third_party/blink/renderer/modules/ml/v2/nn_compilation.h" #include "third_party/blink/renderer/modules/ml/v2/nn_context.h" @@ -143,10 +145,67 @@ void NNModel::FinishCreatingModel(NamedOperandVector* outputs) { return; } -void NNModel::AddFuseOperand() { +// Types not prefaced by ANEURALNETWORKS_TENSOR_* represent scalar values and +// must have no dimensions. 
+void NNModel::AddScalarOperand(uint32_t index, int value) { model_info_->operands.push_back(ml::mojom::blink::Operand::New( static_cast(NeuralNetworkContext::kInt32), WTF::Vector(), 0, 0)); + + // setOperandValue + NotShared data = + NotShared(DOMInt32Array::Create(&value, 1)); + SetOperandValue(index, data.View()); +} + +void NNModel::AddScalarOperand(uint32_t index, float value) { + model_info_->operands.push_back(ml::mojom::blink::Operand::New( + static_cast(NeuralNetworkContext::kFloat32), + WTF::Vector(), 0, 0)); + + // setOperandValue + NotShared data = + NotShared(DOMFloat32Array::Create(&value, 1)); + SetOperandValue(index, data.View()); +} + +void NNModel::AddScalarOperand(uint32_t index, bool value) { + model_info_->operands.push_back(ml::mojom::blink::Operand::New( + static_cast(NeuralNetworkContext::kBool), + WTF::Vector(), 0, 0)); + + // setOperandValue + int8_t convert = value ? 1 : 0; + NotShared data = + NotShared(DOMInt8Array::Create(&convert, 1)); + SetOperandValue(index, data.View()); +} + +// Types prefaced with ANEURALNETWORKS_TENSOR_* must be used for tensor data +// (i.e., tensors with at least one dimension). 
+void NNModel::AddBiasOperand(uint32_t index, uint32_t output_channel) { + model_info_->operands.push_back(ml::mojom::blink::Operand::New( + static_cast(NeuralNetworkContext::kTensorFloat32), + WTF::Vector(1, output_channel), 0, 0)); + + // setOperandValue + Vector value(output_channel, 0); + NotShared data = NotShared( + DOMFloat32Array::Create(value.data(), output_channel)); + SetOperandValue(index, data.View()); +} + +void NNModel::AddTensorOperand(uint32_t index, + Vector dimensions, + Vector value) { + model_info_->operands.push_back(ml::mojom::blink::Operand::New( + static_cast(NeuralNetworkContext::kTensorInt32), + std::move(dimensions), 0, 0)); + + // setOperandValue + NotShared data = NotShared( + DOMInt32Array::Create(value.data(), value.size())); + SetOperandValue(index, data.View()); } void NNModel::AddUnspecifiedOperand() { diff --git a/third_party/blink/renderer/modules/ml/v2/nn_model.h b/third_party/blink/renderer/modules/ml/v2/nn_model.h index dca85a210a397d..fcaaa09dd3e968 100644 --- a/third_party/blink/renderer/modules/ml/v2/nn_model.h +++ b/third_party/blink/renderer/modules/ml/v2/nn_model.h @@ -31,7 +31,13 @@ class NNModel final : public ScriptWrappable { ScriptPromise createCompilation(ScriptState*, const CompilationOptions* options); void BuildNeuralNetworkModel(Operand*); - void AddFuseOperand(); + void AddScalarOperand(uint32_t index, int value); + void AddScalarOperand(uint32_t index, float value); + void AddScalarOperand(uint32_t index, bool value); + void AddBiasOperand(uint32_t index, uint32_t output_channel); + void AddTensorOperand(uint32_t index, + Vector dimensions, + Vector value); void AddUnspecifiedOperand(); void AddOperand(const OperandDescriptor* descriptor); void SetOperandValue(uint32_t index, DOMArrayBufferView* data); diff --git a/third_party/blink/renderer/modules/ml/v2/operand.cpp b/third_party/blink/renderer/modules/ml/v2/operand.cpp index cdd6b49436ee46..b272cad664089c 100644 --- 
a/third_party/blink/renderer/modules/ml/v2/operand.cpp +++ b/third_party/blink/renderer/modules/ml/v2/operand.cpp @@ -21,6 +21,10 @@ void Operand::AddLayer(NNModel* model, uint32_t& index) { LOG(INFO) << "Operand."; } +Vector Operand::GetDimensions() { + return Vector(); +} + void Operand::SetIndex(uint32_t index) { index_ = index; } diff --git a/third_party/blink/renderer/modules/ml/v2/operand.h b/third_party/blink/renderer/modules/ml/v2/operand.h index 1d9f5921c7065f..d348077f7af597 100644 --- a/third_party/blink/renderer/modules/ml/v2/operand.h +++ b/third_party/blink/renderer/modules/ml/v2/operand.h @@ -26,6 +26,7 @@ class Operand : public ScriptWrappable { // be copied to Mojo::ModelInfo->memory which is total memory, and map // input/output name to index that will be used in execution phase. virtual void AddLayer(NNModel* model, uint32_t& index); + virtual Vector GetDimensions(); void SetIndex(uint32_t index); uint32_t Index(); diff --git a/third_party/blink/renderer/modules/ml/v2/ops/binary.cc b/third_party/blink/renderer/modules/ml/v2/ops/binary.cc index 4fab69a8afedb8..5ad165762d94ec 100644 --- a/third_party/blink/renderer/modules/ml/v2/ops/binary.cc +++ b/third_party/blink/renderer/modules/ml/v2/ops/binary.cc @@ -4,8 +4,6 @@ #include "third_party/blink/renderer/modules/ml/v2/ops/binary.h" -#include "third_party/blink/renderer/core/typed_arrays/dom_array_buffer.h" -#include "third_party/blink/renderer/core/typed_arrays/dom_typed_array.h" #include "third_party/blink/renderer/modules/ml/neural_network_context.h" #include "third_party/blink/renderer/platform/heap/heap_allocator.h" #include "third_party/blink/renderer/platform/wtf/vector.h" @@ -18,19 +16,14 @@ Binary::Binary(BinaryType type, Operand* primary, Operand* secondary) void Binary::AddLayer(NNModel* model, uint32_t& index) { // Add FuseCode Operand defined in Android NN API. 
uint32_t fuse_index = index++; - model->AddFuseOperand(); - // setOperandValue - int32_t fuse_code = 0; - NotShared fuse_data = - NotShared(DOMInt32Array::Create(&fuse_code, 1)); - model->SetOperandValue(fuse_index, fuse_data.View()); + model->AddScalarOperand(fuse_index, 0); // Add element-wise binary output operand. uint32_t output_index = index++; Operand::SetIndex(output_index); model->AddUnspecifiedOperand(); // addOperation - int32_t operation_type = 100; + int32_t operation_type = -1; switch (type_) { case kBinaryTypeAdd: operation_type = NeuralNetworkContext::kAdd; diff --git a/third_party/blink/renderer/modules/ml/v2/ops/constant.cc b/third_party/blink/renderer/modules/ml/v2/ops/constant.cc index 22892798ae16ea..1cd49bd3eb6e93 100644 --- a/third_party/blink/renderer/modules/ml/v2/ops/constant.cc +++ b/third_party/blink/renderer/modules/ml/v2/ops/constant.cc @@ -22,6 +22,10 @@ void Constant::AddLayer(NNModel* model, uint32_t& index) { model->SetOperandValue(constant_index, data_); } +Vector Constant::GetDimensions() { + return descriptor_->dimensions(); +} + void Constant::Trace(Visitor* visitor) const { visitor->Trace(descriptor_); visitor->Trace(data_); diff --git a/third_party/blink/renderer/modules/ml/v2/ops/constant.h b/third_party/blink/renderer/modules/ml/v2/ops/constant.h index e89b5cad9d9ffe..e99bfeddc4afb7 100644 --- a/third_party/blink/renderer/modules/ml/v2/ops/constant.h +++ b/third_party/blink/renderer/modules/ml/v2/ops/constant.h @@ -18,6 +18,7 @@ class Constant final : public Operand { ~Constant() override = default; void AddLayer(NNModel* model, uint32_t& index) override; + Vector GetDimensions() override; // Interface required by garbage collection. 
void Trace(Visitor*) const override; diff --git a/third_party/blink/renderer/modules/ml/v2/ops/conv.cc b/third_party/blink/renderer/modules/ml/v2/ops/conv.cc new file mode 100644 index 00000000000000..759503417adb95 --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/conv.cc @@ -0,0 +1,110 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "third_party/blink/renderer/modules/ml/v2/ops/conv.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +namespace { + +uint32_t GetOutputChannel(Member& operand, const String& layout) { + Vector dimensions = operand->GetDimensions(); + if (dimensions.IsEmpty()) + return 0; + + if (layout == "nchw") { + return dimensions[0]; + } else { + return dimensions[3]; + } +} + +} // namespace + +Conv::Conv(Operand* input, + Operand* filter, + WTF::Vector padding, + WTF::Vector strides, + WTF::Vector dilations, + int32_t groups, + String layout) + : Output({input, filter}), + padding_(std::move(padding)), + strides_(std::move(strides)), + dilations_(std::move(dilations)), + groups_(groups), + layout_(std::move(layout)) {} + +void Conv::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input and filter index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // Add a empty bias operand that is used in Android NN API. + uint32_t bias_index = index++; + model->AddBiasOperand(bias_index, + GetOutputChannel(Output::Inputs()[1], layout_)); + input_indexes.push_back(bias_index); + + // Add padding opeand and set the value. + for (auto padding : padding_) { + uint32_t padding_index = index++; + model->AddScalarOperand(padding_index, padding); + input_indexes.push_back(padding_index); + } + + // Add strides / dilations Operand and set the value. 
+ bool atrous = product(dilations_) != 1 ? true : false; + bool depthwise = groups_ != 1 ? true : false; + // The new design still needs to be aligned with the Android NN API in order + // to work on the Android platform, but we can only use explicit padding. + int32_t operation_type = depthwise ? NeuralNetworkContext::kDepthwiseConv2D + : NeuralNetworkContext::kConv2D; + if (atrous) { + for (auto dilation : dilations_) { + uint32_t dilation_index = index++; + model->AddScalarOperand(dilation_index, dilation); + input_indexes.push_back(dilation_index); + } + operation_type = depthwise ? NeuralNetworkContext::kAtrousDepthwiseConv2D + : NeuralNetworkContext::kAtrousConv2D; + } else { + for (auto stride : strides_) { + uint32_t stride_index = index++; + model->AddScalarOperand(stride_index, stride); + input_indexes.push_back(stride_index); + } + } + + if (depthwise) { + uint32_t depthwise_index = index++; + model->AddScalarOperand(depthwise_index, groups_); + input_indexes.push_back(depthwise_index); + } + + // Add fused code operand and set the value. + uint32_t fuse_index = index++; + model->AddScalarOperand(fuse_index, 0); + input_indexes.push_back(fuse_index); + + // Add layout operand and set the value. + uint32_t layout_index = index++; + model->AddScalarOperand(layout_index, layout_ == "nchw" ? true : false); + input_indexes.push_back(layout_index); + + // Add conv output operand. + uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + model->AddOperation(operation_type, input_indexes, {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/conv.h b/third_party/blink/renderer/modules/ml/v2/ops/conv.h new file mode 100644 index 00000000000000..58c47fed4c4582 --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/conv.h @@ -0,0 +1,38 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_CONV_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_CONV_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" +#include "third_party/blink/renderer/platform/wtf/vector.h" + +namespace blink { + +class Conv final : public Output { + public: + Conv(Operand*, + Operand*, + WTF::Vector, + WTF::Vector, + WTF::Vector, + int32_t, + String); + ~Conv() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; + + private: + Vector padding_; + Vector strides_; + Vector dilations_; + int32_t groups_; + String layout_; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_CONV_H_ diff --git a/third_party/blink/renderer/modules/ml/v2/ops/matmul.cc b/third_party/blink/renderer/modules/ml/v2/ops/matmul.cc new file mode 100644 index 00000000000000..838241f1cfd31f --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/matmul.cc @@ -0,0 +1,47 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "third_party/blink/renderer/modules/ml/v2/ops/matmul.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +MatMul::MatMul(Operand* a, Operand* b) : Output({a, b}) {} + +void MatMul::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // We can't get the bias size. 
+ uint32_t bias_index = index++; + model->AddUnspecifiedOperand(); + input_indexes.push_back(bias_index); + + // Add fused code operand and set the value. + uint32_t fuse_index = index++; + model->AddScalarOperand(fuse_index, 0); + input_indexes.push_back(fuse_index); + + // There is no MatMul defined in the Android NN API, we use kFullyConnected + // instead of MatMul. + uint32_t matmul_index = index++; + model->AddScalarOperand(matmul_index, 0); + input_indexes.push_back(matmul_index); + + // Add MatMul output operand. + uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + model->AddOperation(NeuralNetworkContext::kFullyConnected, input_indexes, + {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/matmul.h b/third_party/blink/renderer/modules/ml/v2/ops/matmul.h new file mode 100644 index 00000000000000..6a5a7863e82e63 --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/matmul.h @@ -0,0 +1,24 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_MATMUL_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_MATMUL_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" + +namespace blink { + +class MatMul final : public Output { + public: + MatMul(Operand*, Operand*); + ~MatMul() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_MATMUL_H_ diff --git a/third_party/blink/renderer/modules/ml/v2/ops/pooling.cc b/third_party/blink/renderer/modules/ml/v2/ops/pooling.cc new file mode 100644 index 00000000000000..4f570763baba6c --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/pooling.cc @@ -0,0 +1,87 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "third_party/blink/renderer/modules/ml/v2/ops/pooling.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +Pooling::Pooling(Operand* input, + WTF::Vector window_dimensions, + WTF::Vector padding, + WTF::Vector strides, + WTF::Vector dilations, + String layout, + PoolingType type) + : Output({input}), + window_dimensions_(std::move(window_dimensions)), + padding_(std::move(padding)), + strides_(std::move(strides)), + dilations_(std::move(dilations)), + layout_(std::move(layout)), + type_(type) {} + +void Pooling::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // Add padding opeand and set the value. 
+ for (auto padding : padding_) { + uint32_t padding_index = index++; + model->AddScalarOperand(padding_index, padding); + input_indexes.push_back(padding_index); + } + + // Add strides Operand and set the value. + for (auto stride : strides_) { + uint32_t stride_index = index++; + model->AddScalarOperand(stride_index, stride); + input_indexes.push_back(stride_index); + } + + // Add filters Operand and set the value. + for (auto filter : window_dimensions_) { + uint32_t filter_index = index++; + model->AddScalarOperand(filter_index, filter); + input_indexes.push_back(filter_index); + } + + // Add fused code operand and set the value. + uint32_t fuse_index = index++; + model->AddScalarOperand(fuse_index, 0); + input_indexes.push_back(fuse_index); + + // Add layout operand and set the value. + uint32_t layout_index = index++; + model->AddScalarOperand(layout_index, layout_ == "nchw" ? 1 : 0); + input_indexes.push_back(layout_index); + + // Add pooling output operand. + uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + int32_t operation_type = -1; + switch (type_) { + case kPoolingTypeAverage: + operation_type = NeuralNetworkContext::kAveragePool2D; + break; + case kPoolingTypeMax: + operation_type = NeuralNetworkContext::kMaxPool2D; + break; + default: + LOG(ERROR) << "The operation isn't supported"; + NOTREACHED(); + } + + model->AddOperation(operation_type, input_indexes, {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/pooling.h b/third_party/blink/renderer/modules/ml/v2/ops/pooling.h new file mode 100644 index 00000000000000..0d18931170233c --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/pooling.h @@ -0,0 +1,45 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_POOLING_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_POOLING_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" +#include "third_party/blink/renderer/platform/wtf/vector.h" + +namespace blink { + +enum PoolingType { + kPoolingTypeAverage = 0, + kPoolingTypeL2, + kPoolingTypeMax, +}; + +class Pooling final : public Output { + public: + Pooling(Operand*, + WTF::Vector, + WTF::Vector, + WTF::Vector, + WTF::Vector, + String, + PoolingType); + ~Pooling() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; + + private: + Vector window_dimensions_; + Vector padding_; + Vector strides_; + Vector dilations_; + String layout_; + PoolingType type_; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_POOLING_H_ diff --git a/third_party/blink/renderer/modules/ml/v2/ops/relu.cc b/third_party/blink/renderer/modules/ml/v2/ops/relu.cc new file mode 100644 index 00000000000000..ee9bdd58fff520 --- --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/relu.cc @@ -0,0 +1,31 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "third_party/blink/renderer/modules/ml/v2/ops/relu.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +Relu::Relu(Operand* input) : Output({input}) {} + +void Relu::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // Add Relu output operand. 
+ uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + model->AddOperation(NeuralNetworkContext::kRelu, input_indexes, + {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/relu.h b/third_party/blink/renderer/modules/ml/v2/ops/relu.h new file mode 100644 index 00000000000000..25e71876613402 --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/relu.h @@ -0,0 +1,25 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_RELU_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_RELU_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" +#include "third_party/blink/renderer/platform/wtf/vector.h" + +namespace blink { + +class Relu final : public Output { + public: + explicit Relu(Operand*); + ~Relu() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_RELU_H_ diff --git a/third_party/blink/renderer/modules/ml/v2/ops/reshape.cc b/third_party/blink/renderer/modules/ml/v2/ops/reshape.cc new file mode 100644 index 00000000000000..9c8d823ef7b3d7 --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/reshape.cc @@ -0,0 +1,39 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "third_party/blink/renderer/modules/ml/v2/ops/reshape.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +Reshape::Reshape(Operand* input, WTF::Vector new_shape) + : Output({input}), new_shape_(new_shape) {} + +void Reshape::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // Add new shape operand and set the value. + uint32_t new_shape_index = index++; + // The new shape is 1-D tensor. + Vector new_shape_dims(1, new_shape_.size()); + model->AddTensorOperand(new_shape_index, new_shape_dims, new_shape_); + input_indexes.push_back(new_shape_index); + + // Add Reshape output operand. + uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + model->AddOperation(NeuralNetworkContext::kReshape, input_indexes, + {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/reshape.h b/third_party/blink/renderer/modules/ml/v2/ops/reshape.h new file mode 100644 index 00000000000000..476bea38cd4c29 --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/reshape.h @@ -0,0 +1,28 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_RESHAPE_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_RESHAPE_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" +#include "third_party/blink/renderer/platform/wtf/vector.h" + +namespace blink { + +class Reshape final : public Output { + public: + Reshape(Operand*, WTF::Vector); + ~Reshape() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; + + private: + Vector new_shape_; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_RESHAPE_H_ diff --git a/third_party/blink/renderer/modules/ml/v2/ops/softmax.cc b/third_party/blink/renderer/modules/ml/v2/ops/softmax.cc new file mode 100644 index 00000000000000..303fc914c640ac --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/softmax.cc @@ -0,0 +1,37 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "third_party/blink/renderer/modules/ml/v2/ops/softmax.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +Softmax::Softmax(Operand* input) : Output({input}) {} + +void Softmax::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // Add beta operand for scaling factor. + uint32_t beta_index = index++; + float beta = 1.0; + model->AddScalarOperand(beta_index, beta); + input_indexes.push_back(beta_index); + + // Add Softmax output operand. 
+ uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + model->AddOperation(NeuralNetworkContext::kSoftmax, input_indexes, + {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/softmax.h b/third_party/blink/renderer/modules/ml/v2/ops/softmax.h new file mode 100644 index 00000000000000..6a7f8826dae2ac --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/softmax.h @@ -0,0 +1,25 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_SOFTMAX_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_SOFTMAX_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" +#include "third_party/blink/renderer/platform/wtf/vector.h" + +namespace blink { + +class Softmax final : public Output { + public: + explicit Softmax(Operand*); + ~Softmax() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_SOFTMAX_H_ diff --git a/third_party/blink/renderer/modules/ml/v2/ops/transpose.cc b/third_party/blink/renderer/modules/ml/v2/ops/transpose.cc new file mode 100644 index 00000000000000..6458d5ef221edf --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/transpose.cc @@ -0,0 +1,41 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "third_party/blink/renderer/modules/ml/v2/ops/transpose.h" + +#include + +#include "third_party/blink/renderer/modules/ml/neural_network_context.h" + +namespace blink { + +Transpose::Transpose(Operand* input, WTF::Vector permutation) + : Output({input}), permutation_(permutation) {} + +void Transpose::AddLayer(NNModel* model, uint32_t& index) { + Vector input_indexes; + // Add input index to input_indexes. + for (auto& input : Output::Inputs()) { + input_indexes.push_back(input->Index()); + } + + // Add permutation operand and set the value. + if (!permutation_.IsEmpty()) { + uint32_t permutation_index = index++; + // The permutation is a 1-D tensor. + Vector permutation_dims(1, permutation_.size()); + model->AddTensorOperand(permutation_index, permutation_dims, permutation_); + input_indexes.push_back(permutation_index); + } + + // Add Transpose output operand. + uint32_t output_index = index++; + Operand::SetIndex(output_index); + model->AddUnspecifiedOperand(); + + model->AddOperation(NeuralNetworkContext::kTranspose, input_indexes, + {output_index}); +} + +} // namespace blink diff --git a/third_party/blink/renderer/modules/ml/v2/ops/transpose.h b/third_party/blink/renderer/modules/ml/v2/ops/transpose.h new file mode 100644 index 00000000000000..f8604a1b2f5d5b --- /dev/null +++ b/third_party/blink/renderer/modules/ml/v2/ops/transpose.h @@ -0,0 +1,28 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_TRANSPOSE_H_ +#define THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_TRANSPOSE_H_ + +#include "third_party/blink/renderer/modules/ml/v2/nn_model.h" +#include "third_party/blink/renderer/modules/ml/v2/operand.h" +#include "third_party/blink/renderer/modules/ml/v2/ops/output.h" +#include "third_party/blink/renderer/platform/wtf/vector.h" + +namespace blink { + +class Transpose final : public Output { + public: + Transpose(Operand*, WTF::Vector); + ~Transpose() override = default; + + void AddLayer(NNModel* model, uint32_t& index) override; + + private: + Vector permutation_; +}; + +} // namespace blink + +#endif // THIRD_PARTY_BLINK_RENDERER_MODULES_ML_OPS_TRANSPOSE_H_