From 3805771121162c3e0806198acd18619c6cd6394b Mon Sep 17 00:00:00 2001
From: Beacontownfc <19636977267@qq.com>
Date: Thu, 22 Jun 2023 05:53:10 +0000
Subject: [PATCH 1/2] improve layer norm

---
 src/TensorFlowNET.Core/APIs/tf.nn.cs     | 18 +++++++++++++++
 .../Normalization/LayerNormalization.cs  | 15 ++++++++++++-
 .../Layers/LayersTest.cs                 | 22 +++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs
index e0c29bfa7..08b88c3d6 100644
--- a/src/TensorFlowNET.Core/APIs/tf.nn.cs
+++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs
@@ -14,8 +14,10 @@ You may obtain a copy of the License at
 limitations under the License.
 ******************************************************************************/
 
+using System.Xml.Linq;
 using Tensorflow.Operations;
 using Tensorflow.Operations.Activation;
+//using static System.Formats.Asn1.AsnWriter;
 using static Tensorflow.Binding;
 
 namespace Tensorflow
@@ -125,6 +127,22 @@ public Tensor[] fused_batch_norm(Tensor x,
                 is_training: is_training,
                 name: name,
                 exponential_avg_factor: exponential_avg_factor);
+        public Tensor batch_normalization(Tensor x,
+            Tensor mean,
+            Tensor variance,
+            Tensor offset,
+            Tensor scale,
+            float variance_epsilon,
+            string name = null)
+        {
+            var inv = math_ops.rsqrt(variance + variance_epsilon);
+            tf_with(ops.name_scope(name, "batchnorm", (x, mean, variance, scale, offset)), scope =>
+            {
+                if (scale != null) inv *= scale;
+            });
+            if (offset != null) return x * math_ops.cast(inv, x.dtype) + math_ops.cast(offset - mean * inv, dtype: x.dtype);
+            else return x * math_ops.cast(inv, x.dtype) + math_ops.cast(-mean * inv, dtype: x.dtype);
+        }
 
         public Tensor max_pool(Tensor value, int[] ksize, int[] strides, string padding, string data_format = "NHWC", string name = null)
             => nn_ops.max_pool(value, ksize, strides, padding, data_format: data_format, name: name);
diff --git a/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs b/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs
index 1898f24c8..69bdfbaa0 100644
--- a/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs
+++ b/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs
@@ -153,9 +153,22 @@ protected override Tensors Call(Tensors inputs, Tensors state = null, bool? trai
             }
             else
             {
+                var input_dtype = inputs.dtype;
+                if ((input_dtype == tf.float16) && DType == tf.float32) inputs = tf.cast(inputs, tf.float32);
+
                 (Tensor mean, Tensor variance) = tf.nn.moments(inputs, axis, keep_dims: true);
-            }
+                (Tensor scale, Tensor offset) = (_broadcast(gamma), _broadcast(beta));
+
+                outputs = tf.nn.batch_normalization(
+                    inputs,
+                    mean,
+                    variance,
+                    offset: offset,
+                    scale: scale,
+                    variance_epsilon: epsilon);
+                outputs = tf.cast(outputs, input_dtype);
+            }
 
             // If some components of the shape got lost due to adjustments, fix that.
             outputs.shape = input_shape;
diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
index f4980b82d..98d909668 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
@@ -1,5 +1,7 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
 using System.Collections.Generic;
+using System.Linq;
 using Tensorflow.NumPy;
 using static Tensorflow.Binding;
 using static Tensorflow.KerasApi;
@@ -161,6 +163,26 @@ public void LayerNormalization()
             Tensor output = layer.Apply(inputs);
             Assert.AreEqual((5, 2), output.shape);
             Assert.IsTrue(output[0].numpy().Equals(new[] { -0.99998f, 0.99998f }));
+
+            // test_layernorm_weights
+            Assert.AreEqual(len(layer.TrainableWeights), 2);
+            Assert.AreEqual(len(layer.Weights), 2);
+
+            var beta = layer.Weights.Where(x => x.Name.StartsWith("beta")).Single();
+            var gamma = layer.Weights.Where(x => x.Name.StartsWith("gamma")).Single();
+
+            // correctness_test
+            layer = keras.layers.LayerNormalization(axis: -1, epsilon: (float)1e-12);
+            var x = np.random.normal(loc: 5.0f, scale: 10.0f, size: (1000, 2, 2, 2)).astype(tf.float32);
+
+            output = layer.Apply(x);
+
+            var y = (output - beta.numpy()) / gamma.numpy();
+
+            var y_mean = np.mean(y.numpy());
+            var y_std = np.sqrt(np.sum(np.power(y.numpy() - np.mean(y.numpy()), 2)) / 8000);
+            Assert.IsTrue(tf.greater(np.array(0.1f), tf.abs(y_std - 1.0)).ToArray<bool>()[0]);
+            Assert.IsTrue(tf.greater(np.array(0.1f), tf.abs(y_mean)).ToArray<bool>()[0]);
         }
 
         /// <summary>

From 786b26602ff502284f56d85586961fb9f824cc22 Mon Sep 17 00:00:00 2001
From: Beacontownfc <19636977267@qq.com>
Date: Thu, 22 Jun 2023 07:15:08 +0000
Subject: [PATCH 2/2] Modify according to the reviewer's comments

---
 src/TensorFlowNET.Core/APIs/tf.nn.cs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs
index 08b88c3d6..e5cd4e569 100644
--- a/src/TensorFlowNET.Core/APIs/tf.nn.cs
+++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs
@@ -17,7 +17,6 @@ limitations under the License.
 using System.Xml.Linq;
 using Tensorflow.Operations;
 using Tensorflow.Operations.Activation;
-//using static System.Formats.Asn1.AsnWriter;
 using static Tensorflow.Binding;
 
 namespace Tensorflow
@@ -127,6 +126,18 @@ public Tensor[] fused_batch_norm(Tensor x,
                 is_training: is_training,
                 name: name,
                 exponential_avg_factor: exponential_avg_factor);
+
+        /// <summary>
+        /// Normalizes a tensor by `mean` and `variance`, and applies (optionally) a `scale` \\(\gamma\\) to it, as well as an `offset` \\(\beta\\).
+        /// </summary>
+        /// <param name="x">A floating point tensor.</param>
+        /// <param name="mean">A mean `Tensor`.</param>
+        /// <param name="variance">A variance `Tensor`.</param>
+        /// <param name="offset">An offset `Tensor`, often denoted \\(\beta\\) in equations, or null. If present, will be added to the normalized tensor.</param>
+        /// <param name="scale">A scale `Tensor`, often denoted \\(\gamma\\) in equations, or null. If present, the scale is applied to the normalized tensor.</param>
+        /// <param name="variance_epsilon">A small float number to avoid dividing by 0.</param>
+        /// <param name="name">A name for this operation.</param>
+        /// <returns>The normalized, scaled, offset tensor.</returns>
         public Tensor batch_normalization(Tensor x,
             Tensor mean,
             Tensor variance,
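
The algebra behind `batch_normalization` above follows the standard TensorFlow folding: instead of evaluating `(x - mean) / sqrt(variance + variance_epsilon) * scale + offset` term by term, it precomputes `inv = rsqrt(variance + variance_epsilon) * scale`, so the per-element work reduces to one multiply and one add. Below is a minimal scalar sketch of that identity in plain C#; the class name and example values are hypothetical, and it has no TensorFlow.NET dependency:

    using System;

    class BatchNormFoldingCheck
    {
        static void Main()
        {
            // Arbitrary example values, for illustration only.
            float x = 3f, mean = 2f, variance = 4f;
            float scale = 1.5f, offset = 0.5f, eps = 1e-5f;

            // Folded form used by the patch:
            //   inv = rsqrt(variance + eps) * scale
            //   y   = x * inv + (offset - mean * inv)
            float inv = scale / MathF.Sqrt(variance + eps);
            float folded = x * inv + (offset - mean * inv);

            // Textbook form: (x - mean) / sqrt(variance + eps) * scale + offset
            float textbook = (x - mean) / MathF.Sqrt(variance + eps) * scale + offset;

            Console.WriteLine($"folded = {folded}, textbook = {textbook}"); // both ~= 1.25
        }
    }

When `offset` is null, the additive term collapses to `-mean * inv`, which is exactly the second return branch in the patched method.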