
Commit ad250d0

committed: implement _SwitchGrad when merge_grad is not null.
1 parent fcd2cd6 commit ad250d0


3 files changed (+67 -68 lines)


src/TensorFlowNET.Core/Gradients/control_flow_grad.cs

Lines changed: 8 additions & 11 deletions
@@ -48,7 +48,12 @@ public static Tensor[] _SwitchGrad(Operation op, Tensor[] grads)
         {
             var merge_grad = grad_ctxt.grad_state.switch_map.get(op);
             if (merge_grad != null)
-                throw new NotImplementedException("_SwitchGrad merge_grad != null");
+            {
+                if (grads[1] != null)
+                    control_flow_ops._AddNextAndBackEdge(merge_grad, grads[1],
+                        enforce_shape_invariant: false);
+                return new Tensor[] { null, null };
+            }
             else if (grads[0] != null)
             {
                 merge_grad = merge(new[] { grads[0], grads[0] }, name: "b_switch")[0];
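
The hunk above replaces the NotImplementedException with the two-visit protocol for Switch gradients: the first visit builds and caches a Merge gradient in grad_state.switch_map, and a later visit reuses the cached merge_grad and wires the newly arrived gradient in as a back edge via control_flow_ops._AddNextAndBackEdge. Below is a minimal, self-contained sketch of that caching behavior; every type and helper in it (FakeTensor, FakeGradState, AddNextAndBackEdge, Merge) is a stand-in invented for illustration, not TensorFlow.NET API.

// All names below are invented for this sketch; they only model the control flow of
// _SwitchGrad after this commit, not the real TensorFlow.NET types.
using System;
using System.Collections.Generic;

class FakeTensor
{
    public string Name;
    public FakeTensor(string name) { Name = name; }
}

class FakeGradState
{
    // Plays the role of grad_state.switch_map: Switch op name -> cached merge gradient.
    public Dictionary<string, FakeTensor> SwitchMap = new Dictionary<string, FakeTensor>();
}

static class SwitchGradSketch
{
    // Stand-in for control_flow_ops._AddNextAndBackEdge.
    static void AddNextAndBackEdge(FakeTensor mergeGrad, FakeTensor grad)
    {
        Console.WriteLine("back edge: " + grad.Name + " -> " + mergeGrad.Name);
    }

    // Stand-in for the two-input merge() used on the first visit.
    static FakeTensor Merge(FakeTensor a, FakeTensor b)
    {
        return new FakeTensor("b_switch(" + a.Name + "," + b.Name + ")");
    }

    public static FakeTensor[] SwitchGrad(string opName, FakeTensor[] grads, FakeGradState state)
    {
        FakeTensor mergeGrad;
        if (state.SwitchMap.TryGetValue(opName, out mergeGrad))
        {
            // Later visits: reuse the cached merge gradient and connect the newly
            // arrived gradient as the back edge (the new branch in this hunk).
            if (grads[1] != null)
                AddNextAndBackEdge(mergeGrad, grads[1]);
            return new FakeTensor[] { null, null };
        }
        else if (grads[0] != null)
        {
            // First visit: build the merge gradient and cache it for the next visit.
            mergeGrad = Merge(grads[0], grads[0]);
            state.SwitchMap[opName] = mergeGrad;
            return new FakeTensor[] { mergeGrad, null };
        }
        return new FakeTensor[] { null, null };
    }

    static void Main()
    {
        var state = new FakeGradState();
        var first = SwitchGrad("while/Switch",
            new FakeTensor[] { new FakeTensor("dL/dExit"), null }, state);
        Console.WriteLine("first visit -> " + first[0].Name);
        SwitchGrad("while/Switch",
            new FakeTensor[] { null, new FakeTensor("dL/dNextIteration") }, state);
    }
}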
@@ -233,17 +238,9 @@ public static Tensor[] _EnterGrad(Operation op, Tensor[] grads)
                 return grads;
             if (op.get_attr<bool>("is_constant"))
             {
-                throw new NotImplementedException("_EnterGrad is_constant");
-                // Add a gradient accumulator for each loop invariant.
-                // if isinstance(grad, ops.Tensor) :
-                //     result = grad_ctxt.AddBackpropAccumulator(op, grad)
-                // elif isinstance(grad, ops.IndexedSlices) :
-                //     result = grad_ctxt.AddBackpropIndexedSlicesAccumulator(op, grad)
-                // else:
-                //     # TODO(yuanbyu, lukasr): Add support for SparseTensor.
-                //     raise TypeError("Type %s not supported" % type(grad))
+                // Add a gradient accumulator for each loop invariant.
+                result = grad_ctxt.AddBackpropAccumulator(op, grad);
             }
-
             else
             {
                 result = control_flow_ops.exit(grad);
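
The second hunk turns on gradient accumulation for loop-invariant Enter ops instead of throwing. Conceptually, the gradient of a value that is constant across loop iterations arrives once per iteration during backprop and has to be summed; the toy below models only that idea (Accumulator and the sample values are invented for the sketch, and the real grad_ctxt.AddBackpropAccumulator builds graph ops rather than summing eagerly).

// Invented toy model: sums per-iteration gradients for a loop-invariant input,
// the role grad_ctxt.AddBackpropAccumulator plays in the real graph.
using System;
using System.Collections.Generic;

class BackpropAccumulatorSketch
{
    class Accumulator
    {
        private readonly List<double> parts = new List<double>();
        public void Add(double grad) { parts.Add(grad); }
        public double Total()
        {
            double sum = 0;
            foreach (var p in parts) sum += p;
            return sum;
        }
    }

    static void Main()
    {
        var acc = new Accumulator();
        // Backward pass over a 3-iteration loop: the invariant's gradient arrives three times.
        foreach (var perIterationGrad in new[] { 0.5, 0.25, 0.25 })
            acc.Add(perIterationGrad);
        Console.WriteLine(acc.Total());   // 1.0: total gradient w.r.t. the loop invariant
    }
}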

src/TensorFlowNET.Core/Gradients/gradients_util.cs

Lines changed: 58 additions & 53 deletions
@@ -123,10 +123,7 @@ public static Tensor[] _GradientsHelper(Tensor[] ys,
                 {
                     // generate gradient subgraph for op.
                     var op = queue.Dequeue();
-                    if(op.name == "rnn/while/Exit")
-                    {
 
-                    }
                     _maybe_colocate_with(op, gradient_uid, colocate_gradients_with_ops);
                     {
                         if (loop_state != null)
@@ -136,15 +133,14 @@ public static Tensor[] _GradientsHelper(Tensor[] ys,
                            loop_state.ExitGradWhileContext(op, before: true);
 
                        Tensor[] in_grads = null;
+                       Func<Operation, Tensor[], Tensor[]> grad_fn = null;
                        var is_partitioned_call = _IsPartitionedCall(op);
                        var is_func_call = false;
                        var has_out_grads = out_grads.Exists(x => x != null);
                        if (has_out_grads && !stop_ops.Contains(op))
                        {
                            // A grad_fn must be defined, either as a function or as None
                            // for ops that do not have gradients.
-
-                           Func<Operation, Tensor[], Tensor[]> grad_fn = null;
                            try
                            {
                                grad_fn = ops.get_gradient_function(op);
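
This hunk hoists the grad_fn declaration out of the stop-op check so it sits next to in_grads; the "(is_func_call || grad_fn != null) && has_out_grads" test, which the next hunk moves outside that block, can then still read it after the block closes. A minimal scoping illustration follows, with all values invented for the sketch.

// Invented example: a local declared inside a block is not visible after it,
// which is why grad_fn is now declared alongside in_grads.
using System;

class GradFnScopeSketch
{
    static void Main()
    {
        Func<int, int> grad_fn = null;          // hoisted, like the added line in this hunk
        bool has_out_grads = true, is_stop_op = false;

        if (has_out_grads && !is_stop_op)
        {
            grad_fn = x => 2 * x;               // assigned where the lookup happens
        }

        // The gradient-application branch can still see grad_fn after the block closes.
        if (grad_fn != null && has_out_grads)
            Console.WriteLine(grad_fn(3));      // prints 6
    }
}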
@@ -167,61 +163,57 @@ public static Tensor[] _GradientsHelper(Tensor[] ys,
                                throw new LookupError($"No gradient defined for operation '{op.name}' (op type: {op.type})");
                            }
                        }
+                       }
 
-                           if (loop_state != null)
-                               loop_state.EnterGradWhileContext(op, before: false);
+                       if (loop_state != null)
+                           loop_state.EnterGradWhileContext(op, before: false);
 
-                           if ((is_func_call || grad_fn != null) && has_out_grads)
+                       if ((is_func_call || grad_fn != null) && has_out_grads)
+                       {
+                           // NOTE: If _AggregatedGrads didn't compute a value for the i'th
+                           // output, it means that the cost does not depend on output[i],
+                           // therefore dC/doutput[i] is 0.
+                           foreach (var (i, out_grad) in enumerate(out_grads))
                            {
-                               // NOTE: If _AggregatedGrads didn't compute a value for the i'th
-                               // output, it means that the cost does not depend on output[i],
-                               // therefore dC/doutput[i] is 0.
-                               foreach (var (i, out_grad) in enumerate(out_grads))
-                               {
-                                   if (out_grad == null &&
-                                       (grad_fn == null || _IsTrainable(op.outputs[i])))
-                                   {
-                                       // Only trainable outputs or outputs for a function call that
-                                       // will use SymbolicGradient get a zero gradient. Gradient
-                                       // functions should ignore the gradient for other outputs.
-                                       if (loop_state != null)
-                                           out_grads[i] = new List<Tensor> { loop_state.ZerosLike(op, i) };
-                                       else
-                                           out_grads[i] = new List<Tensor> { control_flow_ops.ZerosLikeOutsideLoop(op, i) };
-                                   }
-                               }
-
-                               tf_with(ops.name_scope(op.name + "_grad"), scope1 =>
+                               if (out_grad == null &&
+                                   (grad_fn == null || _IsTrainable(op.outputs[i])))
                                {
-                                   if (grad_fn != null)
-                                   {
-                                       in_grads = _MaybeCompile(grad_scope,
-                                           op,
-                                           out_grads.Where(x => x != null).Select(x => x[0]).ToArray(),
-                                           null,
-                                           grad_fn);
-                                   }
+                                   // Only trainable outputs or outputs for a function call that
+                                   // will use SymbolicGradient get a zero gradient. Gradient
+                                   // functions should ignore the gradient for other outputs.
+                                   if (loop_state != null)
+                                       out_grads[i] = new List<Tensor> { loop_state.ZerosLike(op, i) };
                                    else
-                                   {
-                                       throw new NotImplementedException("lambda: _SymGrad(op, out_grads)");
-                                   }
-                                   _VerifyGeneratedGradients(in_grads, op);
-                                   if (gate_gradients && in_grads.Count(x => x != null) > 1)
-                                   {
-                                       ops._colocate_with_for_gradient(null, gradient_uid, ignore_existing: true);
-                                       in_grads = control_flow_ops.tuple(in_grads);
-                                   }
-                               });
+                                       out_grads[i] = new List<Tensor> { control_flow_ops.ZerosLikeOutsideLoop(op, i) };
+                               }
                            }
-                           else
+
+                           tf_with(ops.name_scope(op.name + "_grad"), scope1 =>
                            {
-                               // If no grad_fn is defined or none of out_grads is available,
-                               // just propagate a list of None backwards.
-                               in_grads = new Tensor[_NonEagerInputs(op, xs).Count()];
-                           }
+                               if (grad_fn != null)
+                               {
+                                   in_grads = _MaybeCompile(grad_scope,
+                                       op,
+                                       out_grads.Where(x => x != null).Select(x => x[0]).ToArray(),
+                                       null,
+                                       grad_fn);
+                               }
+                               else
+                               {
+                                   throw new NotImplementedException("lambda: _SymGrad(op, out_grads)");
+                               }
+                               _VerifyGeneratedGradients(in_grads, op);
+                               if (gate_gradients && in_grads.Count(x => x != null) > 1)
+                               {
+                                   ops._colocate_with_for_gradient(null, gradient_uid, ignore_existing: true);
+                                   in_grads = control_flow_ops.tuple(in_grads);
+                               }
+                           });
                        }
                        else
                        {
+                           // If no grad_fn is defined or none of out_grads is available,
+                           // just propagate a list of None backwards.
                            in_grads = new Tensor[_NonEagerInputs(op, xs).Count()];
                        }
 
@@ -370,7 +362,16 @@ private static void _SetGrad(Dictionary<string, List<List<Tensor>>> grads, Tenso
                grads[op.name] = op_grads;
            }
            var t_grads = op_grads[t.value_index];
-           t_grads.Add(grad);
+           if (t_grads.Count == 0)
+               t_grads.Add(grad);
+           else
+               op_grads[t.value_index][0] = grad;
+
+           /*if (control_flow_util.IsLoopSwitch(op) &&
+               t_grads[0] == null)
+               op_grads[t.value_index] = new List<Tensor> { grad };
+           else
+               t_grads.Add(grad);*/
        }
 
        private static IEnumerable<Tensor> _NonEagerInputs(Operation op, Tensor[] xs)
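
The _SetGrad hunk above changes accumulation for a revisited output: the first gradient is appended as before, but a later gradient for the same output now overwrites slot 0 instead of growing the list (the commented-out block preserves the loop-switch special case that was considered instead). The standalone toy below mirrors only that append-vs-overwrite behavior; the string gradients and dictionary shape are invented for the sketch.

// Invented toy: grads[opName][outputIndex] holds the gradients recorded for that output.
using System;
using System.Collections.Generic;

class SetGradSketch
{
    static readonly Dictionary<string, List<List<string>>> grads =
        new Dictionary<string, List<List<string>>>();

    static void SetGrad(string opName, int valueIndex, string grad)
    {
        List<List<string>> op_grads;
        if (!grads.TryGetValue(opName, out op_grads))
        {
            op_grads = new List<List<string>>();
            for (int i = 0; i <= valueIndex; i++)
                op_grads.Add(new List<string>());
            grads[opName] = op_grads;
        }

        var t_grads = op_grads[valueIndex];
        if (t_grads.Count == 0)
            t_grads.Add(grad);               // first visit: append
        else
            op_grads[valueIndex][0] = grad;  // revisit: replace rather than accumulate
    }

    static void Main()
    {
        SetGrad("while/Switch", 0, "g1");
        SetGrad("while/Switch", 0, "g2");    // overwrites; the list stays length 1
        Console.WriteLine(string.Join(", ", grads["while/Switch"][0]));   // prints "g2"
    }
}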
@@ -379,15 +380,19 @@ private static IEnumerable<Tensor> _NonEagerInputs(Operation op, Tensor[] xs)
                yield return op.inputs[i];
        }
 
-       private static List<List<Tensor>> _AggregatedGrads(Dictionary<string, List<List<Tensor>>> grads, Operation op, string gradient_uid, object loop_state, int aggregation_method = 0)
+       private static List<List<Tensor>> _AggregatedGrads(Dictionary<string, List<List<Tensor>>> grads, Operation op, string gradient_uid,
+           ControlFlowState loop_state, int aggregation_method = 0)
        {
            var out_grads = _GetGrads(grads, op);
 
            foreach (var (i, out_grad) in enumerate(out_grads))
            {
                if (loop_state != null)
                {
-
+                   if (out_grads.Count > 1 &&
+                       out_grads[1].Count > 0 &&
+                       control_flow_util.IsLoopSwitch(op))
+                       continue;
                }
 
                // Aggregate multiple gradients, and convert [] to None.
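
_AggregatedGrads now takes the loop state as ControlFlowState and, while inside a loop gradient state, skips aggregation for a Switch op whose second output already carries gradients, leaving those entries to the control-flow machinery. A schematic stand-in with invented names follows (IsLoopSwitch here is a trivial type check, unlike the real control_flow_util helper).

// Invented schematic of the new guard; only the skip logic is modeled.
using System;
using System.Collections.Generic;

class AggregatedGradsSketch
{
    static bool IsLoopSwitch(string opType) { return opType == "Switch"; }   // simplified stand-in

    static void Aggregate(List<List<string>> out_grads, string opType, bool inLoopState)
    {
        for (int i = 0; i < out_grads.Count; i++)
        {
            if (inLoopState &&
                out_grads.Count > 1 &&
                out_grads[1].Count > 0 &&
                IsLoopSwitch(opType))
                continue;                     // leave loop-switch gradients untouched

            // Normal path: sum multiple gradients and convert empty lists to null.
            Console.WriteLine("aggregating output " + i + " (" + out_grads[i].Count + " grads)");
        }
    }

    static void Main()
    {
        var out_grads = new List<List<string>>
        {
            new List<string> { "g0" },
            new List<string> { "g1" }
        };
        Aggregate(out_grads, "Switch", true);   // prints nothing: both outputs skipped
        Aggregate(out_grads, "Add", false);     // aggregates both outputs
    }
}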

src/TensorFlowNET.Core/Operations/Operation.cs

Lines changed: 1 addition & 4 deletions
@@ -182,10 +182,7 @@ public Operation(NodeDef node_def, Graph g, Tensor[] inputs = null, TF_DataType[
            // This will be set by self.inputs.
            if (op_def == null)
                op_def = g.GetOpDef(node_def.Op);
-           if(node_def.Name == "gradients/rnn/while/basic_rnn_cell/Tanh_grad/TanhGrad/f_acc")
-           {
-
-           }
+
            var grouped_inputs = _reconstruct_sequence_inputs(op_def, inputs, node_def.Attr);
            _handle = ops._create_c_op(g, node_def, grouped_inputs, control_input_ops.ToArray());
            _is_stateful = op_def.IsStateful;
