import tensorflow as tf def loss(nextState, actions): # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1) return tf.reduce_mean(tf.square(nextState[:, 0] + 0.25 * nextState[:, 1] - tf.reduce_sum( nextState[:, 2:6, 0] * (actions[:, :4] + actions[:, 4:]), axis=1))) + tf.reduce_mean( tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0) def loss2(nextState, actions): # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1) # return 0.1 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, 4:] + actions[:, :4]),axis=1))) + 0.9 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0) # return 0.0 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, :4]),axis=1))) + 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0) return tf.reduce_mean( tf.reduce_max(nextState[:, 2:6, 0] * tf.square((nextState[:, 6:, 0] - (actions[:, :4] + actions[:, 4:]))), axis=1), axis=0) # action = nextState[:, 3] * 1 + nextState[:, 4] * 2 + nextState[:, 5] * 3 # action = tf.cast(action, tf.int32) # action = tf.reshape(action, (-1,)) # # # test = actions[:, action[:]] # # test1 = tf.slice(actions[:, :4], action, (-1, 1)) # test2 = tf.slice(actions[:, 4:], action, (-1, 1)) # # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (test1 + test2)), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0) # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (actions[:, :4] + actions[:, 4:])), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0) def loss3(target, pred): return tf.reduce_mean(0.5 * tf.square(0.1 * target[:, 0, 0] + target[:, 1, 0] - (pred[:, 0] + pred[:, 1])) + 0.5 * tf.square(target[:, 1, 0] - pred[:, 0]), axis=0)