
TensorFlow 2.3: neural network regression problem - cost does not change

I am working on a neural network that predicts the state of charge (SOC) of a lithium-ion battery from its usage data. A pictorial representation is linked below.

I have adapted the code from Andrew Ng's course "Improving Deep Neural Networks...".

My problem is that the cost function stays constant.

Neural network architecture: Neural Net architecture

Link to the training data: Training data

Here is the code:

   """Importing necessary packages."""

import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import scipy.io
import pandas as pd

print(tf.__version__)

"""Loading the training data. Need to run the MATLAB script and generate training data files: X_train_set.mat and Y_train_set.mat
These 2 .mat files will be imported and renamed appropriately.
"""

input_data=scipy.io.loadmat('X_train_set.mat')
label=scipy.io.loadmat('Y_train_set.mat')
X_train=input_data['X_train_set']
Y=label['Y_train_set']

"""Entering the layer information. First layer is input,followed by first hidden layer,second hidden layer,..and ultimately the output layer."""

layers_dims=[4,4,4,1]
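# With these layer sizes, initialize_parameters_deep below builds W1:(4,4), W2:(4,4),
# W3:(1,4) and b1:(4,1), b2:(4,1), b3:(1,1), matching the shapes printed in the output.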

def create_placeholders(n_x,n_y):
  """Create placeholder for inputs and outputs.
   
    Arguments: 
    n_x:number of features in the input vector(Current,temp etc) 
    n_y: number of features in output vector(SOC)
   
    Returns:
    Input matrix X and Output matrix Y
  """

  X = tf.compat.v1.placeholder(tf.float64,shape=(n_x,None),name="X")
  Y = tf.compat.v1.placeholder(tf.float64,shape=(n_y,None),name="Y")

  return X,Y
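
# How the placeholders get their values (sketch only, kept as a comment so it does not
# add extra nodes to the training graph): a tf.compat.v1 placeholder receives data at
# sess.run time through feed_dict, which is how X_train_data/Y_train_data are supplied
# further below. X_demo, Y_demo and demo_sess are names made up for this sketch.
#
#   X_demo, Y_demo = create_placeholders(4, 1)
#   with tf.compat.v1.Session() as demo_sess:
#       x_val = np.ones((4, 3), dtype=np.float64)   # 3 dummy examples, 4 features each
#       demo_sess.run(tf.reduce_sum(X_demo), feed_dict={X_demo: x_val})   # -> 12.0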

def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    
    Returns:
    parameters -- python dictionary containing your parameters "W1","b1",...,"WL","bL":
                    Wl -- weight matrix of shape (layer_dims[l],layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l],1)
    """
    
    parameters = {}
    L = len(layer_dims)            # number of layers in the network including the input layer
     
    tf.random.set_seed(1)
    
    for l in range(1,L):
        
        # shape=(layer_dims[l],layer_dims[l-1])
       
        initializer_w = tf.keras.initializers.GlorotUniform(seed=3)  #initializer for weight matrices
        # temp = tf.Variable(initializer_w((layers_dims[l],layers_dims[l-1])))
        # tf.dtypes.cast(temp,tf.float64)
        parameters['W' + str(l)] = tf.Variable(initializer_w((layer_dims[l],layer_dims[l-1]),dtype=tf.float64))
        
        
        # shape=(layer_dims[l],1)
        initializer_b = tf.zeros_initializer()     #initializer for bias matrices          
        parameters['b' + str(l)] = tf.Variable(initializer_b((layer_dims[l],1),dtype=tf.float64))
        
        
        assert(parameters['W' + str(l)].shape == (layer_dims[l],layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l],1))
        
        print("shape of W"+str(l)+" is",parameters['W' + str(l)].shape)
        print("shape of b"+str(l)+" is",parameters['b' + str(l)].shape)
        
    return parameters

"""Now,we create necessary functions for forward propogation."""

def linear_activation_forward(A_prev,W,b):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from the previous layer (or input data): (size of previous layer,number of examples)
    W -- weights matrix: numpy array of shape (size of current layer,size of previous layer)
    b -- bias vector,numpy array of shape (size of the current layer,1)
    
    Returns:
    Z -- the pre-activation value,Z = W.A_prev + b
    A -- the output of the ReLU activation function,also called the post-activation value
    """
    
    # Inputs: "A_prev,b". Outputs: "A,activation_cache".
    

    
    Z = tf.add(tf.matmul(W,A_prev),b)
    A = tf.nn.relu(Z)  
   

    return Z,A
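
# Shape illustration with toy numbers (assumed values, not used by the model):
# W:(2,3) @ A_prev:(3,2) + b:(2,1) -> Z:(2,2), then A = relu(Z) keeps the same shape.
W_demo = np.array([[1., 0., -1.], [0.5, 0.5, 0.5]])
A_prev_demo = np.array([[1., 2.], [3., 4.], [5., 6.]])
b_demo = np.array([[1.], [-2.]])
Z_demo = W_demo @ A_prev_demo + b_demo        # [[-3. , -3. ], [ 2.5,  4. ]]
A_demo = np.maximum(Z_demo, 0)                # [[ 0. ,  0. ], [ 2.5,  4. ]]
assert Z_demo.shape == A_demo.shape == (2, 2)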

def L_model_forward(X,parameters):
    """
    Implement forward propagation for the [LINEAR->RELU] block,repeated L-1 times
    
    Arguments:
    X -- data,numpy array of shape (input size,number of examples)
    parameters -- output of initialize_parameters_deep()
    
    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() (there are L-1 of them,indexed from 0 to L-1)
    """

    caches = []
    A = X
    L = len(parameters) // 2 + 1                 # number of layers in the neural network
    
    # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    for l in range(1,L):
        A_prev = A 
        ### START CODE HERE ### (≈ 2 lines of code)
        Z,A = linear_activation_forward(A_prev,parameters['W' + str(l)],parameters['b' + str(l)])
        print("shape of Z"+str(l)+"is",Z.shape)
        print("shape of A"+str(l)+"is",A.shape)
    
        ### END CODE HERE ###
    
    ZL=Z
    AL=A

    print("Shape of AL is",AL.shape)
    
    #assert(AL.shape == (1,X.shape[1]))
            
    return ZL,AL

"""Now,we define the cost function."""

def compute_cost(AL,Y):
    """
    Implement the cost function.

    Arguments:
    AL -- Predicted SOC(1,number of examples)
    Y -- Actual SOC values(1,number of examples)

    Returns:
    cost -- mean squared error cost
    """   
    
    cost = tf.reduce_mean(tf.math.squared_difference(AL,Y))
    #print("data type of cost in compute_cost:" +str(type(cost)))
    
    
    return cost
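
# Quick numpy check of the cost definition (illustrative only, assumed toy values):
# compute_cost above is the mean squared error, cost = mean((AL - Y)^2).
al_check = np.array([[0.5, 0.2]])
y_check = np.array([[1.0, 0.0]])
assert np.isclose(np.mean((al_check - y_check) ** 2), 0.145)  # ((-0.5)**2 + 0.2**2)/2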

def L_layer_model(X_train_data,Y_train_data,layers_dims,learning_rate = 0.0075,num_iterations = 3000):  #lr was 0.009
    """
    Implements a L-layer neural network.
    
    Arguments:
    X -- data,numpy array of shape (4(number of features: Temp,current,avg current,avg temp),number of examples)
    Y -- true SOC vector,of shape (1,number of examples)
    layers_dims -- list containing the input size and each layer size,of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True,it prints the cost every 100 steps
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    #tf.compat.v1.reset_default_graph()     # to be able to rerun the model without overwriting tf variables
    tf.random.set_seed(1)          # to keep consistent results
    seed=3                         # to keep consistent results
    (n_x,m)=X_train_data.shape
    n_y=Y_train_data.shape[0]
    costs = []                     # keep track of cost
    
    
    #Create placeholders for X and Y
    X,Y = create_placeholders(n_x,n_y)
    # X = tf.compat.v1.placeholder(shape=[n_x,None],dtype=tf.float64)
    # Y = tf.compat.v1.placeholder(shape=[n_y,dtype=tf.float64)
    
    # Parameters initialization    
    parameters = initialize_parameters_deep(layers_dims)

    # Forward propagation: Build the forward propagation in the tensorflow graph
    ZL,AL = L_model_forward(X,parameters)
    
    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(AL,Y)
    # print(type(cost))
    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    # print(type(cost))
    # Initialize all the variables
    init = tf.compat.v1.global_variables_initializer()
    
    # Start the session to compute the tensorflow graph
    with tf.compat.v1.Session() as sess:
         
        # Run the initialization
        sess.run(init)
      
        # Do the training loop
        for i in range(0,num_iterations):
            #epoch_loss = 0
            
            _ = sess.run(optimizer,feed_dict={X : X_train_data,Y : Y_train_data})
            #X_train = tf.convert_to_tensor(X_train,np.float64)

            #epoch_loss = epoch_loss + c
        # Print the cost every 100 training example
            if i % 100 == 0:
                c = sess.run(cost,feed_dict={X : X_train_data,Y : Y_train_data})
                print ("Cost after iteration %i: %f" %(i,c))
                costs.append(c)
            
    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    
    return parameters

"""Below we actually run the model using tensorflow."""

parameters=L_layer_model(X_train,Y,layers_dims,learning_rate=0.001,num_iterations=1000)

Here is the output. For clarity, I printed the shape of each matrix.

shape of W1 is (4,4)
shape of b1 is (4,1)
shape of W2 is (4,4)
shape of b2 is (4,1)
shape of W3 is (1,4)
shape of b3 is (1,1)
shape of Z1is (4,None)
shape of A1is (4,None)
shape of Z2is (4,None)
shape of A2is (4,None)
shape of Z3is (1,None)
shape of A3is (1,None)
Shape of AL is (1,None)
Cost after iteration 0: 0.367816
Cost after iteration 100: 0.367816
Cost after iteration 200: 0.367816
Cost after iteration 300: 0.367816
Cost after iteration 400: 0.367816
Cost after iteration 500: 0.367816
Cost after iteration 600: 0.367816
Cost after iteration 700: 0.367816
Cost after iteration 800: 0.367816
Cost after iteration 900: 0.367816

Graph of cost vs. iterations
