Data Classification Example

# Package imports
import numpy as np
import matplotlib.pyplot as plt
#from testCases_v2 import *
import sklearn
import sklearn.datasets
import sklearn.linear_model

def layer_sizes(X, Y):
    """Return the layer sizes (n_x, n_h, n_y) for the 2-layer network.

    X has shape (n_x, m) — one row per input feature, one column per example;
    Y has shape (n_y, m) — one row per output label dimension.
    The hidden-layer size is fixed at 4 units in this example.
    """
    input_units = X.shape[0]    # number of input features
    hidden_units = 4            # this example assumes four hidden units
    output_units = Y.shape[0]   # number of output units
    return (input_units, hidden_units, output_units)

def initialize_parameters(n_x, n_h, n_y):
    """Randomly initialize the weights and zero-initialize the biases.

    Parameters
    ----------
    n_x : int — size of the input layer.
    n_h : int — size of the hidden layer.
    n_y : int — size of the output layer.

    Returns
    -------
    dict with keys "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h), "b2" (n_y, 1).
    """
    # Weights are scaled by 0.01 so tanh starts in its near-linear region;
    # W1 is stored as (n_h, n_x) so no transpose is needed in forward prop.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    # Fix: b2 was initialized with randn; biases should start at zero like b1.
    b2 = np.zeros((n_y, 1))

    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters

def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 =,X)+b1
    A1 = np.tanh(Z1)
    Z2 =,A1)+b2
    A2 = sigmoid(Z2)

    assert(A2.shape == (1, X.shape[1]))
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return A2, cache

def compute_cost(A2, Y, parameters):
    """Compute the binary cross-entropy cost averaged over the m examples.

    Parameters
    ----------
    A2 : np.ndarray of shape (1, m) — sigmoid outputs in (0, 1).
    Y : np.ndarray of shape (1, m) — 0/1 labels.
    parameters : unused; kept so the call signature matches the course layout.

    Returns
    -------
    float — cost = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2)).
    """
    m = Y.shape[1]

    # Fix: the original dropped the (1-Y)*log(1-A2) term and never divided
    # by m, so the "cost" was not the cross-entropy it was meant to be.
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), 1 - Y)
    cost = -np.sum(logprobs) / m

    cost = float(np.squeeze(cost))  # collapse the 0-d array to a plain float
    assert (isinstance(cost, float))
    return cost

def backward_propagation(parameters, cache, X, Y):
    #the input parameters is just the result of the initialize function
    #in many places we need to first get the number of the example trained
    m = X.shape[1]   

    W1 = parameters["W1"]
    W2 = parameters["W2"]

    A1 = cache["A1"]
    A2 = cache["A2"]

    #The deduction of the six formulas is very important 
    dZ2 = A2 - Y
    dW2 =,A1.T)/m
    db2 = np.sum(dZ2,axis = 1,keepdims = True)/m
    dZ1 =,dZ2)*(1-np.power(A1,2)) 
    dW1 =,X.T)/m
    db1 = np.sum(dZ1,axis = 1,keepdims = True)/m

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads

def update_parameters(parameters, grads, learning_rate = 1.2):
    """One gradient-descent step: param <- param - learning_rate * grad.

    Parameters
    ----------
    parameters : dict with keys "W1", "b1", "W2", "b2".
    grads : dict with the matching keys "dW1", "db1", "dW2", "db2".
    learning_rate : step size for the update (default 1.2).

    Returns
    -------
    dict with the same four keys holding the updated values.
    """
    updated = {}
    # Each gradient key is simply "d" prefixed to its parameter key.
    for key in ("W1", "b1", "W2", "b2"):
        updated[key] = parameters[key] - learning_rate * grads["d" + key]
    return updated

def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    """Train the 2-layer network with full-batch gradient descent.

    Parameters
    ----------
    X : np.ndarray of shape (n_x, m) — training inputs.
    Y : np.ndarray of shape (n_y, m) — training labels.
    n_h : int — hidden-layer size (overrides the fixed value in layer_sizes).
    num_iterations : number of gradient-descent steps (default 10000).
    print_cost : when True, print the cost every 1000 iterations.

    Returns
    -------
    dict — the trained parameters "W1", "b1", "W2", "b2".
    """
    # Fix: the original called layer_sizes twice and also unpacked
    # W1/b1/W2/b2 into locals that were never used.
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads)
        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))

    return parameters

def predict(parameters, X):
    """Predict 0/1 labels for X by thresholding the network output at 0.5.

    Returns a boolean array of shape (1, m): True where the sigmoid
    output exceeds 0.5, False otherwise.
    """
    probabilities, _ = forward_propagation(X, parameters)
    return probabilities > 0.5


Actually logistic regression can be seen as a one-layer neural network. In logistic regression, there is only one activation function and there is no hidden layer.
The methods used in logistic regression are applicable to neural networks with several hidden layers.
Compared with the code in last article, we can find that the propagation function is separated into forward_propagation and backward_propagation and the optimize function is separated into the update_parameters function and the loop is placed in the model function.
It has to be pointed out that in this case the Y contains the labels and the X contains the features, which is different from last case.
The two propagation functions are strongly connected: back propagation produces the gradients that update the parameters forward propagation uses to predict, and the cache produced by forward propagation is what back propagation needs to compute those gradients.
We need to have an overview of the problem, then split it into many small pieces, and finally merge the partial solutions into one model function.
Dictionary is often used to transmit the parameters between functions.
Assertions are important to keep the dimensions, avoiding many subtle bugs.
Through practice we find that as the number of units in the hidden layer increases, the accuracy first increases but then decreases.

Reference: the deep learning course provided by Andrew Ng on Coursera

Last modification:March 13th, 2019 at 07:06 pm
If you think my article is useful to you, please feel free to appreciate

Leave a Comment