|
- #!/usr/bin/env python
- # -*- coding=utf-8 -*-
- # @author: 陈水平
- # @date: 2017-01-04
- # @description: compare the logistics regression of tensorflow with sklearn based on the exercise of deep learning course of Andrew Ng.
- # @ref: http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=DeepLearning&doc=exercises/ex4/ex4.html
- import tensorflow as tf
- import numpy as np
- from sklearn.linear_model import LogisticRegression
- from sklearn import preprocessing
- # Read x and y
- x_data = np.loadtxt("ex4x.dat").astype(np.float32)
- y_data = np.loadtxt("ex4y.dat").astype(np.float32)
- scaler = preprocessing.StandardScaler().fit(x_data)
- x_data_standard = scaler.transform(x_data)
- # We evaluate the x and y by sklearn to get a sense of the coefficients.
- reg = LogisticRegression(C=999999999, solver="newton-cg") # Set C as a large positive number to minimize the regularization effect
- reg.fit(x_data, y_data)
- print "Coefficients of sklearn: K=%s, b=%f" % (reg.coef_, reg.intercept_)
- # Now we use tensorflow to get similar results.
- W = tf.Variable(tf.zeros([2, 1]))
- b = tf.Variable(tf.zeros([1, 1]))
- y = 1 / (1 + tf.exp(-tf.matmul(x_data_standard, W) + b))
- loss = tf.reduce_mean(- y_data.reshape(-1, 1) * tf.log(y) - (1 - y_data.reshape(-1, 1)) * tf.log(1 - y))
- optimizer = tf.train.GradientDescentOptimizer(1.3)
- train = optimizer.minimize(loss)
- init = tf.initialize_all_variables()
- sess = tf.Session()
- sess.run(init)
- for step in range(100):
- sess.run(train)
- if step % 10 == 0:
- print step, sess.run(W).flatten(), sess.run(b).flatten()
- print "Coefficients of tensorflow (input should be standardized): K=%s, b=%s" % (sess.run(W).flatten(), sess.run(b).flatten())
- print "Coefficients of tensorflow (raw input): K=%s, b=%s" % (sess.run(W).flatten() / scaler.scale_, sess.run(b).flatten() - np.dot(scaler.mean_ / scaler.scale_, sess.run(W)))
- # Problem solved and we are happy. But...
- # I'd like to implement the logistic regression from a multi-class viewpoint instead of binary.
- # In machine learning domain, it is called softmax regression
- # In economic and statistics domain, it is called multinomial logit (MNL) model, proposed by Daniel McFadden, who shared the 2000 Nobel Memorial Prize in Economic Sciences.
- print "------------------------------------------------"
- print "We solve this binary classification problem again from the viewpoint of multinomial classification"
- print "------------------------------------------------"
- # As a tradition, sklearn first
- reg = LogisticRegression(C=9999999999, solver="newton-cg", multi_class="multinomial")
- reg.fit(x_data, y_data)
- print "Coefficients of sklearn: K=%s, b=%f" % (reg.coef_, reg.intercept_)
- print "A little bit difference at first glance. What about multiply them with 2?"
- # Then try tensorflow
- W = tf.Variable(tf.zeros([2, 2])) # first 2 is feature number, second 2 is class number
- b = tf.Variable(tf.zeros([1, 2]))
- V = tf.matmul(x_data_standard, W) + b
- y = tf.nn.softmax(V) # tensorflow provide a utility function to calculate the probability of observer n choose alternative i, you can replace it with `y = tf.exp(V) / tf.reduce_sum(tf.exp(V), keep_dims=True, reduction_indices=[1])`
- # Encode the y label in one-hot manner
- lb = preprocessing.LabelBinarizer()
- lb.fit(y_data)
- y_data_trans = lb.transform(y_data)
- y_data_trans = np.concatenate((1 - y_data_trans, y_data_trans), axis=1) # Only necessary for binary class
- loss = tf.reduce_mean(-tf.reduce_sum(y_data_trans * tf.log(y), reduction_indices=[1]))
- optimizer = tf.train.GradientDescentOptimizer(1.3)
- train = optimizer.minimize(loss)
- init = tf.initialize_all_variables()
- sess = tf.Session()
- sess.run(init)
- for step in range(100):
- sess.run(train)
- if step % 10 == 0:
- print step, sess.run(W).flatten(), sess.run(b).flatten()
- print "Coefficients of tensorflow (input should be standardized): K=%s, b=%s" % (sess.run(W).flatten(), sess.run(b).flatten())
- print "Coefficients of tensorflow (raw input): K=%s, b=%s" % ((sess.run(W) / scaler.scale_).flatten(), sess.run(b).flatten() - np.dot(scaler.mean_ / scaler.scale_, sess.run(W)))
复制代码 输出如下:
- Coefficients of sklearn: K=[[ 0.14834077 0.15890845]], b=-16.378743
- 0 [ 0.33699557 0.34786162] [ -4.84287721e-09]
- 10 [ 1.15830743 1.22841871] [ 0.02142336]
- 20 [ 1.3378191 1.42655993] [ 0.03946959]
- 30 [ 1.40735555 1.50197577] [ 0.04853692]
- 40 [ 1.43754184 1.53418231] [ 0.05283691]
- 50 [ 1.45117068 1.54856908] [ 0.05484771]
- 60 [ 1.45742035 1.55512536] [ 0.05578374]
- 70 [ 1.46030474 1.55814099] [ 0.05621871]
- 80 [ 1.46163988 1.55953443] [ 0.05642065]
- 90 [ 1.46225858 1.56017959] [ 0.0565144]
- Coefficients of tensorflow (input should be standardized): K=[ 1.46252561 1.56045783], b=[ 0.05655487]
- Coefficients of tensorflow (raw input): K=[ 0.14831361 0.15888004], b=[-16.26265144]
- ------------------------------------------------
- We solve this binary classification problem again from the viewpoint of multinomial classification
- ------------------------------------------------
- Coefficients of sklearn: K=[[ 0.07417039 0.07945423]], b=-8.189372
- A little bit difference at first glance. What about multiply them with 2?
- 0 [-0.33699557 0.33699557 -0.34786162 0.34786162] [ 6.05359674e-09 -6.05359674e-09]
- 10 [-0.68416572 0.68416572 -0.72988117 0.72988123] [ 0.02157043 -0.02157041]
- 20 [-0.72234094 0.72234106 -0.77087188 0.77087194] [ 0.02693938 -0.02693932]
- 30 [-0.72958517 0.72958535 -0.7784785 0.77847856] [ 0.02802362 -0.02802352]
- 40 [-0.73103166 0.73103184 -0.77998811 0.77998811] [ 0.02824244 -0.02824241]
- 50 [-0.73132294 0.73132324 -0.78029168 0.78029174] [ 0.02828659 -0.02828649]
- 60 [-0.73138171 0.73138207 -0.78035289 0.78035301] [ 0.02829553 -0.02829544]
- 70 [-0.73139352 0.73139393 -0.78036523 0.78036535] [ 0.02829732 -0.0282972 ]
- 80 [-0.73139596 0.73139632 -0.78036767 0.78036791] [ 0.02829764 -0.02829755]
- 90 [-0.73139644 0.73139679 -0.78036815 0.78036839] [ 0.02829781 -0.02829765]
- Coefficients of tensorflow (input should be standardized): K=[-0.7313965 0.73139679 -0.78036827 0.78036839], b=[ 0.02829777 -0.02829769]
- Coefficients of tensorflow (raw input): K=[-0.07417037 0.07446811 -0.07913655 0.07945422], b=[ 8.1893692 -8.18937111]
复制代码
|
|