【TensorFlow学习小组】Week 2动手任务list

实践作业

#1

####2017.11.9

####动手任务:

1、使用tf实现Logistic Regression算法(必做) 截止日期:11.18 2、使用【1任务】实现的算法,完成 “Kaggle泰坦尼克之灾” (链接https://www.kaggle.com/c/titanic)(选做) 截止日期:11.25

备注:

代码结果在后面帖子回复


#2

#3
import scipy.io as scio
import matplotlib.pyplot as plt
import  numpy as np

file_x = scio.loadmat('ex4x.mat');                 #导入的数据为dict数据类型,前一部分为数据信息说明,后一部分数据是以
array方式存放
file_y = scio.loadmat('ex4y.mat');
data_x = file_x['x']                               #因为数据存放的关键字为'x',提取dict中的array元素另存为data_x
data_y = file_y['y']
data_x = np.mat(data_x)                            #因为后面有矩阵的运算,所有将array转为矩阵数据类型
in_1 = np.ones((80,1),dtype=np.float64)
in_1 = np.mat(in_1)
data_x=np.column_stack((in_1,data_x))             #给矩阵增加列  np.row_stack:给矩阵增加行
data_y = np.mat(data_y)
#获取矩阵的维度,行数和列数
data_x_dim = data_x.shape

m = data_x_dim[0]
n = data_x_dim[1]

plot_x = data_x[:,1]                            #获取data_x第二列数据
plot_y = data_x[:,2]                            #获取data_x第三列数据
plot_x = np.array(np.transpose(plot_x))         #画图时,参变量只能是一维的
plot_y = np.array(np.transpose(plot_y))
plot_x1 = plot_x[:,1:40]
plot_y1 = plot_y[:,1:40]

plot_x2 = plot_x[:,41:80]
plot_y2 = plot_y[:,41:80]



#以上为数据处理和画图部分,下面进行逻辑回归及牛顿迭代
w =np.mat(np.zeros((n,1),dtype=np.float64))
z = data_x*w
m_1_ones = np.mat(np.ones((m,1)))
h = m_1_ones /(1+np.exp(-z))

max_iteration=20
thred = 0.0001
count=1
cost_now=0
while 1:
    count = count + 1
    z = data_x*w
    m_1_ones = np.mat(np.ones((m,1)))
    h = m_1_ones /(1+np.exp(-z))
    cont_pre=cost_now
    error = np.multiply(data_y , np.log(h)) + np.multiply((1-data_y) ,(np.log(np.mat(np.ones((m,1))-h))))
    cost_now=error.sum(axis=0)                                   #当前代价函数值
    if(abs(cost_now-cont_pre)<=thred) or (count>max_iteration):
        break
    iteration_error = h-data_y
    G = (np.transpose(data_x)*iteration_error)/m

    a1= np.matrix.tolist(np.transpose(h))
    b1 = tuple(a1[0])
    c1 = np.diag(b1)

    a2= np.matrix.tolist(np.transpose((1-h)))
    b2 = tuple(a2[0])
    c2= np.diag(b2)
    r = c1*c2
    H = (np.transpose(data_x)*r*data_x)/m
    w = w -H.I*G

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(plot_x1,plot_y1,marker='+',color='r')
ax.scatter(plot_x2,plot_y2,marker='o',color='g')
x_ax = plot_x
y_ax = (-w[0]-w[1]*plot_x)/w[2]
ax.plot(x_ax,y_ax,marker='o',color='b')
plt.ion()
plt.show()

图中绿色和红色为两类原始数据,蓝色直线为训练得出的模型线,因本人边学python边调程序,程序里面可能有很多重复代码,请多见谅。 另外附上原始数据地址 http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=DeepLearning&doc=exercises/ex4/ex4.html


#4
# -*- coding: utf-8 -*-
import tensorflow as tf

# 导入input_data用于自动下载
from tensorflow.examples.tutorials.mnist import input_data
# 安装MNIST数据集
mnist = input_data.read_data_sets("MNIST_Data/data/", one_hot=True)

# 一些参数
learning_rate = 0.01
training_epochs = 25
batch_size = 100
display_step = 1

# 放置占位符,用于在计算时接收输入值

# mnist图像数据 28*28=784
x = tf.placeholder(tf.float32, [None, 784])

# 图像类别,总共10类
y = tf.placeholder(tf.float32, [None, 10])

#创建两个变量,分别用来存放权重值W和偏置值b
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# 使用Tensorflow提供的回归模型softmax,y代表输出
pred = tf.nn.softmax(tf.matmul(x, W) + b)

# reduce_sum 对所有类别求和,reduce_mean 对和取平均
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))

# 往graph中添加新的操作,计算梯度,计算参数的更新
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# 初始化之前创建的变量的操作
init = tf.initialize_all_variables()

# 启动初始化
with tf.Session() as sess:
    sess.run(init)
    # 开始训练模型
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # 每一轮迭代total_batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # 使用batch data训练数据
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                          y: batch_ys})
            # 将每个batch的损失相加求平均
            avg_cost += c / total_batch
        # 每一轮打印损失
        if (epoch+1) % display_step == 0:
            print "Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost)

    print "Optimization Finished!"

    # 评估模型,tf.argmax能给出某个tensor对象在某一维上数据最大值的索引。因为标签是由0,1组成了one-hot vector,返回的索引就是数值为1的位置
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

    # 计算正确预测项的比例,因为tf.equal返回的是布尔值,使用tf.cast可以把布尔值转换成浮点数,tf.reduce_mean是求平均值
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # 在session中启动accuracy,输入是MNIST中的测试集
    print "Accuracy:", accuracy.eval({x: mnist.test.images[:3000], y: mnist.test.labels[:3000]})


#5

:+1:,非常精彩。期待更多总结~


#6

哈哈 :grin: 过奖啦 我只是总结了前辈们的记录 希望能和大家一起学习:blush:


#7

###参考了《面向机器智能的TensorFlow实践》一书,用tf实现logistic Regression算法,并用Kaggle泰坦尼克数据做测试 import tensorflow as tf

# 从csv文件读取数据
def read_csv(batch_size, file_name, record_defaults):
    filename_queue = \
        tf.train.string_input_producer([file_name])

    reader = tf.TextLineReader(skip_header_lines=1)
    key, value = reader.read(filename_queue)

    decoded = tf.decode_csv(value, record_defaults=record_defaults)

    return tf.train.shuffle_batch(decoded,
                                  batch_size=batch_size,
                                  capacity=batch_size * 50,
                                  min_after_dequeue=batch_size)

# 参数初始化
w = tf.Variable(tf.zeros([5, 1]), name="weights")
b = tf.Variable(0., name="bias")

# 值合并,也就是计算推断函数
def combine_inputs(X):
    return tf.matmul(X, w) + b


# 计算模型在X上的输出
def inference(X):
    return tf.sigmoid(combine_inputs(X))


# 计算损失
def loss(X, Y):
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=combine_inputs(X), labels=Y))


# 读取或生成训练数据X,及期望输出Y
def inputs():
    record_defaults = [[0.], [0.], [0.], [""], [""], [0.], [0.], [0.], [""], [0.], [""], [""]]

    passenger_id, survived, pclass, name, sex, age, \
    sibsp, parch, ticket, fare, cabin, embarked = \
        read_csv(891, "E:/机器学习/泰坦尼克/train.csv", record_defaults)

    # 转换属性数据
    is_first_class = tf.to_float(tf.equal(pclass, [1]))
    is_second_class = tf.to_float(tf.equal(pclass, [2]))
    is_third_class = tf.to_float(tf.equal(pclass, [3]))

    gender = tf.to_float(tf.equal(sex, ["female"]))

    features = tf.transpose(tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    survived = tf.reshape(survived, [891, 1])

    return features, survived


# 依据计算的总损失 训练、调整模型参数
def train(total_loss):
    learning_rate = 0.001
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)


# 对训练得到的模型进行评估
def evaluate(sess, X, Y):
    predicted = tf.cast(inference(X) > 0.5, tf.float32)
    print('准确率:', sess.run(tf.reduce_mean(tf.cast(tf.equal(predicted, Y), tf.float32))))


# 在一个会话中启动数据流图
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    X, Y = inputs()

    total_loss = loss(X, Y)
    train_op = train(total_loss)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    #训练迭代次数
    training_steps = 1000
    try:

        for step in range(training_steps):
            if coord.should_stop():
                break
            sess.run([train_op])
            #查看损失函数的值
            if step % 10 == 0:
                print("loss: ", sess.run([total_loss]))
    except Exception as e:
        coord.request_stop(e)

    evaluate(sess, X, Y)

    coord.request_stop()
    coord.join(threads)
    sess.close()

###只选取了船票等级、性别、年龄等属性,预测准确率在0.8左右


#8

上周一直在改论文,没有按时交作业,还是要补起,学习要继续,gogogo

代码:

运行过程:

损失值变化图:

遇到的问题: 1.在虚拟机中进行的实验,下载数据集的时候老是出现连不上网络的情况,于是去网上自己下载了数据集进行的实验。 2.虽然听了理论,但是在编码时还是会忘记,比如损失函数的求法,于是去一些博客上查看了,这里贴出我查看的博客,关于线性回归和逻辑回归的损失函数定义与区别,讲得很好也很清楚。 http://blog.csdn.net/wjlucc/article/details/71095206 3.在训练的过程出现损失值优化很慢的情况,调整了学习速率后效果变好了。 4.实现参考了别人的实现过程,然后根据理论自己摸索,还是出现了很多细节的问题,比如tf.Variable写成了tf.variable。

收获: 经过实践,对tf的运用变得更加熟悉。


#9
import numpy as np
import pandas as pd
import tensorflow as tf

DATA_DIR = './kaggle_titanic/'

 # 从csv文件中读取数据
data = pd.read_csv(DATA_DIR + 'train.csv')

 # 数据预处理
 # 1. 性别转换为one-hot
 # 2. 年龄补零
 # 3. 年龄归一化?
data['Sex'] = data['Sex'].apply(lambda x: 1 if x == 'male' else 0)
data = data.fillna(0)

 # 整理特征集合
data_X = data[['Pclass', 'Sex', 'Age']]
dataset_X = data_X.as_matrix()

 # 整理标签集合
data['Deceased'] = data['Survived'].apply(lambda s: 1 - s) 
data_Y = data[['Deceased', 'Survived']]
dataset_Y = data_Y.as_matrix()

 # 将训练数据切分为“训练数据集和验证数据集”
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    dataset_X, dataset_Y, test_size = 0.2, random_state = 42)

 # 声明输入数据占位符
X = tf.placeholder(tf.float32, shape=[None, 3], name='input')
y = tf.placeholder(tf.float32, shape=[None, 2], name='label')

 # 声明参数变量
W = tf.Variable(tf.random_normal([3, 2]), name='weights')
b = tf.Variable(tf.zeros([2]), name='bias')

 # 构造前向传播计算图--逻辑回归
y_pred = tf.nn.softmax(tf.matmul(X, W) + b)

 # 声明代价函数--交叉熵
cross_entropy = - tf.reduce_sum(y * tf.log(y_pred + 1e-10), reduction_indices=1)
 # 批量样本的代价值为所有样本交叉熵的平均值
cost = tf.reduce_mean(cross_entropy)

 # 使用随机梯度下降算法优化器来最小化代价,系统自动构建反向传播部分的计算图
 # 学习率(learning rate)设定为0.001
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

 # 构建训练迭代过程,以及进行预测
with tf.Session() as sess:
    # 初始化所有变量,必须最先执行
    tf.global_variables_initializer().run()
    # 训练迭代,100轮
    for epoch in range(100):
        total_loss = 0.
        for i in range(len(X_train)):
            feed = {X: [X_train[i]], y: [y_train[i]]}
            # 通过Session.run接口触发执行
            _, loss = sess.run([train_op, cost], feed_dict=feed)
            total_loss += loss
        if (epoch % 10 == 0):
            print('Epoch: %04d, total loss=%.9f' % (epoch + 1, total_loss))
    print('Training complete!')
    # 用验证集评估模型的准确率
    pred_val = sess.run(y_pred, feed_dict={X: X_val})
    correct = np.equal(np.argmax(pred_val, 1), np.argmax(y_val, 1))
    accuracy = np.mean(correct.astype(np.float32))
    print('accuracy on validation set:%.9f' % accuracy)
    
    # ------
    
    # 读入测试数据集
    testdata = pd.read_csv(DATA_DIR + 'test.csv')
    
    # 数据清洗, 数据预处理
    testdata = testdata.fillna(0)
    testdata['Sex'] = testdata['Sex'].apply(lambda x: 1 if x == 'male' else 0)
    
    # 特征选择
    # X_test = testdata[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
    X_test = testdata[['Pclass', 'Sex', 'Age']]
    
    # 评估模型
    pred_test = np.argmax(sess.run(y_pred, feed_dict={X: X_test}), 1)
    
    # 保存预测结果
    submission = pd.DataFrame({
        'PassengerId': testdata['PassengerId'],
        'Survived': pred_test
    })
    submission.to_csv(DATA_DIR + 'titanic-submission.csv', index=False)

Epoch: 0001, total loss=1338.798869372 Epoch: 0011, total loss=472.102258002 Epoch: 0021, total loss=448.355031409 Epoch: 0031, total loss=433.984386031 Epoch: 0041, total loss=424.142653639 Epoch: 0051, total loss=416.959412128 Epoch: 0061, total loss=411.572202141 Epoch: 0071, total loss=407.487125621 Epoch: 0081, total loss=404.373041582 Epoch: 0091, total loss=401.991867148 Training complete! accuracy on validation set:0.815642476


#10

data来自于Andrew Ng的公开课DeepLearning ex4Data.zip

将数据集分成两部分,数据量较少,会影响效果,因此只是写了个过程,test_size设置的0.0,也就是说全部用来训练。一般是将数据分成训练集,验证集,测试集。在训练过程中,训练集和验证集的精度会相差不多,测试集的精度可能会有差异。单纯的二分类,代码简单的可以直接写logistics公式,而不用tf.nn中的函数。

import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf

# load data
# the data from Andrew Ng's deep learning course
x_data = np.loadtxt("ex4x.dat", dtype=np.float32)
y_data = np.loadtxt("ex4y.dat", dtype=np.float32)

# pre-process data
# Scalar the x-data and one-hot y-data
scalar = preprocessing.StandardScaler().fit(x_data)
X1 = scalar.transform(x_data)

lb = preprocessing.LabelBinarizer()
lb.fit(y_data)
Y1 = np.concatenate((1 - lb.transform(y_data), lb.transform(y_data)), axis=1)

# split the data into train set and test set
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(Y1)))
x_shuffled = X1[shuffle_indices]
y_shuffled = Y1[shuffle_indices]
x_train, x_test, y_train, y_test = train_test_split(x_shuffled, y_shuffled, test_size=0.0)

# define the tf graph
x = tf.placeholder(tf.float32, [None, 2])
y = tf.placeholder(tf.float32, [None, 2])

W = tf.Variable(tf.zeros([2, 2]))
b = tf.Variable(tf.zeros([1, 2]))

forw = tf.nn.softmax(tf.matmul(x, W) + b)
loss = tf.reduce_mean(-tf.reduce_sum(y * tf.log(forw), reduction_indices=[1]))
opti = tf.train.GradientDescentOptimizer(learning_rate=1.3)
train = opti.minimize(loss)

# predict on the test
correct_prediction = tf.equal(tf.argmax(forw, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# train and test
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
with sess.as_default():
    for i in range(100):  # train 100 epoch
        _, c = sess.run([train, loss], feed_dict={x: x_train, y: y_train})
        if i % 10 == 0:
            print(i, sess.run(W).flatten(), sess.run(b).flatten(),c)
            print("the correct acc is: ", accuracy.eval(feed_dict={x: x_train, y: y_train}))

    print("the result is: ", sess.run(W).flatten(), sess.run(b).flatten())
    print("the acc on train is :", accuracy.eval(feed_dict={x: x_train, y: y_train}))
    print("the acc on test is: ", accuracy.eval(feed_dict={x: x_test, y: y_test}))

#11

逻辑回归 PS:这两周忙着复习期末考,所以比较匆忙(程序来源:香港三天速成课件)

```x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
```y_data = [[0], [0], [0], [1], [1], [1]]

```# placeholders for a tensor that will be always fed.
```X = tf.placeholder(tf.float32, shape=[None, 2])
```Y = tf.placeholder(tf.float32, shape=[None, 1])
```W = tf.Variable(tf.random_normal([2, 1]), name='weight')
```b = tf.Variable(tf.random_normal([1]), name='bias')

```# Hypothesis using sigmoid: tf.div(1., 1. + tf.exp(tf.matmul(X, W) + b))
```hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
```# cost/loss function
```cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *tf.log(1 - hypothesis))
```train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

```# Accuracy computation
```# True if hypothesis>0.5 else False
```predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
```accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

```# Launch graph
```with tf.Session() as sess:
```   # Initialize TensorFlow variables
```   sess.run(tf.global_variables_initializer())

```   for step in range(10001):
```       cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
```       if step % 200 == 0:
```           print(step, cost_val)

```   # Accuracy report
```   h, c, a = sess.run([hypothesis, predicted, accuracy],
```                      feed_dict={X: x_data, Y: y_data})
```   print("\nHypothesis: ", h, "\nCorrect (Y): ", c, "\nAccuracy: ", a)

#12

泰坦尼克号,从kaggle下载数据,程序来源:《深度学习原理》,运行成功,后期还需改进,增加代码

```import numpy as np
```import pandas as pd
```from sklearn.cross_validation import train_test_split

```#读取数据
```data=pd.read_csv('train.csv')
```print(data)

```#数据预处理
```data.info()     #通过DataFrame.info()方法查看数据的概况:标签名称,条目数,数值类型
```data['Sex']=data['Sex'].apply(lambda s:1 if s=='male' else 0) #将"male"替换为1
```data=data.fillna(0) #将所有缺失的字段填充为0
```dataset_X=data[['Sex','Age','Pclass','SibSp','Parch','Fare']] #挑出这几个我们认为最相关的因素
```dataset_X=dataset_X.as_matrix() #转为矩阵 ,      scipy问题

```#独热编码ont-hot编码
```#data['Deceased'] = data['Survived'].apply(lambda s: 1 - s)
```data['Deceased'] = data['Survived'].apply(lambda s: int(not s))
``` #新增‘Deceased’字段表示第二种分类的标签
```dataset_Y=data[['Deceased','Survived']]
```dataset_Y=dataset_Y.as_matrix()

```#为了防止训练过拟合,我们将仅有的标记数据分成训练数据集和验证数据集
```#用train_test_split()随机打乱数据后按比例拆分数据集
```X_train,X_test,Y_train,Y_test=train_test_split(dataset_X,dataset_Y,test_size=0.2,random_state=42)

```X=tf.placeholder(tf.float32,shape=[None,6])
```Y=tf.placeholder(tf.float32,shape=[None,2])

```W=tf.Variable(tf.random_normal([6,2]),name='weights')
```b=tf.Variable(tf.zeros([2]),name='bias')   #偏置的初始值设为0

```#前向传播
```y_pred=tf.nn.softmax(tf.matmul(X,W)+b)

```#声明代价函数
```cross_entropy=-tf.reduce_sum(Y*tf.log(y_pred+1e-10),reduction_indices=1)
```cost=tf.reduce_mean(cross_entropy)

```#加入优化算法
```train_op=tf.train.GradientDescentOptimizer(0.001).minimize(cost)

```#执行图
```with tf.Session() as sess:
```    sess.run(tf.global_variables_initializer())
```    for epoch in range(10):
```        total_loss=0
```        for i in range(len(X_train)):
```            feed={X:[X_train[i]],Y:[Y_train[i]]}
```            _,loss=sess.run([train_op,cost],feed_dict=feed)
```            total_loss+=loss
```        print('Epoch:%04d,total loss=%.9f'%(epoch+1,total_loss))
```    print('Training complete')
```    pred=sess.run(y_pred,feed_dict={X:X_test})
```    correct=np.equal(np.argmax(pred,1),np.argmax(Y_test,1))
```    accuracy=np.mean(correct.astype(np.float32))
```    print("Accuracy on validation set:%.9f"%accuracy)

#13

Logistic Regression,

参考吴恩达DeepLearning, week2中Logistic Regression编程作业 数据集下载链接为

http://openclassroom.stanford.edu/MainFolder/courses/MachineLearning/exercises/ex4materials/ex4Data.zip

# 生成训练集与测试集
def load_dataset():
    x_data = np.loadtxt("ex4x.dat", dtype=np.float32)
    y_data = np.loadtxt("ex4y.dat", dtype=np.uint8).T
    scalar = preprocessing.StandardScaler().fit(x_data)
    X1 = scalar.transform(x_data)
    x_train, x_test, y_train, y_test = shuffle_dataset(X1, y_data)
    x_train_flatten = x_train.reshape(x_train.shape[0], -1).T
    x_test_flatten = x_test.reshape(x_test.shape[0], -1).T
    y_train = y_train.reshape(1, -1)
    y_test = y_test.reshape(1, -1)
    return x_train_flatten, y_train, x_test_flatten, y_test

# sigmoid函数
def sigmoid(z):
    s = 1/(1+np.exp(-z))
    return s
# 初始化权值
def initialize_with_zeros(dim):
    w = np.random.rand(dim).reshape(dim, 1)
    b = 0
    return w, b
# 传播
def propagate(w, b, X, Y):
    m = X.shape[1]
    z = np.dot(w.T, X) + b
    A = sigmoid(z)
    cost = -(1.0/m)*np.sum(Y*np.log(A)+(1-Y)*np.log(1-A))
    dw = 1/m * np.dot(X, (A-Y).T)
    db = 1/m * np.sum((A-Y), axis=1, keepdims=True)
    cost = np.squeeze(cost)
    grads = {
        "dw": dw,
        "db": db
    }
    return grads, cost
# cost最小化
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    costs = []
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % 100 == 0:
            costs.append(cost)

        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
        params = {
            "w": w,
            "b": b
        }

        grads = {
            "dw": dw,
            "db": db
        }

    return params, grads, costs
# 预测结果
def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    for i in range(A.shape[1]):
        if A[0, i] > 0.5:
            Y_prediction[0, i] = 1
        else:
            Y_prediction[0, i] = 0
    return Y_prediction
# 模型
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    w, b = initialize_with_zeros(X_train.shape[0])
    parameters, grades, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost=True)
    w = parameters["w"]
    b = parameters["b"]
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    return d

if __name__ == "__main__":
    x_train, y_train, x_test, y_test = load_dataset()
    d = model(x_train, y_train, x_test, y_test, num_iterations = 1000, learning_rate = 0.1, print_cost = True)

输出结果

Cost after iteration 0: 0.601037
Cost after iteration 100: 0.444401
Cost after iteration 200: 0.432832
Cost after iteration 300: 0.430681
Cost after iteration 400: 0.430151
Cost after iteration 500: 0.430002
Cost after iteration 600: 0.429957
Cost after iteration 700: 0.429942
Cost after iteration 800: 0.429937
Cost after iteration 900: 0.429936
train accuracy: 77.5 %
test accuracy: 80.0 %

坐标点显示代码

import numpy as np
import matplotlib.pyplot as plt

# load data
# the data from Andrew Ng's deep learning course
x_data = np.loadtxt("ex4x.dat", dtype=np.float32)
y_data = np.loadtxt("ex4y.dat", dtype=np.uint8)
# pre-process data
# Scalar the x-data and one-hot y-data
# scalar = preprocessing.StandardScaler().fit(x_data)
# X1 = scalar.transform(x_data)

fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.set_title('Scatter Plot')
plt.xlabel('X')
plt.ylabel('Y')
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data, s=40, cmap=plt.cm.Spectral);
plt.legend('x1')
plt.show()


#14

##Kaggle泰坦尼克之灾

###1.数据详细情况浏览

各个属性的情况统计 各个属性的统计值

###2.各个属性与结果的关系

代码:

import pandas as pd
import matplotlib.pyplot as plt

data_train=pd.read_csv('train.csv')

#图像参数的设置
plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
fig = plt.figure()
fig.set(alpha=0.2)# 设定图表颜色alpha参数

#获救和未获救的人数分布
#data_train.Survived.value_counts().plot(kind='bar')# 柱状图 
#plt.title(u"获救情况 (1为获救)") # 标题
#plt.ylabel(u"人数")  

#乘客等级人数分布
#data_train.Pclass.value_counts().plot(kind="bar")
#plt.ylabel(u"人数")
#plt.title(u"乘客等级分布")

#获救者的年龄分布
#plt.scatter(data_train.Survived, data_train.Age)
#plt.ylabel(u"年龄")                         # 设定纵坐标名称
#plt.grid(b=True, which='major', axis='y') 
#plt.title(u"按年龄看获救分布 (1为获救)")

#各客舱等级的年龄分布
#data_train.Age[data_train.Pclass == 1].plot(kind='kde')   
#data_train.Age[data_train.Pclass == 2].plot(kind='kde')
#data_train.Age[data_train.Pclass == 3].plot(kind='kde')
#plt.xlabel(u"年龄")# plots an axis lable
#plt.ylabel(u"密度") 
#plt.title(u"各等级的乘客年龄分布")
#plt.legend((u'头等舱', u'2等舱',u'3等舱'),loc='best') # sets our legend for our graph.

#各登船口岸上船人数
#data_train.Embarked.value_counts().plot(kind='bar')
#plt.title(u"各登船口岸上船人数")
#plt.ylabel(u"人数")  

#各乘客等级的获救情况
#Survived_0 = data_train.Pclass[data_train.Survived == 0].value_counts()
#Survived_1 = data_train.Pclass[data_train.Survived == 1].value_counts()
#df=pd.DataFrame({u'获救':Survived_1, u'未获救':Survived_0})
#df.plot(kind='bar', stacked=True)
#plt.title(u"各乘客等级的获救情况")
#plt.xlabel(u"乘客等级") 
#plt.ylabel(u"人数")

#按性别看获救情况
#Survived_m = data_train.Survived[data_train.Sex == 'male'].value_counts()
#Survived_f = data_train.Survived[data_train.Sex == 'female'].value_counts()
#df=pd.DataFrame({u'男性':Survived_m, u'女性':Survived_f})
#df.plot(kind='bar', stacked=True)
#plt.title(u"按性别看获救情况")
#plt.xlabel(u"性别") 
#plt.ylabel(u"人数")

#根据乘客等级和性别的获救情况
#plt.title(u"根据舱等级和性别的获救情况")
#
#ax1=fig.add_subplot(141)
#data_train.Survived[data_train.Sex == 'female'][data_train.Pclass != 3].value_counts().plot(kind='bar', label="female highclass", color='#FA2479')
#ax1.set_xticklabels([u"获救", u"未获救"], rotation=0)
#ax1.legend([u"女性/高级舱"], loc='best')
#
#ax2=fig.add_subplot(142, sharey=ax1)
#data_train.Survived[data_train.Sex == 'female'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='female, low class', color='pink')
#ax2.set_xticklabels([u"未获救", u"获救"], rotation=0)
#plt.legend([u"女性/低级舱"], loc='best')
#
#ax3=fig.add_subplot(143, sharey=ax1)
#data_train.Survived[data_train.Sex == 'male'][data_train.Pclass != 3].value_counts().plot(kind='bar', label='male, high class',color='lightblue')
#ax3.set_xticklabels([u"未获救", u"获救"], rotation=0)
#plt.legend([u"男性/高级舱"], loc='best')
#
#ax4=fig.add_subplot(144, sharey=ax1)
#data_train.Survived[data_train.Sex == 'male'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='male low class', color='steelblue')
#ax4.set_xticklabels([u"未获救", u"获救"], rotation=0)
#plt.legend([u"男性/低级舱"], loc='best')

#各登录港口乘客的获救情况
#Survived_0 = data_train.Embarked[data_train.Survived == 0].value_counts()
#Survived_1 = data_train.Embarked[data_train.Survived == 1].value_counts()
#df=pd.DataFrame({u'获救':Survived_1, u'未获救':Survived_0})
#df.plot(kind='bar', stacked=True)
#plt.title(u"各登录港口乘客的获救情况")
#plt.xlabel(u"登录港口") 
#plt.ylabel(u"人数")

#按客舱有无看获救情况
#Survived_cabin = data_train.Survived[pd.notnull(data_train.Cabin)].value_counts()
#Survived_nocabin = data_train.Survived[pd.isnull(data_train.Cabin)].value_counts()
#df=pd.DataFrame({u'有':Survived_cabin, u'无':Survived_nocabin}).transpose()
#df.plot(kind='bar', stacked=True)
#plt.title(u"按客舱有无看获救情况")
#plt.xlabel(u"客舱有无") 
#plt.ylabel(u"人数")

plt.show()

结果: 因为SibSp和Parch两个属性缺失值大,且对结果作用很小,所以舍弃了这两个属性 Ticket 对结果也没有太大影响,也舍弃 剩下的属性进行初步的分类。

###3.数据预处理

def dpp(df):
    #使用均值填补年龄空值
    def set_missing_ages(df):
        df.loc[ (df.Age.isnull()), 'Age' ] = df[df.Age.notnull()].Age.mean()
        return df
 
    #有Cabin设置为1,无Cabin的设置为0
    def set_Cabin_type(df):
        df.loc[ (df.Cabin.notnull()), 'Cabin' ] = 1
        df.loc[ (df.Cabin.isnull()), 'Cabin' ] = 0
        return df
       
    #男生设置为1,女生设置为0
    def set_Sex_type(df):
        df.Sex = df.Sex.replace('male',1)
        df.Sex = df.Sex.replace('female',0)
        return df
       
    #S设置为1,C设置为2,Q设置为3
    def set_Embarked_type(df):
        df.Embarked = df.Embarked.replace('S',1)
        df.Embarked = df.Embarked.replace('C',2)
        df.Embarked = df.Embarked.replace('Q',3)
        return df
       
    #对Age和Fare做归一化处理
    def set_sca(df):
        new_age = (df['Age'] - df['Age'].min())/(df['Age'].max() - df['Age'].min())
        df = df.drop(['Age'],axis=1)
        df['Age'] = new_age
        new_fare = (df['Fare'] - df['Fare'].min())/(df['Fare'].max() - df['Fare'].min())
        df = df.drop(['Fare'],axis=1)
        df['Fare'] = new_fare
        return df
   
    df = set_missing_ages(df)
    df = set_Cabin_type(df)
    df = set_Sex_type(df)
    df = set_Embarked_type(df)
    df.drop(['Name', 'Ticket', 'PassengerId'], axis=1, inplace=True)
    df = set_sca(df)
    return df

首先对缺失的年龄值进行填充,我是使用均值进行的填充 然后对非数值型属性进行数值替换 最后对年龄和票价进行归一化处理

###4.训练模型并对测试集做预测

代码:

import numpy as np
import os
import matplotlib.pyplot as plt
import pprint
import tensorflow as tf
from data_pre_processing import train_data,test_data
CLASS=2
data_y,data_X = train_data()
test_y,test_X = test_data()
feature_dim = len(data_X[0])

X=tf.placeholder(dtype=tf.float32,shape=[None,feature_dim])
Y=tf.placeholder(dtype=tf.float32,shape=[None,CLASS])
#W=tf.Variable(dtype=tf.float32,initial_value=tf.zeros([feature_dim,CLASS]),name='weight')
#B=tf.Variable(dtype=tf.float32,initial_value=tf.zeros([CLASS]),name='bias')
W=tf.Variable(tf.random_normal([feature_dim,CLASS],mean=0.0,stddev=1.0,dtype=tf.float32,seed=1))
B=tf.Variable(tf.random_normal([CLASS],mean=0.0,stddev=1.0,dtype=tf.float32,seed=1))


learning_rate=0.01
iteration=2000
batch_size=50

tf_data_y = tf.one_hot(data_y,CLASS,1,0)
tf_test_y = tf.one_hot(test_y,CLASS,1,0)
y_predict=tf.nn.softmax(tf.matmul(X,W)+B)
def h(y_predict):
    return 1/(1+tf.exp(-y_predict))
y_predict2=h(y_predict)
loss=tf.reduce_mean(-tf.reduce_sum(Y*tf.log(y_predict2)+(1-Y)*tf.log(y_predict2),reduction_indices=[1]))
optimizer=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_list=[]
    data_y=sess.run(tf_data_y)
    test_y=sess.run(tf_test_y)
    for i in range(iteration):
        avg_loss = 0.
        for j in range((len(data_X)/batch_size)+1):
            if j<len(data_X)/batch_size:
                x=data_X[j*batch_size:(j+1)*batch_size]
                y=data_y[j*batch_size:(j+1)*batch_size]
            if j==len(data_X)/batch_size:
                x=data_X[j*batch_size:]
                y=data_y[j*batch_size:]
            _,c=sess.run([optimizer, loss], feed_dict={X: x,Y: y})
            avg_loss+=c/batch_size
        print "iteration:",'%02d'%(i+1),"loss=","{:.9f}".format(avg_loss)
        loss_list.append(avg_loss)    
    correct_prediction=tf.equal(tf.argmax(y_predict, 1),tf.argmax(Y, 1))
    accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    print "Accuracy:",accuracy.eval({X: test_X,Y:test_y})
    
#plt.figure(1)
#plt.plot(range(len(loss_list)), loss_list, 'b-', label='loss')
#plt.legend(loc = 'upper right')
#plt.show()
  • 遇到的坑:

1)因为缺失值没有填补完,总是出现loss为nan的情况 2)学习率太大,出现无法收敛的情况 3)数据格式不对,老是报错 4)两个属性值,初始化权值和偏置的时候全为0,导致loss一直不变,后来使用随机数初始化,因为没有固定初始值,又遇到每次调参得到的结果都不同的情况,所以使用了seed固定初始值的随机数

  • 结果分析:

坑填完后终于正常开始训练,最开始选择了Age|SibSp|Parch|Fare|Cabin|Embarked|Sex|Pclass这些属性,准确率在0.5~0.7之间,然后删掉了Fare属性,准确率在0.7~0.8之间,然后又去掉了Embarked属性,准确率在0.8以上,得到的最高的准确率是0.830144。 在这个最高准确率的时候,损失值变化如下图所示 开始下降很快,后来下降变慢。

  • 总结:

属性的选择对于准确率有很大的关系,我这里只做了简单的属性关联分析,所以准确率还是只有0.83,想得到更高的准确率,还是要做深层的属性分析还有参数调整。


#15
import tensorflow as tf
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
from sklearn.ensemble import RandomForestRegressor
data_train = pd.read_csv("./Train.csv")
data_train1=data_train.copy()

#补全carbin和age数据
age_df = data_train1[['Age','Fare', 'Parch', 'SibSp', 'Pclass']]
# 乘客分成已知年龄和未知年龄两部分
know_age=age_df[age_df.Age.notnull()].as_matrix()
unknown_age = age_df[age_df.Age.isnull()].as_matrix()
# y即目标年龄
y=know_age[:,0]
# X即特征属性值
X=know_age[:,1:]
# fit到RandomForestRegressor之中
rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
rfr.fit(X,y)
# 用得到的模型进行未知年龄结果预测
predictedAges = rfr.predict(unknown_age[:, 1::])
data_train1.loc[ (data_train1.Age.isnull()), 'Age' ] = predictedAges

data_train1.loc[ (data_train1.Cabin.notnull()), 'Cabin' ] = "Yes"
data_train1.loc[ (data_train1.Cabin.isnull()), 'Cabin' ] = "No"
dummies_Cabin = pd.get_dummies(data_train1['Cabin'], prefix= 'Cabin')
dummies_Embarked = pd.get_dummies(data_train1['Embarked'], prefix= 'Embarked')
dummies_Sex = pd.get_dummies(data_train1['Sex'], prefix= 'Sex')
dummies_Pclass = pd.get_dummies(data_train1['Pclass'], prefix= 'Pclass')
df = pd.concat([data_train1, dummies_Cabin, dummies_Embarked, dummies_Sex, dummies_Pclass], axis=1)
df.drop(['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)

train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
train_np = train_df.as_matrix()
label = train_np[:, 0]
label=label.reshape((891,1))
print(label.shape)
input= train_np[:, 1:]
print(input.shape)
#tensorflow 训练数据
y = tf.placeholder(tf.float32, shape=[None, 1], name='label')
x = tf.placeholder(tf.float32, shape=[None, 12], name='input')

w = tf.Variable(tf.random_normal([12, 1]), name='weight')
b = tf.Variable(tf.constant([0.01]), name='bias')

a = tf.matmul(x, w) + b
y_=1/(1+tf.exp(-a))

loss =  tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=y))
train_step = tf.train.AdamOptimizer(0.01).minimize(loss)
predicted = tf.cast(y_> 0.5, tf.float32)
accuracy=tf.reduce_mean(tf.cast(tf.equal(predicted,y ), tf.float32))
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(1000):
        start = (i * 8) % 891
        end = min(start + 8, 891)
        sess.run(train_step, feed_dict={x: input, y: label})
        print(sess.run(loss, feed_dict={x: input, y: label}))
    accuracy=  sess.run(accuracy, feed_dict={x: input, y: label})
    print("accuracy is %g"%(accuracy))

结果: accuracy is 0.805836


#16

##week2 动手任务,逻辑回归模型

感谢@LeafScar 提供的数据集下载地址。

数据点的分布如下:

代码如下,

import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

x_train = pd.read_table('C:\\Users\\Administrator\\Desktop\\ex4Data\\ex4x.dat',header=None,sep='\s+',names=['x','y'])
y_train = pd.read_table('C:\\Users\\Administrator\\Desktop\\ex4Data\\ex4y.dat',header=None,sep='\s+',names=['lable'])

weight = tf.Variable(tf.zeros([2,2],dtype=tf.float64),name='weight')
bias = tf.Variable(tf.zeros([2],dtype=tf.float64),name='bias')
#one_hot编码
y_ = tf.concat([y_train,1-y_train],1)

#lable = tf.cast(y_train,tf.int64)
train = tf.constant(x_train)

#进行one_hot编码
#y_ = tf.one_hot(y_train,2,dtype=tf.float64)

y = tf.nn.softmax(tf.matmul(train,weight)+bias)
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
cross_sum = []
train_step = tf.train.GradientDescentOptimizer(0.000001).minimize(cross_entropy)
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(1000):
        print(i)
        sess.run(train_step)
        cross_sum.append(sess.run(cross_entropy))
    
plt.plot(cross_sum,'b-')  
plt.title('Loss Change')  
plt.xlabel('each train')  
plt.ylabel('cross_tropy')  
plt.show() 

#画出点分布图
plt.figure('point')#定义一个图表
for index in range(len(x_train)):
    row = x_train[index:index+1]
    row2 = y_train[index:index+1]
    if row2.lable.values == 1.0:
        plt.scatter(row.x,row.y,c='r')
    else:
        plt.scatter(row.x,row.y,c='b')
plt.show()

损失值变化如下:

####遇见问题

  • 经过训练后权重weight和偏移量不改变

  • 问题出现在lable标签的维度写错了,应该是二维张量,但是经过一下操作之后变成了三维张量,因此训练不成功。 操作代码如下:

y_train = pd.read_table('C:\\Users\\Administrator\\Desktop\\ex4Data\\ex4y.dat',header=None,sep='\s+',names=['lable'])
lable = tf.cast(y_train,tf.int64)
y_ = tf.one_hot(lable,2,dtype=tf.float64)
# 此时y_为三维张量

#修改后的代码为:
y_ = tf.concat([y_train,1-y_train],1)
  • 将维度修改完之后出现权重weight和偏移量bias为nan的情况,

  • 参考该篇文章的解决办法,将学习率调小就可以了。 tensorflow训练中出现nan问题

####总结 先用tensorflow实现,自己把一些运算实现一次,自己亲自造轮子,熟悉每个细节。


#17

import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np

learning_rate = 0.01 training_epochs = 100 batch_size = 100

class linearregression(object): def init(self): self.w = None self.b = None def train(self,X,y,X_v,y_v): self.w = tf.Variable(tf.zeros([np.shape(X)[1],np.shape(y)[1]])) self.b = tf.Variable(tf.zeros([np.shape(y)[1]]))

    x_data = tf.placeholder(tf.float32, [None,np.shape(X)[1]])
    y_data = tf.placeholder(tf.float32, [None,np.shape(y)[1]])
    
    pred = tf.nn.softmax(tf.matmul(x_data,self.w) + self.b)
    
    cost = tf.reduce_mean(-tf.reduce_sum(y_data*tf.log(pred),reduction_indices = 1))
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    correct_prediction = tf.equal(tf.argmax(pred, 1),tf.argmax(y_data, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(training_epochs):
            avg_cost = 0
            for j in range(int(np.shape(X)[0]/batch_size)):
                if (j+1)*batch_size < np.shape(X)[0]:
                    _,C = sess.run([optimizer,cost],feed_dict = {x_data:X[j*batch_size:(j+1)*batch_size,:], y_data:y[j*batch_size:(j+1)*batch_size]})
                else:
                    _,C = sess.run([optimizer,cost],feed_dict = {x_data:X[j*batch_size:,:], y_data:y[j*batch_size:]})
                avg_cost += C/(int(np.shape(X)[0]/batch_size))
            print("%d step,cost = "%(i),avg_cost)

        print("Accuracy: ", sess.run([accuracy],feed_dict = {x_data:X,y_data:y}))

def getdata(): mnist = input_data.read_data_sets(".//MNIST_Data//", one_hot=True) print(np.shape(mnist.train.images)) print(np.shape(mnist.test.images)) print(np.shape(mnist.train.labels)) print(np.shape(mnist.test.labels)) return mnist.train.images,mnist.test.images,mnist.train.labels,mnist.test.labels

X_train,X_test,y_train,y_test = getdata() clf = linearregression() clf.train(X_train,y_train,X_test,y_test)


#18
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data import numpy as np

learning_rate = 0.01 training_epochs = 100 batch_size = 100

class linearregression(object): def init(self): self.w = None self.b = None def train(self,X,y,X_v,y_v): self.w = tf.Variable(tf.zeros([np.shape(X)[1],np.shape(y)[1]])) self.b = tf.Variable(tf.zeros([np.shape(y)[1]]))

    x_data = tf.placeholder(tf.float32, [None,np.shape(X)[1]])
    y_data = tf.placeholder(tf.float32, [None,np.shape(y)[1]])
    
    pred = tf.nn.softmax(tf.matmul(x_data,self.w) + self.b)
    
    cost = tf.reduce_mean(-tf.reduce_sum(y_data*tf.log(pred),reduction_indices = 1))
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    
    correct_prediction = tf.equal(tf.argmax(pred, 1),tf.argmax(y_data, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(training_epochs):
            avg_cost = 0
            for j in range(int(np.shape(X)[0]/batch_size)):
                if (j+1)*batch_size < np.shape(X)[0]:
                    _,C = sess.run([optimizer,cost],feed_dict = {x_data:X[j*batch_size:(j+1)*batch_size,:], y_data:y[j*batch_size:(j+1)*batch_size]})
                else:
                    _,C = sess.run([optimizer,cost],feed_dict = {x_data:X[j*batch_size:,:], y_data:y[j*batch_size:]})
                avg_cost += C/(int(np.shape(X)[0]/batch_size))
            print("%d step,cost = "%(i),avg_cost)

        print("Accuracy: ", sess.run([accuracy],feed_dict = {x_data:X,y_data:y}))

def getdata(): mnist = input_data.read_data_sets(".//MNIST_Data//", one_hot=True) print(np.shape(mnist.train.images)) print(np.shape(mnist.test.images)) print(np.shape(mnist.train.labels)) print(np.shape(mnist.test.labels)) return mnist.train.images,mnist.test.images,mnist.train.labels,mnist.test.labels

X_train,X_test,y_train,y_test = getdata() clf = linearregression() clf.train(X_train,y_train,X_test,y_test)


#19

第四周的作业都出来, 吓得一条, 赶紧把第二周的实践作业完成了。:rolling_eyes:

2. 思考问题:

2.1 back propagation 算法原理理解 ?

http://blog.csdn.net/mao_xiao_feng/article/details/53048213

2.2 sigmoid函数、tanh函数和ReLU函数的区别?以及各自的优缺点?对应的tf函数是?

这三个都是非线性激励函数, 为什么引入非线性激励函数?

如果不用激励函数(其实相当于激励函数是f(x) = x),在这种情况下你每一层输出都是上层输入的线性函数,很容易验证,无论你神经网络有多少层,输出都是输入的线性组合,与没有隐藏层效果相当,这种情况就是最原始的感知机(Perceptron)。

正因为上面的原因,决定引入非线性函数作为激励函数,这样深层神经网络就有意义了(不再是输入的线性组合,可以逼近任意函数)。最早的想法是sigmoid函数或者tanh函数,输出有界,很容易充当下一层输入。

激活函数(Activation Function)的特点:

  • 非线性: 当激活函数是线性的时候,一个两层的神经网络就可以逼近基本上所有的函数了。

  • 可微: 当优化方法是基于梯度的时候,这个性质是必须的。

  • 单调性: 当激活函数是单调的时候,单层网络能够保证是凸函数。

  • f(x)≈x: 当激活函数满足这个性质的时候,如果参数的初始化是random的很小的值,那么神经网络的训练将会很高效。

  • 输出值范围: 当激活函数输出值是 有限 的时候,基于梯度的优化方法会更加 稳定,因为特征的表示受有限权值的影响更显著;当激活函数的输出是 无限 的时候,模型的训练会更加高效,不过在这种情况小,一般需要更小的学习率。

  • Sigmod

  • 公式: $f(x)=\frac{1}{1+e^{-z}}$

  • 图像:

  • 优点: 可将任意值变换到01区间, 可做概率; 它还是便于求导的平滑函数,其导数为 $\sigma(x)(1-\sigma(x))$

  • 缺点:

    • 容易出现梯度消失gradient vanishing, 在反向传播算法中, Sigmod函数求导越来越小, 最后会发生梯度消失。ReLU导数是常数, 因此可以解决梯度消失问题。
    • 函数输出并不是零均值的zero-centered, Sigmoid函数的输出值恒大于0,这会导致模型训练的收敛速度变慢。
    • 幂运算相对来讲比较耗时
  • tf函数 tf.sigmoid(x, name=None)

  • Tanh

  • 公式: $\quad tanh(x)=\frac{e^x-e^{-x}}{e^x+e^{-x}} = 2sigmod(2x) -1$

  • 图片:

  • 优点: tanh读作Hyperbolic Tangent,如上图所示,tanh 是0均值的, 它解决了不是零均值的输出问题,。因此,实际应用中,tanh 会比 sigmoid 更好。

  • 缺点: 梯度消失的问题和幂运算的问题仍然存在。

  • tf函数 tf.tanh(x, name=None)

  • ReLU

  • 数学公式: $ReLU=f(x)=max(0, x)$

  • tf函数 tf.nn.relu(features, name=None)

  • 二维数据的时候: 从图左可以看出,输入信号<0时,输出都是0,>0 的情况下,输出等于输入。w 是二维的情况下,使用ReLU之后的效果如下:

  • 优点

    • 解决了gradient vanishing问题 (在正区间)
    • ReLU 得到的SGD的收敛速度会比 sigmoid/tanh 快很多(看右图)。计算速度非常快,只需要判断输入是否大于0
    • 收敛速度远快于sigmoid和tanh
  • 缺点

  • ReLU的输出不是zero-centered

  • Dead ReLU Problem 神经元死亡问题,指的是某些神经元可能永远不会被激活。

  • tf函数 tf.nn.relu(features, name=None)

2.3 softmax和cross_entropy原理解释?

  • softmax softmax用于多分类过程中,它将多个神经元的输出,映射到(0,1)区间内,输出结果(概率)和为1, 可以看成概率来理解,从而来进行多分类!

  • cross_entropy 交叉熵,信息学理论中的概念,用来衡量两个分布之间的相似程度,值越小,越相似。当两者一样时,熵为0。

  • softmax 和 sigmod的区别

    • sigmoid将一个real value映射到(0,1)的区间(当然也可以是(-1,1)),这样可以用来做二分类。

    • softmax把一个k维的real value向量(a1,a2,a3,a4….)映射成一个(b1,b2,b3,b4….)其中bi是一个0-1的常数,然后可以根据bi的大小来进行多分类的任务,如取权重最大的一维。

2.4 tf.placeholder() 、tf.constant()、tf.Variable()的区别?

  • tf.placeholder()

  • 用于得到传递进来的真实的训练样本, 是个占位符。

  • 不必指定初始值,可在运行时,通过 Session.run 的函数的 feed_dict 参数指定

  • tf.constant()

    • tf.constant(value, dtype=None, shape=None, name=‘Const’, verify_shape=False) 作用:创建一个常量tensor 参数: value: 一个dtype类型(如果指定了)的常量值(列表)。要注意的是,要是value是一个列表的话,那么列表的长度不能够超过形状参数指定的大小(如果指定了)。要是列表长度小于指定的,那么多余的由列表的最后一个元素来填充。 dtype: 返回tensor的类型 shape: 返回的tensor形状。 name: tensor的名字 verify_shape: Boolean that enables verification of a shape of values.
  • tf.Variable()

    • 通过构造一个Variable类的实例在图中添加一个变量,主要在于一些可训练变量(trainable variables),比如模型的权重(weights,W)或者偏执值(bias)
    • 构造函数需要初始值,这个初始值可以是一个任何类型任何形状的Tensor,初始值的形状和类型决定了这个变量的形状和类型。

2.5 举例说明:tf.Graph() 概念理解?

一个TensorFlow的运算,被表示为一个数据流的图。一幅图中包含一些操作(Operation)对象,这些对象是计算节点。Tensor对象,则是表示在不同的操作(operation)间的数据节点。

2.6 tf.name_scope()和tf.variable_scope()的理解?

  • tf.name_scope(name, default_name=None, values=None) 主要结合 tf.Variable() 来使用,方便参数命名管理。

  • tf.variable_scope() 主要结合 tf.get_variable() 来使用,实现 变量共享。 先通过tf.variable_scope生成一个上下文管理器,并指明需求的变量在这个上下文管理器中,就可以直接通过tf.get_variable获取已经生成的变量。

我觉得这个相当于指定一个命名空间。

#通过tf.variable_scope函数控制tf.get_variable函数来获取以及创建过的变量  
with tf.variable_scope("ttf"):#ttf的命名空间  
        v=tf.get_variable("v",[1],initializer=tf.constant_initializer(1.0))  #在ttf的命名空间内创建名字为v的变量 

在上下文管理器中已经生成一个v的变量,若想通过tf.get_variable函数获取其变量,则可以通过reuse参数的设定为True来获取, 重用。

with tf.variable_scope("ttf",reuse=True):  
      v1=tf.get_variable("v",[1])  
print(v==v1)   #输出为True  

2.7 tf.variable_scope() 和tf.get_variable()的理解?

  • tf.variable_scope()

  • tf.get_variable() tf.get_variable(name, shape=None, dtype=None, initializer=None, regularizer=None, trainable=True, collections=None, caching_device=None, partitioner=None, validate_shape=True, use_resource=None, custom_getter=None)

Gets an existing variable with these parameters or create a new one. 获取或者创建变量。

当tf.get_variable用于创建变量时,则与tf.Variable的功能基本相同。

#定义的基本等价  
v = tf.get_variable("v",shape=[1],initializer.constant_initializer(1.0))  
v = tf.Variable(tf.constant(1.0,shape=[1]),name="v")  
  • 相同点:通过两函数创建变量的过程基本一样,且tf.variable函数调用时提供的维度(shape)信息以及初始化方法(initializer)的参数和tf.Variable函数调用时提供的初始化过程中的参数基本类似。
  • 不同点:两函数指定变量名称的参数不同,对于tf.Variable函数,变量名称是一个可选的参数,通过name="v"的形式给出,而tf.get_variable函数,变量名称是一个必填的参数,它会根据变量名称去创建或者获取变量。

2.8 tf.global_variables_initializer() 什么时候使用?

使用tf.global_variables_initializer()函数初始化所有可变张量的状态。

2.9 学习中的知识点收获记录?

3. 实践任务

a、使 tf实现Logistic Regression算法(必做) 截止日期:11.18

b、使 a任务实现的算法,完成 “Kaggle泰坦尼克之灾”(链接https://www.kaggle.com/c/titanic)(选做)截止日期:11.25

# -*- coding: utf-8 -*-
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series,DataFrame
from sklearn.model_selection import train_test_split
data_train = pd.read_csv("./input/train.csv", header=0)
data_train.columns
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

PassengerId => 乘客ID
Pclass => 乘客等级(1/2/3等舱位)
Name => 乘客姓名
Sex => 性别
Age => 年龄
SibSp => 堂兄弟/妹个数
Parch => 父母与小孩个数
Ticket => 船票信息
Fare => 票价
Cabin => 客舱
Embarked => 登船港口

data_train.Sex.value_counts()
male      577
female    314
Name: Sex, dtype: int64
# 计算平均年龄
mean_age = int(data_train["Age"].mean())
mean_age
29
data_train2 = data_train
#将年龄为空的记录 用平均年龄填充
data_train2.loc[(data_train2.Age.isnull()), 'Age' ] = mean_age
# 有舱位的数据用Yes填充, 没有的用No
def setCabinType(df):
    df.loc[ (df.Cabin.notnull()), 'Cabin' ] = "Yes"
    df.loc[ (df.Cabin.isnull()), 'Cabin' ] = "No"
    return df
data_train2 = setCabinType(data_train2)
data_train2.head(5)
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 No S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 Yes C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 No S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 Yes S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 No S
#特征离散化
dummies_Sex = pd.get_dummies(data_train2['Sex'], prefix= 'Sex')
dummies_Pclass = pd.get_dummies(data_train2['Pclass'], prefix= 'Pclass')
dummies_Cabin = pd.get_dummies(data_train2['Cabin'], prefix= 'Cabin')
dummies_Embarked = pd.get_dummies(data_train2['Embarked'], prefix= 'Embarked')
df = pd.concat([data_train, dummies_Cabin, dummies_Embarked, dummies_Sex, dummies_Pclass], axis=1)
df.drop(['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)
df.head(5)
PassengerId Survived Age SibSp Parch Fare Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Sex_female Sex_male Pclass_1 Pclass_2 Pclass_3
0 1 0 22.0 1 0 7.2500 1 0 0 0 1 0 1 0 0 1
1 2 1 38.0 1 0 71.2833 0 1 1 0 0 1 0 1 0 0
2 3 1 26.0 0 0 7.9250 1 0 0 0 1 1 0 0 0 1
3 4 1 35.0 1 0 53.1000 0 1 0 0 1 1 0 1 0 0
4 5 0 35.0 0 0 8.0500 1 0 0 0 1 0 1 0 0 1
train_df = df.filter(regex='Survived|Age_.*|Sex_.*|Pclass_.*')
train_np = train_df.as_matrix()

label = train_np[:, 0]
label=label.reshape((891,1))
print(label.shape)
input= train_np[:, 1:]
print(input.shape)

(891, 1)
(891, 5)
learning_rate = 0.5
seed = 0;

X_train, X_val, y_train, y_val = train_test_split(
    input, label, test_size = 0.2, random_state = seed)

print(X_train.shape)
print(X_val.shape)
print(y_train.shape)
print(y_val.shape)

# 训练数据
X = tf.placeholder(tf.float32, shape=[None, 5], name='input')
y = tf.placeholder(tf.float32, shape=[None, 1], name='label')

 # 声明参数权重变量
W = tf.Variable(tf.random_normal([5, 1]), name='weights')
b = tf.Variable(tf.zeros([1]), name='bias')

 # softmax逻辑回归
y_pred = tf.nn.softmax(tf.matmul(X, W) + b)

 # 损失函数:交叉熵
cross_entropy = - tf.reduce_sum(y * tf.log(y_pred + 1e-10), reduction_indices=1)
 # 计算交叉熵的平均值
cost = tf.reduce_mean(cross_entropy)
 # 使用SDG最小化代价cost函数
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(cost)
 # 保存训练模型
saver = tf.train.Saver()
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for epoch in range(100):
        total_loss = 0.0
        for i in range(len(X_train)):
            feed = {X: [X_train[i]], y: [y_train[i]]}
            _, loss = sess.run([train_op, cost], feed_dict=feed)
            total_loss += loss
        if (epoch % 10 == 0):
            print('Epoch: %04d, total loss=%.9f' % (epoch + 1, total_loss))
    print('Training complete!')
    # 用验证集评估模型的准确率
    pred_val = sess.run(y_pred, feed_dict={X: X_val})
    correct = np.equal(np.argmax(pred_val, 1), np.argmax(y_val, 1))
    accuracy = np.mean(correct.astype(np.float32))
    print('accuracy on validation set:%.9f' % accuracy)
    
    saver.save(sess, "Model/model_ema.ckpt")
    sess.close()
    

(712, 5)
(179, 5)
(712, 1)
(179, 1)
Epoch: 0001, total loss=0.000000000
Epoch: 0011, total loss=0.000000000
Epoch: 0021, total loss=0.000000000
Epoch: 0031, total loss=0.000000000
Epoch: 0041, total loss=0.000000000
Epoch: 0051, total loss=0.000000000
Epoch: 0061, total loss=0.000000000
Epoch: 0071, total loss=0.000000000
Epoch: 0081, total loss=0.000000000
Epoch: 0091, total loss=0.000000000
Training complete!
accuracy on validation set:1.000000000


#20

第二周的作业拖了这么久,赶紧补上。 用了经典的mnist数据集,代码参考了http://blog.csdn.net/lhanchao/article/details/51226564

import tensorflow as tf  
import numpy as np  
import input_data  
  
readData = input_data.read_data_sets("/home/abner/tens/dataset" , one_hot=True)  
learning_rate = 0.01   
x = tf.placeholder("float",[None,784])  
y = tf.placeholder("float",[None,10])  
  
W = tf.Variable(tf.zeros([784,10], "float", "weight"))  
b = tf.Variable(tf.zeros([10],"float","bais"))  
activation = tf.nn.softmax(tf.matmul(x, W)+b)  
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(activation),1))  
opt = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)  
  
init = tf.initialize_all_variables()  
  
with tf.Session() as sess:  
    sess.run(init)  
    #train过程  
    for step in range(25):  
        avg_cost = 0.  
        totalStep = (int)(readData.train.num_examples/100)  
        for i in range(totalStep):  
            images,labels = readData.train.next_batch(100)  
            sess.run(opt,feed_dict={x:images,y:labels})  
            avg_cost += sess.run(cost,feed_dict={x:images,y:labels})/totalStep  
        print "Step: ","%04d"%step," cost: ",avg_cost  
          
    testImages,testLabels = readData.test.next_batch(200)  
    accuracy = 0.  
    predictResult = tf.arg_max(activation, 1)    
    for i in range(200):  
        p = sess.run(predictResult[i],feed_dict={x:testImages})  
        label = np.argmax(testLabels, 1)  
        l = label[i]  
        if(p == l):  
            accuracy += 1.0/200.0  
        print "Predict Result: ",p," True Label: ",l  
    print "accuracy: ",accuracy