Logistic Regression¶
[Example 10] Logistic Regression (TensorFlow)¶
Load modules¶
In [ ]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
print("NumPy Version :{}".format(np.__version__))
print("TensorFlow Version :{}".format(tf.__version__))
print("Matplotlib Version :{}".format(plt.matplotlib.__version__))
WARNING:tensorflow:From c:\python\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
NumPy Version :1.24.3
TensorFlow Version :2.15.0
Matplotlib Version :3.7.1
Input and Label¶
In [ ]:
# Logistic regression: binary classification data
x_input = tf.constant([[1, 1], [2, 1], [1, 2], [0.5, 4], [4, 1], [2.5, 2.3]], dtype=tf.float32)
labels = tf.constant([[0], [0], [0], [1], [1], [1]], dtype=tf.float32)
W = tf.Variable(tf.random.normal((2, 1)), dtype=tf.float32)
B = tf.Variable(tf.random.normal((1,)), dtype=tf.float32)
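x_input holds six 2-feature samples as a (6, 2) matrix, W is a (2, 1) weight column, and B is a single bias, so the matmul in the hypothesis below produces one logit per sample. A quick shape check (a small sketch, not part of the original notebook):

print(x_input.shape, W.shape, B.shape)  # (6, 2) (2, 1) (1,)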
In [ ]:
def Hypothesis(x):
    return tf.sigmoid(tf.add(tf.matmul(x, W), B))
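For reference, the hypothesis implemented above is the sigmoid of a linear combination of the inputs:

$$H(x) = \sigma(xW + B) = \frac{1}{1 + e^{-(xW + B)}}$$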
In [ ]:
def Cost():
    h = Hypothesis(x_input)
    return -tf.reduce_mean(labels * tf.math.log(h) + (1 - labels) * tf.math.log(1 - h))
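This is the binary cross-entropy averaged over the N = 6 samples:

$$\text{Cost} = -\frac{1}{N}\sum_{i=1}^{N}\left[\,y_i \log H(x_i) + (1 - y_i)\log\bigl(1 - H(x_i)\bigr)\right]$$

Note that tf.math.log returns -inf if the sigmoid saturates to exactly 0 or 1. A minimal sketch of a safer variant, assuming a clipping constant of 1e-7 (not part of the original notebook):

def SafeCost():
    eps = 1e-7  # assumed clipping constant; keeps log() away from 0
    h = tf.clip_by_value(Hypothesis(x_input), eps, 1.0 - eps)
    return -tf.reduce_mean(labels * tf.math.log(h) + (1 - labels) * tf.math.log(1 - h))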
Training¶
In [ ]:
%%time
epochs = 10000
learning_rate = 0.1
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
training_idx = np.arange(0, epochs+1, 1)
cost_graph = np.zeros(epochs+1)
check = np.array([0, epochs*0.01, epochs*0.08, epochs*0.2, epochs*0.4, epochs])
W_trained = []
b_trained = []
check_idx = 0
# Training
for cnt in range(0, epochs+1):
    cost_graph[cnt] = Cost()
    if cnt % (epochs//20) == 0:
        print("[{:>5}] cost = {:>10.4}, W = [[{:>7.4}] [{:>7.4}]], B = [[{:>7.4}]]".format(cnt, cost_graph[cnt], W[0,0], W[1,0], B[0]))
    if check[check_idx] == cnt:
        W_trained.append(W.numpy())  # snapshot of the weights at this checkpoint
        b_trained.append(B.numpy())  # snapshot of the bias at this checkpoint
        check_idx += 1
    optimizer.minimize(Cost, [W, B])
[    0] cost =      1.434, W = [[ -1.095] [ 0.7391]], B = [[ -1.516]]
[  500] cost =     0.2489, W = [[  1.194] [  1.392]], B = [[  -4.57]]
[ 1000] cost =     0.1635, W = [[  1.657] [  1.916]], B = [[ -6.499]]
[ 1500] cost =     0.1215, W = [[  1.983] [  2.285]], B = [[ -7.857]]
[ 2000] cost =    0.09657, W = [[  2.234] [  2.569]], B = [[ -8.904]]
[ 2500] cost =    0.08006, W = [[  2.439] [  2.801]], B = [[ -9.758]]
[ 3000] cost =    0.06832, W = [[  2.612] [  2.996]], B = [[ -10.48]]
[ 3500] cost =    0.05956, W = [[  2.761] [  3.165]], B = [[  -11.1]]
[ 4000] cost =    0.05277, W = [[  2.892] [  3.314]], B = [[ -11.65]]
[ 4500] cost =    0.04736, W = [[  3.009] [  3.447]], B = [[ -12.13]]
[ 5000] cost =    0.04295, W = [[  3.115] [  3.567]], B = [[ -12.58]]
[ 5500] cost =    0.03929, W = [[  3.212] [  3.677]], B = [[ -12.98]]
[ 6000] cost =    0.03619, W = [[  3.301] [  3.778]], B = [[ -13.35]]
[ 6500] cost =    0.03355, W = [[  3.383] [  3.871]], B = [[ -13.69]]
[ 7000] cost =    0.03126, W = [[  3.459] [  3.957]], B = [[ -14.01]]
[ 7500] cost =    0.02926, W = [[   3.53] [  4.038]], B = [[  -14.3]]
[ 8000] cost =     0.0275, W = [[  3.597] [  4.114]], B = [[ -14.58]]
[ 8500] cost =    0.02594, W = [[   3.66] [  4.186]], B = [[ -14.85]]
[ 9000] cost =    0.02455, W = [[  3.719] [  4.253]], B = [[ -15.09]]
[ 9500] cost =     0.0233, W = [[  3.776] [  4.318]], B = [[ -15.33]]
[10000] cost =    0.02217, W = [[   3.83] [  4.378]], B = [[ -15.55]]
CPU times: total: 55.4 s
Wall time: 55.5 s
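Here optimizer.minimize is called with the loss as a callable, so gradients are computed internally. An equivalent hand-written update step (a sketch using the standard tf.GradientTape pattern; not code from the original notebook) would be:

with tf.GradientTape() as tape:      # record operations for differentiation
    loss = Cost()
grads = tape.gradient(loss, [W, B])  # dCost/dW, dCost/dB
optimizer.apply_gradients(zip(grads, [W, B]))  # one SGD step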
Hypothesis Test¶
In [ ]:
# Test the training results and make predictions
print("[Training Test]")
H_x = Hypothesis(x_input).numpy()
H_x = H_x.reshape((-1,))
H = [int(h > 0.5) for h in H_x]
for idx in range(x_input.shape[0]):
    print("Input {} , Label : {} => H :{:>2}(H_x:{:>5.2})".format(x_input[idx], labels[idx], H[idx], H_x[idx]))
[Training Test]
Input [1. 1.] , Label : [0.] => H : 0(H_x:0.00065)
Input [2. 1.] , Label : [0.] => H : 0(H_x:0.029)
Input [1. 2.] , Label : [0.] => H : 0(H_x:0.049)
Input [0.5 4. ] , Label : [1.] => H : 1(H_x: 0.98)
Input [4. 1.] , Label : [1.] => H : 1(H_x: 0.98)
Input [2.5 2.3] , Label : [1.] => H : 1(H_x: 0.98)
In [ ]:
print("\n[ Prediction by specific data ]")
x_test = tf.constant([[1.5, 3], [0.5, 2]], dtype= tf.float32)
H_x = Hypothesis(x_test).numpy().reshape((-1,))
for idx in range(x_test.shape[0]):
print("Input {} => H_x: {:>5.2}".format(x_test[idx], H_x[idx]))
[ Prediction by specific data ]
Input [1.5 3. ] => H_x: 0.97
Input [0.5 2. ] => H_x: 0.0075
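Applying the same 0.5 decision threshold used in the training test (a small sketch, not in the original notebook) converts these probabilities into class predictions:

preds = [int(h > 0.5) for h in H_x]
print(preds)  # [1, 0]: the first test point falls on the class-1 side, the second on the class-0 side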
In [ ]:
# Plot the training progress
# Cost value per training epoch
plt.title("'Cost / Epochs' Graph")
plt.xlabel("Epochs")
plt.ylabel("Cost")
plt.plot(training_idx, cost_graph)
plt.xlim(0, epochs)
plt.grid(True)
plt.semilogy()
plt.show()
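The decision boundary plotted in the next cell is the set of points where the hypothesis equals 0.5, i.e. where the logit xW + B is zero. Solving $w_0 x_0 + w_1 x_1 + b = 0$ for $x_1$ gives

$$x_1 = -\frac{w_0 x_0 + b}{w_1},$$

which is exactly the y_decision expression used below.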
In [ ]:
# Draw the decision boundary at each checkpoint
x_decision = np.linspace(0, 5, 1000)
fig, ax = plt.subplots(2, 3, figsize=(15, 11))
fig.suptitle("'Hypothesis / Training Count' Graph")
for ax_idx in range(check.size):
    W_ckpt = W_trained[ax_idx]  # checkpointed weights (avoids clobbering the tf.Variable W)
    B_ckpt = b_trained[ax_idx]
    y_decision = -(W_ckpt[0] * x_decision + B_ckpt[0]) / W_ckpt[1]
    panel = ax[ax_idx // 3][ax_idx % 3]
    # Plot each sample in blue (label 0) or red (label 1)
    for i in range(labels.shape[0]):
        if labels[i][0] == 0:
            panel.scatter(x_input[i][0], x_input[i][1], color='blue')
        else:
            panel.scatter(x_input[i][0], x_input[i][1], color='red')
    panel.plot(x_decision, y_decision, label='Decision Boundary', color='green')
    panel.set_title("Epochs : {}".format(int(check[ax_idx])))
    panel.set_xlim((0, 5))
    panel.set_ylim((0, 5))
    panel.set_xlabel("x0")
    panel.set_ylabel("x1")
    panel.grid(True)
    panel.legend()
plt.show()