728x90
반응형
# DQN
import gym
import tensorflow as tf
import numpy as np
import random
from collections import deque
# Q-network: maps a 4-dim CartPole observation
# [cart position, cart velocity, pole angle, pole angular velocity]
# to one Q-value per action (2 actions: push cart left / right).
player = tf.keras.models.Sequential()
player.add(tf.keras.Input(shape=(4,)))
player.add(tf.keras.layers.Dense(24, activation=tf.nn.relu))
player.add(tf.keras.layers.Dense(24, activation=tf.nn.relu))
player.add(tf.keras.layers.Dense(2, activation='linear'))

# Regression onto the TD target: MSE loss, Adam with default hyperparameters.
player.compile(optimizer="adam", loss='mean_squared_error')
# Score history (timesteps survived per episode) and the experience-replay
# buffer; the deque silently drops the oldest transitions past 2000 entries.
game_score = list()
memory = deque([], maxlen=2000)
# Build the CartPole environment (v0 caps an episode at 200 timesteps).
game = gym.make('CartPole-v0')
# 1000회의 에피소드 시작
# Main DQN training loop: 1000 episodes, epsilon-greedy acting, experience
# replay after each episode once enough transitions have been collected.
# NOTE(review): the source paste lost all indentation; structure below is the
# reconstruction that keeps every statement reachable (replay runs once per
# episode, after the timestep loop — the canonical layout for this tutorial).
for episode in range(1000):
    state = game.reset()  # old gym API: reset() returns the observation directly
    state = state.reshape((1, 4))  # add batch dim for the Keras model

    # Exploration rate decays from 10% toward ~3.3% as episodes progress.
    epsil = 1 / (episode / 50 + 10)

    # CartPole-v0 episodes are capped at 200 timesteps.
    for t in range(200):
        # Epsilon-greedy: explore with probability epsil, otherwise act
        # greedily on the network's predicted Q-values.
        if np.random.rand() < epsil:
            curr_action = np.random.randint(0, 2)
        else:
            actions_scores = player.predict(state, verbose=0)
            curr_action = int(np.argmax(actions_scores))

        next_state, curr_reward, end_game, _ = game.step(curr_action)
        next_state = next_state.reshape((1, 4))

        # Store the transition for experience replay.
        memory.append((state, curr_action, curr_reward, next_state, end_game))
        state = next_state

        if end_game or t == 199:
            print('Episode', episode, 'game_score', t + 1)
            game_score.append(t + 1)
            break

    # Experience replay: guard on the actual buffer size as well as the
    # episode count so random.sample can never raise ValueError.
    if episode > 10 and len(memory) >= 16:
        minibatch = random.sample(memory, 16)
        for b_state, b_action, b_reward, b_next_state, b_done in minibatch:
            # Q-learning target: r for terminal transitions, otherwise
            # r + gamma * max_a' Q(s', a') with gamma = 0.9.
            target = b_reward
            if not b_done:
                target = b_reward + 0.9 * np.amax(
                    player.predict(b_next_state, verbose=0)[0])
            # Fit only moves the Q-value of the action actually taken;
            # the other action keeps the network's own prediction.
            target_outputs = player.predict(b_state, verbose=0)
            target_outputs[0][b_action] = target
            player.fit(b_state, target_outputs, epochs=1, verbose=0)
# Release the environment's resources and report the full score history.
game.close()
print(game_score)
728x90
반응형
'Deep Learning' 카테고리의 다른 글
---|---|
1. Setting up the environment for ML (0) | 2023.09.18 |
Teachable Machine (0) | 2023.01.09 |
Reinforcement Learning (0) | 2022.09.19 |
Calculating a Function (0) | 2022.09.16 |
Linear Models_MLBasic.03 (0) | 2022.09.16 |