728x90
반응형
# DQN

import gym
import tensorflow as tf
import numpy as np
import random
from collections import deque

# Build the Q-network layer by layer: a 4-value input, two hidden ReLU layers
# of 24 units each, and a linear output layer with 2 units.
model = tf.keras.models.Sequential()
model.add(tf.keras.Input(shape=(4,)))
model.add(tf.keras.layers.Dense(24, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(24, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(2, activation='linear'))

# Compile the model: Adam optimizer, mean-squared-error loss.
model.compile(
    loss='mean_squared_error',
    optimizer="adam",
)

# --- DQN training on CartPole ------------------------------------------------
# Hyperparameters, named so they can be tuned in one place.
NUM_EPISODES = 1000       # episodes to play
MAX_TIMESTEPS = 200       # step cap per episode
MEMORY_CAPACITY = 2000    # replay buffer size; the deque evicts the oldest entries
BATCH_SIZE = 16           # transitions replayed after each episode
WARMUP_EPISODES = 10      # training starts once the episode index exceeds this
GAMMA = 0.9               # discount applied to the bootstrapped next-state value
STATE_SHAPE = [1, 4]      # a single observation shaped as a batch of one


def _reset_env(environment):
    """Reset *environment* and return only the initial observation.

    Gym releases before 0.26 return the observation itself, while newer
    releases return ``(observation, info)``; both forms are accepted.
    """
    result = environment.reset()
    return result[0] if isinstance(result, tuple) else result


def _step_env(environment, action):
    """Apply *action* and return ``(observation, reward, done)``.

    Accepts both the 4-tuple step result
    ``(observation, reward, done, info)`` and the 5-tuple result
    ``(observation, reward, terminated, truncated, info)``. For the 5-tuple,
    ``terminated`` and ``truncated`` are OR-ed into one ``done`` flag so the
    rest of the script sees the same meaning as with the 4-tuple API.
    """
    result = environment.step(action)
    if len(result) == 5:
        observation, reward, terminated, truncated, _ = result
        return observation, reward, terminated or truncated
    observation, reward, done, _ = result
    return observation, reward, done


score = []                               # length of every finished episode
memory = deque(maxlen=MEMORY_CAPACITY)   # replay buffer of transitions

# Set up the CartPole environment.
env = gym.make('CartPole-v0')

try:
    for i in range(NUM_EPISODES):
        state = np.reshape(_reset_env(env), STATE_SHAPE)
        # Exploration rate: 0.1 for the first episode, shrinking as i grows.
        eps = 1 / (i / 50 + 10)

        for t in range(MAX_TIMESTEPS):
            # Epsilon-greedy action selection.
            if np.random.rand() < eps:
                action = np.random.randint(0, 2)
            else:
                # verbose=0 keeps Keras from printing a progress bar per call.
                action = int(np.argmax(model.predict(state, verbose=0)))

            next_state, reward, done = _step_env(env, action)
            next_state = np.reshape(next_state, STATE_SHAPE)

            memory.append((state, action, reward, next_state, done))
            state = next_state

            if done or t == MAX_TIMESTEPS - 1:
                print('Episode', i, 'Score', t + 1)
                score.append(t + 1)
                break

        # Training: replay a random minibatch, fitting one transition at a
        # time. The length check guards random.sample against a buffer that
        # holds fewer transitions than the batch size.
        if i > WARMUP_EPISODES and len(memory) >= BATCH_SIZE:
            minibatch = random.sample(memory, BATCH_SIZE)

            # Distinct names so the replayed transition does not overwrite
            # the episode variables above.
            for s, a, r, s_next, terminal in minibatch:
                target = r
                if not terminal:
                    # Bootstrap from the best predicted value of the next state.
                    next_values = model.predict(s_next, verbose=0)[0]
                    target = r + GAMMA * np.amax(next_values)
                # Only the taken action's output is changed; the other output
                # keeps the model's own prediction as its training target.
                target_outputs = model.predict(s, verbose=0)
                target_outputs[0][a] = target
                model.fit(s, target_outputs, epochs=1, verbose=0)
finally:
    # Release the environment even if an episode or a training step raises.
    env.close()

print(score)
728x90
반응형

'Deep Learning' 카테고리의 다른 글

Teachable Machine  (0) 2023.01.09
Reinforcement 'CartPole-v1'  (0) 2022.09.23
Calculating a Function  (0) 2022.09.16
Linear Models_MLBasic.03  (0) 2022.09.16
ScikitLearn_MLBasic.02  (0) 2022.09.16

+ Recent posts