"""
This part of code is the Q-learning brain, which is the brain of the agent.
All decisions are made in here.
View more on my tutorial page: https://morvanzhou.github.io/tutorials/
"""

import numpy as np
import pandas as pd

np.random.seed(2)  # reproducible


class QLearningTable:
    """Tabular Q-learning agent backed by a pandas DataFrame.

    The Q-table has one row per visited state (row label = state) and one
    column per action. Rows are created lazily, initialised to 0, the first
    time a state is seen.

    Args:
        actions: List of action labels; used as the Q-table columns.
        learning_rate: Step size applied to the temporal-difference error.
        reward_decay: Discount factor (gamma) applied to the next state's
            best Q-value.
        e_greedy: Probability of picking the greedy (best-valued) action in
            ``choose_action``; otherwise a uniformly random action is taken.
    """

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        """Return an action for ``observation`` using an epsilon-greedy rule.

        The state is added to the Q-table first if it has not been seen.
        States are used as row labels, so they should be scalar hashable
        values such as strings.
        """
        self.check_state_exist(observation)
        # action selection
        if np.random.uniform() < self.epsilon:
            # choose best action
            state_action = self.q_table.loc[observation, :]
            # some actions may have the same value; randomly choose one of
            # them so ties are not always resolved towards the first column
            best_actions = state_action[state_action == state_action.max()].index
            action = np.random.choice(best_actions)
        else:
            # choose random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        """Update Q(s, a) from the transition ``(s, a, r, s_)``.

        Args:
            s: State in which the action was taken.
            a: Action taken (must be one of ``self.actions``).
            r: Reward received.
            s_: Resulting state; the string ``'terminal'`` marks the end of
                an episode, in which case no future value is bootstrapped.
        """
        # Register both states so a transition can be learned even when `s`
        # was never passed through choose_action (avoids a KeyError).
        self.check_state_exist(s)
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            # next state is not terminal
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        else:
            # next state is terminal
            q_target = r
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)  # update

    def check_state_exist(self, state):
        """Add an all-zero row for ``state`` to the Q-table if it is missing."""
        if state not in self.q_table.index:
            # DataFrame.append was removed in pandas 2.0, so build the new
            # row as a one-row float64 frame and concatenate it instead.
            new_row = pd.DataFrame(
                0.0,
                index=[state],
                columns=self.q_table.columns,
                dtype=np.float64,
            )
            if len(self.q_table.index) == 0:
                # Concatenating with an empty frame emits a FutureWarning on
                # recent pandas; the first row simply becomes the table.
                self.q_table = new_row
            else:
                self.q_table = pd.concat([self.q_table, new_row])