optimizer 

 Configuration / methods
 Tasks
 Introduction
 Sample

 Terminology
 Samples below were run on CentOS 7 ($ python3 xxxx.py)
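
 For reference, the update rules implemented by optimizer.py below, written in plain notation
 (W: parameter, g = dL/dW, eta: learning rate, eps = 1e-7):

   SGD:      W <- W - eta*g
   Momentum: v <- alpha*v - eta*g;  W <- W + v
   AdaGrad:  h <- h + g*g;  W <- W - eta*g / (sqrt(h) + eps)
   RMSprop:  h <- rho*h + (1-rho)*g*g;  W <- W - eta*g / (sqrt(h) + eps)
   Adam:     m <- b1*m + (1-b1)*g;  v <- b2*v + (1-b2)*g*g;
             W <- W - eta_t*m / (sqrt(v) + eps),  eta_t = eta*sqrt(1-b2^t)/(1-b1^t)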

  optimizer

  #!/usr/bin/env python3
  # optimizer.py
  import numpy as np


  class SGD:
      # Stochastic Gradient Descent
      def __init__(self, lr=0.01):
          self.lr = lr

      def update(self, params, grads):
          for key in params.keys():
              params[key] -= self.lr * grads[key]


  class Momentum:
      # Momentum SGD
      def __init__(self, lr=0.01, momentum=0.9):
          self.lr = lr
          self.momentum = momentum
          self.v = None

      def update(self, params, grads):
          if self.v is None:
              self.v = {}
              for key, val in params.items():
                  self.v[key] = np.zeros_like(val)
          for key in params.keys():
              self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
              params[key] += self.v[key]


  class Nesterov:
      # Nesterov's Accelerated Gradient (momentum evaluated one step ahead)
      def __init__(self, lr=0.01, momentum=0.9):
          self.lr = lr
          self.momentum = momentum
          self.v = None

      def update(self, params, grads):
          if self.v is None:
              self.v = {}
              for key, val in params.items():
                  self.v[key] = np.zeros_like(val)
          for key in params.keys():
              self.v[key] *= self.momentum
              self.v[key] -= self.lr * grads[key]
              params[key] += self.momentum * self.momentum * self.v[key]
              params[key] -= (1 + self.momentum) * self.lr * grads[key]


  class AdaGrad:
      # AdaGrad: per-parameter step size shrinks as squared gradients accumulate in h
      def __init__(self, lr=0.01):
          self.lr = lr
          self.h = None

      def update(self, params, grads):
          if self.h is None:
              self.h = {}
              for key, val in params.items():
                  self.h[key] = np.zeros_like(val)
          for key in params.keys():
              self.h[key] += grads[key] * grads[key]
              params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)


  class RMSprop:
      # RMSprop: exponential moving average of squared gradients instead of a running sum
      def __init__(self, lr=0.01, decay_rate=0.99):
          self.lr = lr
          self.decay_rate = decay_rate
          self.h = None

      def update(self, params, grads):
          if self.h is None:
              self.h = {}
              for key, val in params.items():
                  self.h[key] = np.zeros_like(val)
          for key in params.keys():
              self.h[key] *= self.decay_rate
              self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
              params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)


  class Adam:
      # Adam: momentum-style first moment (m) plus RMSprop-style second moment (v)
      def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
          self.lr = lr
          self.beta1 = beta1
          self.beta2 = beta2
          self.iter = 0
          self.m = None
          self.v = None
          print("optimizer is Adam")
          print("learning rate (lr) =", self.lr)

      def update(self, params, grads):
          if self.m is None:
              self.m, self.v = {}, {}
              for key, val in params.items():
                  self.m[key] = np.zeros_like(val)
                  self.v[key] = np.zeros_like(val)
          self.iter += 1
          # fold the bias correction into the step size instead of correcting m and v
          lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
          for key in params.keys():
              # equivalent to: self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key]
              #                self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2)
              self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
              self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
              params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
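
  A minimal usage sketch, not part of the original file: it drives the classes above on the
  2-variable test function f(x, y) = x**2/20 + y**2 and prints where each optimizer ends up.
  The file name optimizer_demo.py, the starting point, and the learning rates are assumptions
  for illustration; it assumes optimizer.py sits in the same directory.

  #!/usr/bin/env python3
  # optimizer_demo.py  (hypothetical driver for optimizer.py)
  import numpy as np
  from optimizer import SGD, Momentum, AdaGrad, RMSprop, Adam

  def gradient(params):
      # analytic gradient of f(x, y) = x**2 / 20 + y**2
      return {"x": params["x"] / 10.0, "y": 2.0 * params["y"]}

  for opt in (SGD(lr=0.95), Momentum(lr=0.1), AdaGrad(lr=1.5),
              RMSprop(lr=0.3), Adam(lr=0.3)):
      # same starting point for every optimizer; 0-d arrays so updates happen in place
      params = {"x": np.array(-7.0), "y": np.array(2.0)}
      for _ in range(30):
          opt.update(params, gradient(params))    # one optimization step
      print(type(opt).__name__, params["x"], params["y"])

  All five should head toward the minimum at (0, 0); the paths differ because plain SGD
  oscillates along the steep y direction, while the others damp or rescale that component.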