Samples were run on CentOS 7 ($ python3 xxxx.py)

Create a pickle file from the MNIST dataset (mnist.py)

#!/usr/bin/env python3
# mnist.py
try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')
import gzip
import numpy as np
import os
import os.path
import pickle

url_base = 'http://yann.lecun.com/exdb/mnist/'
key_file = {
    'train_img':'train-images-idx3-ubyte.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}

dataset_dir = os.path.dirname(os.path.abspath(__file__))  # current directory
save_file = dataset_dir + "/data" + "/mnist.pkl"  # pickle file; the data directory must already exist
print("***save_file***", save_file)

# train_num = 60000
# test_num = 10000
# img_dim = (1, 28, 28)
img_size = 784  # 28 * 28; used by _load_img()


def init_mnist():
    print("***download***")
    download_mnist()
    print("Downloading Done")
    dataset = _convert_numpy()
    print("_convert_numpy() Done!")
    with open(save_file, 'wb') as f:
        pickle.dump(dataset, f, -1)
    print("Creating pickle file ... Done!")


def download_mnist():
    for v in key_file.values():
        _download(v)


def _download(file_name):
    file_path = dataset_dir + "/data" + "/" + file_name
    if os.path.exists(file_path):
        return
    urllib.request.urlretrieve(url_base + file_name, file_path)


def _convert_numpy():
    dataset = {}
    dataset['train_img'] = _load_img(key_file['train_img'])
    dataset['train_label'] = _load_label(key_file['train_label'])
    # np.set_printoptions(linewidth=118)
    # print(dataset['train_img'][999].reshape((28, 28)))
    # print(dataset['train_label'][999])
    dataset['test_img'] = _load_img(key_file['test_img'])
    dataset['test_label'] = _load_label(key_file['test_label'])
    print("Dataset Done")
    return dataset


def _load_img(file_name):
    file_path = dataset_dir + "/data" + "/" + file_name
    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(file_path, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    data = data.reshape(-1, img_size)
    return data


def _load_label(file_name):
    file_path = dataset_dir + "/data" + "/" + file_name
    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(file_path, 'rb') as f:
        labels = np.frombuffer(f.read(), np.uint8, offset=8)
    return labels


def _change_one_hot_label(X):
    T = np.zeros((X.size, 10))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
    return T


def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    if not os.path.exists(save_file):
        init_mnist()  # download and convert if the pickle does not exist yet
    with open(save_file, 'rb') as f:
        dataset = pickle.load(f)
    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0
    if one_hot_label:
        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
    if not flatten:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)
    return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])


if __name__ == '__main__':
    init_mnist()
Execution result (mnist.py)

Current directory is xxxx
Downloading to the specified save location
Downloading train-images-idx3-ubyte.gz ...
Done
....
Converting train-images-idx3-ubyte.gz to NumPy Array ...
Done
....
train_img.shape (60000, 784)
train_label.shape (60000,)
test_img.shape (10000, 784)
test_label.shape (10000,)
dataset_end
Creating pickle file ...
pickle_end
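For reference, a short usage sketch of load_mnist() (the file name loadCheck.py is made up for this note): it loads the pickle created above, downloading first if needed, and prints the array shapes for the main flag combinations.

#!/usr/bin/env python3
# loadCheck.py (hypothetical file name) -- exercise load_mnist() from mnist.py above
from mnist import load_mnist

# flattened, normalized images: (60000, 784) float32 in [0, 1]
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True)
print(x_train.shape, t_train.shape, x_test.shape, t_test.shape)

# image-shaped data and one-hot labels: (60000, 1, 28, 28) and (60000, 10)
(x_train, t_train), _ = load_mnist(flatten=False, one_hot_label=True)
print(x_train.shape, t_train.shape)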
Display MNIST image data (mnistDataHyouji.py)

#!/usr/bin/env python3
# mnistDataHyouji.py
# display MNIST data
import os.path
import numpy as np
import pickle
import matplotlib.pyplot as plt
from mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # load the data (creates the pickle on first run)

dataset_dir = os.path.dirname(os.path.abspath(__file__))
print("Current directory is", dataset_dir)
save_file = dataset_dir + "/data" + "/mnist.pkl"
print("***save_file***", save_file)

with open(save_file, 'rb') as f:
    dataset = pickle.load(f)

print("Checking the data")
print(dataset['train_img'].shape)
print(dataset['train_label'].shape)
print(dataset['test_img'].shape)
print(dataset['test_label'].shape)

np.set_printoptions(linewidth=118)
print(dataset['train_img'][999].reshape((28, 28)))  # the 1000th image
example = dataset['train_img'][999].reshape((28, 28))
plt.imshow(example)
# plt.show()  # display or save the image
plt.savefig("example999.png")
print("disp_end")
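A quick extra check, not part of the script above: print the ground-truth label stored for index 999 next to the image, so picture and label can be compared (checkLabel999.py is a hypothetical file name).

#!/usr/bin/env python3
# checkLabel999.py (hypothetical) -- show the stored label for the image saved above
import matplotlib.pyplot as plt
from mnist import load_mnist

(x_train, t_train), _ = load_mnist(normalize=False, flatten=False, one_hot_label=False)
img = x_train[999].reshape(28, 28)
label = t_train[999]                 # scalar class id 0-9 (labels are not one-hot here)
print("train_label[999] =", label)
plt.imshow(img, cmap='gray')
plt.title("label: " + str(label))    # annotate the saved figure with the label
plt.savefig("example999_labeled.png")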
Mini-batch training with numerical gradients (numerical differentiation; mnistNumerical_gradient.py)

#!/usr/bin/env python3
# mnistNumerical_gradient.py
# mini-batch training
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
print("Checking the data")
print("x_train.shape", x_train.shape)
print("t_train.shape", t_train.shape)
print("x_test.shape", x_test.shape)
print("t_test.shape", t_test.shape)
print("one_hot_label=True")

print("2-layer NN: input layer 784, hidden layer 50, output layer 10")
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

print("Hyperparameters")
iters_num = 10000  # number of update iterations; adjust as needed
print("Number of gradient-descent updates", iters_num)
train_size = x_train.shape[0]
print("Number of training samples", train_size)
batch_size = 100
print("Mini-batch size", batch_size)
learning_rate = 0.1
print("Learning rate", learning_rate)

train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size / batch_size, 1)
print("Iterations per epoch", iter_per_epoch)

for i in range(iters_num):
    # draw a mini-batch and train on it
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    grad = network.numerical_gradient(x_batch, t_batch)
    #grad = network.gradient(x_batch, t_batch)
    #print(grad)
    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)  # value of the loss function
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        # compute recognition accuracy once per epoch
        print("iter_per_epoch", i)
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
# plt.show()
plt.savefig("train_neuralnet.png")
print("mini-batch training end")
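The functions module used by two_layer_net.py below is not reproduced on this page. As an assumption about what it provides, a minimal central-difference sketch of numerical_gradient(f, x) looks like this; f receives the parameter array and returns the scalar loss.

import numpy as np

def numerical_gradient(f, x):
    # Central-difference gradient of f at x (a sketch of the helper assumed above).
    # Each element of x is nudged by +/- h in place and restored after evaluation.
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f(x)      # f(x + h)
        x[idx] = tmp - h
        fxh2 = f(x)      # f(x - h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp     # restore the original value
        it.iternext()
    return grad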
TwoLayerNet (two_layer_net.py)

#!/usr/bin/env python3
# two_layer_net.py
import numpy as np
from functions import sigmoid, softmax, numerical_gradient, cross_entropy_error, sigmoid_grad

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        # 0.01 * standard-normal samples: a 784x50 matrix
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        print("weight initialization, weight_init_std", weight_init_std)
        print("self.params['W1'].shape", self.params['W1'].shape)
        print("self.params['b1'].shape", self.params['b1'].shape)
        print("self.params['W2'].shape", self.params['W2'].shape)
        print("self.params['b2'].shape", self.params['b2'].shape)
        # print("(self.params['W1'])", (self.params['W1']))
        print("(self.params['W1'])[0, 0]", (self.params['W1'])[0, 0])
        print("(self.params['W1'])[783, 49]", (self.params['W1'])[783, 49])
        print("(self.params['b1'])[0]", (self.params['b1'])[0])
        print("(self.params['W2'])[0, 0]", (self.params['W2'])[0, 0])
        print("(self.params['W2'])[49, 9]", (self.params['W2'])[49, 9])
        print("(self.params['b2'])[0]", (self.params['b2'])[0])

    def predict(self, x):  # inference
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)   # sigmoid activation
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)    # softmax output
        return y

    def loss(self, x, t):  # loss function; x: input data, t: teacher labels
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):  # recognition accuracy
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):  # gradients w.r.t. the weight parameters; x: input, t: teacher
        loss_W = lambda W: self.loss(x, t)  # the lambda must accept the parameter array
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):  # backpropagation version
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]
        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)
        return grads
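Since the class offers both numerical_gradient() and the backpropagation gradient(), a gradient check is a natural sanity test. A minimal sketch (gradcheck.py is a hypothetical file name); the averaged absolute differences should come out very close to zero.

#!/usr/bin/env python3
# gradcheck.py (hypothetical) -- compare numerical and backprop gradients on a tiny batch
import numpy as np
from mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), _ = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]   # keep the batch tiny: the numerical gradient is very slow
t_batch = t_train[:3]
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ": " + str(diff))   # tiny values mean the two gradients agree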
Mini-batch training with backpropagation

#!/usr/bin/env python3
# mini-batch training (backpropagation version)
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
print("Checking the data")
print("x_train.shape", x_train.shape)
print("t_train.shape", t_train.shape)
print("x_test.shape", x_test.shape)
print("t_test.shape", t_test.shape)
print("one_hot_label=True")

print("2-layer NN: input layer 784, hidden layer 50, output layer 10")
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

print("Hyperparameters")
iters_num = 10000  # number of update iterations; adjust as needed
print("Number of gradient-descent updates", iters_num)
train_size = x_train.shape[0]
print("Number of training samples", train_size)
batch_size = 100
print("Mini-batch size", batch_size)
learning_rate = 0.1
print("Learning rate", learning_rate)

train_loss_list = []
train_acc_list = []
test_acc_list = []
#iter_per_epoch = 10
iter_per_epoch = max(train_size / batch_size, 1)
print("Iterations per epoch", iter_per_epoch)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # compute the gradients
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)
    #print(grad)
    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        print("iter_per_epoch", i)
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
# plt.show()
plt.savefig("train_neuralnet.png")
print("mini-batch training end")
Training with SimpleConvNet

#!/usr/bin/env python3
# training with SimpleConvNet
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from simpleConvNet import SimpleConvNet
from trainer import Trainer
#from collections import OrderedDict

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # load the data

max_epochs = 20
network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

network.save_params("simpleConvNet_params.pkl")  # save the parameters
print("Saved Network Parameters!")

# plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
# plt.show()
plt.savefig("train_neuralnet.png")
print("SimpleConvNet training end")
SimpleConvNet

#!/usr/bin/env python3
# simpleConvNet.py
import pickle
import sys, os
sys.path.append(os.pardir)
import numpy as np
from layers import *
from gradient import numerical_gradient
from collections import OrderedDict

class SimpleConvNet:
    # A simple ConvNet: conv - relu - pool - affine - relu - affine - softmax
    # Parameters
    #   input_dim : input dimensions (for MNIST: (1, 28, 28))
    #   conv_param : filter_num / filter_size / pad / stride of the conv layer
    #   hidden_size : number of neurons in the hidden affine layer (e.g. 100)
    #   output_size : output size (for MNIST: 10)
    #   weight_init_std : standard deviation of the initial weights (e.g. 0.01)
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # build the layers
        self.layers = OrderedDict()  # preserves insertion order
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):  # inference
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # loss function
    def loss(self, x, t):  # x: input data, t: teacher labels
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    # recognition accuracy
    def accuracy(self, x, t, batch_size=100):  # x: input data, t: teacher labels
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    # gradients by numerical differentiation
    def numerical_gradient(self, x, t):  # x: input data, t: teacher labels
        # returns a dictionary of gradients per layer:
        # grads['W1'], grads['W2'], ... are the layer weights
        # grads['b1'], grads['b2'], ... are the layer biases
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])
        return grads

    # gradients by backpropagation
    def gradient(self, x, t):  # x: input data, t: teacher labels
        # forward
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        # collect the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads

    # save the parameters
    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    # load the parameters
    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val
        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]
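For the default parameters, the size arithmetic in __init__ works out as below (a worked check, not part of the class):

# conv: (28 - 5 + 2*0) / 1 + 1 = 24  -> 24x24 feature maps, 30 filters
# 2x2 max pooling with stride 2: 24 / 2 = 12 -> 12x12 maps
# flattened input to Affine1: 30 * 12 * 12 = 4320
conv_output_size = (28 - 5 + 2*0) / 1 + 1
pool_output_size = int(30 * (conv_output_size/2) * (conv_output_size/2))
print(conv_output_size, pool_output_size)  # 24.0 4320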
Trainer (trainer.py)

#!/usr/bin/env python3
# trainer.py
import numpy as np
from optimizer import *

class Trainer:  # runs the training loop for a neural network
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100,
                 optimizer='SGD', optimizer_param={'lr': 0.01},
                 evaluate_sample_num_per_epoch=None, verbose=True):
        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch
        # optimizer
        optimizer_class_dict = {'sgd': SGD, 'momentum': Momentum, 'nesterov': Nesterov,
                                'adagrad': AdaGrad, 'rmsprop': RMSprop, 'adam': Adam}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
        self.train_size = x_train.shape[0]
        self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0
        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []

    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        grads = self.network.gradient(x_batch, t_batch)
        self.optimizer.update(self.network.params, grads)
        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose:
            print("train loss:" + str(loss))
        if self.current_iter % self.iter_per_epoch == 0:
            self.current_epoch += 1
            x_train_sample, t_train_sample = self.x_train, self.t_train
            x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
                t = self.evaluate_sample_num_per_epoch
                x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
                x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
            train_acc = self.network.accuracy(x_train_sample, t_train_sample)
            test_acc = self.network.accuracy(x_test_sample, t_test_sample)
            self.train_acc_list.append(train_acc)
            self.test_acc_list.append(test_acc)
            if self.verbose:
                print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
        self.current_iter += 1

    def train(self):  # run the whole training loop
        for i in range(self.max_iter):
            self.train_step()
        test_acc = self.network.accuracy(self.x_test, self.t_test)
        if self.verbose:
            print("Final Test Accuracy")
            print("test acc:" + str(test_acc))
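The optimizer module pulled in with from optimizer import * is not listed on this page. The only contract Trainer relies on is update(params, grads); a minimal SGD sketch under that assumption (Momentum, Nesterov, AdaGrad, RMSprop, and Adam are assumed to expose the same interface):

# optimizer.py (sketch) -- Trainer only needs update(params, grads)
import numpy as np

class SGD:
    # Plain stochastic gradient descent: W <- W - lr * dW
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]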
MNIST deep learning

#!/usr/bin/env python3
# train_deepnet
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from deepConvNet import DeepConvNet
from trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # load the data

network = DeepConvNet()
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

network.save_params("deep_convnet_params.pkl")  # save the parameters
print("Saved Network Parameters!")
DeepConvNet
$ python3 xxxx.py

#!/usr/bin/env python3
# deepConvNet.py
import pickle
import numpy as np
from collections import OrderedDict
from layers import *

class DeepConvNet:  # a network that reaches 99%+ recognition accuracy
    # conv - relu - conv - relu - pool - conv - relu - conv - relu - pool -
    # conv - relu - conv - relu - pool - affine - relu - dropout -
    # affine - dropout - softmax
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 hidden_size=50, output_size=10):
        # initialize the weights
        # number of connections each neuron has to the previous layer (TODO: compute automatically)
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)  # recommended initial scale when using ReLU (He initialization)

        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # build the layers
        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'],
                                       conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W2'], self.params['b2'],
                                       conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W3'], self.params['b3'],
                                       conv_param_3['stride'], conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W4'], self.params['b4'],
                                       conv_param_4['stride'], conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W5'], self.params['b5'],
                                       conv_param_5['stride'], conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W6'], self.params['b6'],
                                       conv_param_6['stride'], conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):  # inference
        for layer in self.layers:
            if isinstance(layer, Dropout):  # Dropout layers need the train/test flag
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):  # x: input data, t: teacher labels
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)  # last_layer is SoftmaxWithLoss()

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):  # gradients by backpropagation
        # forward
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        tmp_layers = self.layers.copy()
        tmp_layers.reverse()
        for layer in tmp_layers:
            dout = layer.backward(dout)
        # collect the gradients
        grads = {}
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            grads['W' + str(i+1)] = self.layers[layer_idx].dW
            grads['b' + str(i+1)] = self.layers[layer_idx].db
        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]
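After training, the saved parameters can be reloaded for inference. A minimal sketch (evalDeep.py is a hypothetical file name) assuming deep_convnet_params.pkl was written by the train_deepnet script above:

#!/usr/bin/env python3
# evalDeep.py (hypothetical) -- reload the trained DeepConvNet and measure test accuracy
from mnist import load_mnist
from deepConvNet import DeepConvNet

(_, _), (x_test, t_test) = load_mnist(flatten=False)
network = DeepConvNet()
network.load_params("deep_convnet_params.pkl")  # weights saved by train_deepnet
print("test acc:", network.accuracy(x_test, t_test))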
MNIST inference
$ python3 xxxx.py

#!/usr/bin/env python3
# neuralnet_mnist_batch
import sys, os
sys.path.append(os.pardir)
import numpy as np
import pickle
from mnist import load_mnist
from functions import sigmoid, softmax

def get_data():
    # extract the test data
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

def init_network():
    # load the pre-trained weight parameters
    with open("sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network

def predict(network, x):
    # forward pass through the 3-layer network
    w1, w2, w3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = softmax(a3)
    return y

x, t = get_data()
network = init_network()
w1, w2, w3 = network['W1'], network['W2'], network['W3']
b1, b2, b3 = network['b1'], network['b2'], network['b3']
#print(network['W3'])
print(w1.shape)
print(w2.shape)
print(w3.shape)
#print(network['b1'])
print(b1.shape)
#print(x)
print(x.shape)

batch_size = 100  # batch size
accuracy_cnt = 0
for i in range(0, len(x), batch_size):
    # check accuracy on the test data, one batch at a time
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)  # predicted class per row
    accuracy_cnt += np.sum(p == t[i:i+batch_size])  # compare predictions with the true labels
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))  # recognition accuracy
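The batch version hinges on np.argmax(..., axis=1) picking the highest-scoring class per row; a tiny worked example (the numbers are illustrative only):

import numpy as np

# Each row is a softmax output for one image (3 images, 4 classes for brevity).
y_batch = np.array([[0.1, 0.7, 0.1, 0.1],    # predicted class 1
                    [0.8, 0.1, 0.05, 0.05],  # predicted class 0
                    [0.2, 0.2, 0.2, 0.4]])   # predicted class 3
p = np.argmax(y_batch, axis=1)
print(p)                   # [1 0 3]
t = np.array([1, 0, 2])    # true labels
print(np.sum(p == t))      # 2 correct out of 3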
Creating a mini-batch

#!/usr/bin/env python3
# create an MNIST mini-batch
import os.path
import numpy as np
import pickle
import matplotlib.pyplot as plt

dataset_dir = os.path.dirname(os.path.abspath(__file__))
print("Current directory is", dataset_dir)

save_file = dataset_dir + "/data" + "/mnist.pkl"  # same location mnist.py above writes to
with open(save_file, 'rb') as f:
    dataset = pickle.load(f)

def _change_one_hot_label(X):
    T = np.zeros((X.size, 10))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
    return T

dataset['train_label'] = _change_one_hot_label(dataset['train_label'])

train_size = dataset['train_img'].shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)  # draw batch_size random indices
print("Checking the data")
print(dataset['train_img'].shape)
print(dataset['train_label'].shape)
print("Number of training samples:", train_size)
print("Batch size:", batch_size)
print("Selected indices:", batch_mask)
print("disp_end")
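The point of drawing batch_mask is to compute the loss on just those samples. A minimal sketch continuing from the script above, with the cross-entropy helper written inline (it matches the one-hot label convention used here):

# Compute the cross-entropy loss over the sampled mini-batch (sketch).
x_batch = dataset['train_img'][batch_mask]     # shape (10, 784)
t_batch = dataset['train_label'][batch_mask]   # shape (10, 10), one-hot

def cross_entropy_error(y, t):
    # mean cross-entropy over the batch; 1e-7 avoids log(0)
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size

# With a uniform dummy "prediction" of 0.1 per class, the loss is -log(0.1):
y_dummy = np.full((batch_size, 10), 0.1)
print(cross_entropy_error(y_dummy, t_batch))   # approx. 2.302585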