
# NMNIST Dataset Citation:
# Orchard, G.; Cohen, G.; Jayawant, A.; and Thakor, N.
# "Converting Static Image Datasets to Spiking Neuromorphic Datasets
# Using Saccades", Frontiers in Neuroscience, vol. 9, no. 437, Oct. 2015.
# Dataloader adapted from
# https://github.com/nmi-lab/torchneuromorphic by
# Emre Neftci and Clemens Schaefer

# import struct
# import time
# import copy
import numpy as np
import h5py

# import torch.utils.data
from ..neuromorphic_dataset import NeuromorphicDataset
from ..events_timeslices import get_tmad_slice
from .._transforms import (
    toOneHot,
    Repeat,
    Compose,
    dvs_permute,
    ToTensor,
    Downsample,
    CropDims,
    ToCountFrame,
)
import os
from .._utils import load_ATIS_bin
from tqdm import tqdm
import glob

mapping = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
    5: "5",
    6: "6",
    7: "7",
    8: "8",
    9: "9",
}


class NMNIST(NeuromorphicDataset):
    """`NMNIST <https://www.garrickorchard.com/datasets/n-mnist>`_ Dataset.

    The Neuromorphic-MNIST (NMNIST) dataset is a spiking version of the
    original frame-based `MNIST <http://yann.lecun.com/exdb/mnist/>`_
    dataset.

    The downloaded and extracted dataset consists of the same 60,000
    training and 10,000 testing samples as the MNIST dataset, captured at
    the same visual scale as the original MNIST dataset (28x28 pixels).
    For compatibility with the .hdf5 conversion process, the dataset is
    reduced such that the number of samples in each class is balanced to
    the class with the fewest samples (training: 5421, test: 892).

    **Number of classes:** 10

    **Number of train samples:** 54210

    **Number of test samples:** 8920

    **Dimensions:** ``[num_steps x 2 x 32 x 32]``

    * **num_steps:** time-dimension of event-based footage
    * **2:** number of channels (on-spikes for luminance increasing;
      off-spikes for luminance decreasing)
    * **32x32:** W x H spatial dimensions of event-based footage

    For further reading, see:

        *Orchard, G.; Cohen, G.; Jayawant, A.; and Thakor, N. "Converting
        Static Image Datasets to Spiking Neuromorphic Datasets Using
        Saccades", Frontiers in Neuroscience, vol. 9, no. 437, Oct. 2015.*

    Example::

        from snntorch.spikevision import spikedata

        train_ds = spikedata.NMNIST("data/nmnist", train=True, num_steps=300, dt=1000)
        test_ds = spikedata.NMNIST("data/nmnist", train=False, num_steps=300, dt=1000)

        # by default, each time step is integrated over 1ms, or dt=1000 microseconds
        # dt can be changed to integrate events over a varying window of time
        # Note that num_steps should be scaled inversely by the same factor
        train_ds = spikedata.NMNIST("data/nmnist", train=True, num_steps=150, dt=2000)
        test_ds = spikedata.NMNIST("data/nmnist", train=False, num_steps=150, dt=2000)

    The dataset can also be manually downloaded, extracted and placed into
    ``root``, which allows the dataloader to skip straight to the
    generation of an hdf5 file.

    **Direct Download Links:**

        `Dropbox Train Set Link <https://www.dropbox.com/sh/tg2ljlbmtzygrag/AABlMOuR15ugeOxMCX0Pvoxga/Train.zip?dl=0>`_

        `Dropbox Test Set Link <https://www.dropbox.com/sh/tg2ljlbmtzygrag/AADSKgJ2CjaBWh75HnTNZyhca/Test.zip?dl=0>`_

    :param root: Root directory of dataset where ``Train.zip`` and
        ``Test.zip`` exist.
    :type root: string

    :param train: If True, creates dataset from ``Train.zip``, otherwise
        from ``Test.zip``
    :type train: bool, optional

    :param transform: A function/transform that takes in a sample of event
        data and returns a transformed version. By default, a pre-defined
        set of transforms is applied to all samples to convert them into a
        time-first tensor with correct orientation.
    :type transform: callable, optional

    :param target_transform: A function/transform that takes in the target
        and transforms it.
    :type target_transform: callable, optional

    :param download_and_create: If True, downloads the dataset from the
        internet and puts it in root directory. If the dataset is already
        downloaded, it is not downloaded again.
    :type download_and_create: bool, optional

    :param num_steps: Number of time steps, defaults to ``300``
    :type num_steps: int, optional

    :param dt: The interval of time integrated into a single time step, in
        microseconds, defaults to ``1000``
    :type dt: int, optional

    Dataloader adapted from `torchneuromorphic
    <https://github.com/nmi-lab/torchneuromorphic>`_ originally by Emre
    Neftci and Clemens Schaefer.

    The dataset is released under the Creative Commons
    Attribution-ShareAlike 4.0 license. All rights remain with the
    original authors.
    """

    _resources_url = [
        [
            "https://www.dropbox.com/sh/tg2ljlbmtzygrag/"
            "AABlMOuR15ugeOxMCX0Pvoxga/Train.zip?dl=1",
            None,
            "Train.zip",
        ],
        [
            "https://www.dropbox.com/sh/tg2ljlbmtzygrag/"
            "AADSKgJ2CjaBWh75HnTNZyhca/Test.zip?dl=1",
            None,
            "Test.zip",
        ],
    ]

    def __init__(
        self,
        root,
        train=True,
        transform=None,
        target_transform=None,
        download_and_create=True,
        num_steps=300,
        dt=1000,
    ):

        self.n = 0
        self.nclasses = self.num_classes = 10
        self.download_and_create = download_and_create
        self.train = train
        self.dt = dt
        self.num_steps = num_steps
        # self.balance = balance
        self.directory = root.split("n_mnist.hdf5")[0]
        self.resources_local = [
            self.directory + "/Train.zip",
            self.directory + "/Test.zip",
        ]

        if self.train:
            self.resources_local_extracted = [self.directory + "/Train"]
        else:
            self.resources_local_extracted = [self.directory + "/Test"]

        size = [2, 32, 32]  # 32//ds

        if transform is None:
            transform = Compose(
                [
                    CropDims(low_crop=[0, 0], high_crop=[32, 32], dims=[2, 3]),
                    Downsample(factor=[dt, 1, 1, 1]),
                    ToCountFrame(T=num_steps, size=size),
                    ToTensor(),
                    dvs_permute(),
                ]
            )

        if target_transform is not None:
            target_transform = Compose([Repeat(num_steps), toOneHot(10)])

        super().__init__(
            root=root + "/n_mnist.hdf5",
            transform=transform,
            target_transform=target_transform,
        )

        with h5py.File(self.root, "r", swmr=True, libver="latest") as f:
            try:
                if train:
                    self.n = f["extra"].attrs["Ntrain"]
                    self.keys = f["extra"]["train_keys"][()]
                    self.keys_by_label = f["extra"]["train_keys_by_label"][()]
                else:
                    self.n = f["extra"].attrs["Ntest"]
                    self.keys = f["extra"]["test_keys"][()]
                    self.keys_by_label = f["extra"]["test_keys_by_label"][()]
                    self.keys_by_label[:, :] -= self.keys_by_label[
                        0, 0
                    ]  # normalize

            except (AttributeError, KeyError) as e:
                file_name = "/n_mnist.hdf5"
                print(
                    f"Attribute not found in hdf5 file. You may be "
                    f"using an old hdf5 build. Delete {root + file_name} "
                    f"and run again."
                )
                print(e)
                raise

    def _download(self):
        # isexisting = super(NMNIST, self)._download()
        try:
            _ = super(NMNIST, self)._download()
        except Exception as e:
            print(e)

    def _create_hdf5(self):
        create_events_hdf5(self.directory, self.root)
        # create_events_hdf5(self.directory, self.root, self.balance)

    def __len__(self):
        return self.n

    def __getitem__(self, key):
        # Important to open and close in getitem to enable num_workers>0
        with h5py.File(self.root, "r", swmr=True, libver="latest") as f:
            if self.train:
                key = f["extra"]["train_keys"][key]
            else:
                key = f["extra"]["test_keys"][key]
            data, target = sample(f, key, T=self.num_steps * self.dt)

        if self.transform is not None:
            data = self.transform(data)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return data, target
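

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): how the dataset is
# typically wrapped in a standard ``torch.utils.data.DataLoader``. The root
# path, batch size, and worker count below are assumed values for
# demonstration only.
# ---------------------------------------------------------------------------
def _example_dataloader_usage(root="data/nmnist", batch_size=128):
    from torch.utils.data import DataLoader

    # Build the training split with the default transforms described in the
    # class docstring (each sample: [num_steps x 2 x 32 x 32]).
    train_ds = NMNIST(root, train=True, num_steps=300, dt=1000)

    # Because __getitem__ opens and closes the hdf5 file per sample,
    # num_workers > 0 is safe here.
    train_dl = DataLoader(
        train_ds, batch_size=batch_size, shuffle=True, num_workers=2
    )

    # Default collation prepends a batch dimension to the per-sample shape
    # documented in the class docstring.
    data, targets = next(iter(train_dl))
    return data, targets

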
def create_events_hdf5(directory, hdf5_filename):
    # fns_train, fns_test = nmnist_get_file_names(directory, balance)
    fns_train, fns_test = nmnist_get_file_names(directory)
    fns_train = [val for sublist in fns_train for val in sublist]
    fns_test = [val for sublist in fns_test for val in sublist]
    test_keys = []
    train_keys = []

    train_label_list = [[] for i in range(10)]
    test_label_list = [[] for i in range(10)]

    with h5py.File(hdf5_filename, "w") as f:
        f.clear()

        key = 0
        metas = []
        data_grp = f.create_group("data")
        extra_grp = f.create_group("extra")

        print("Creating n_mnist.hdf5...")
        for file_d in tqdm(fns_train + fns_test):
            istrain = file_d in fns_train
            data = nmnist_load_events_from_bin(file_d)
            times = data[:, 0]
            addrs = data[:, 1:]
            label = int(
                file_d.replace("\\", "/").split("/")[-2]
            )  # \\ for binder/colab
            # out = []

            if istrain:
                train_keys.append(key)
                train_label_list[label].append(key)
            else:
                test_keys.append(key)
                test_label_list[label].append(key)

            metas.append({"key": str(key), "training sample": istrain})
            subgrp = data_grp.create_group(str(key))
            subgrp.create_dataset("times", data=times, dtype=np.uint32)
            subgrp.create_dataset("addrs", data=addrs, dtype=np.uint8)
            subgrp.create_dataset("labels", data=label, dtype=np.uint8)
            subgrp.attrs["meta_info"] = str(metas[-1])

            assert label in range(10)
            key += 1

        # TO-DO: implement method for balanced MNIST classes
        # if balance:
        extra_grp.create_dataset("train_keys", data=train_keys)
        extra_grp.create_dataset("train_keys_by_label", data=train_label_list)
        extra_grp.create_dataset("test_keys_by_label", data=test_label_list)
        extra_grp.create_dataset("test_keys", data=test_keys)
        extra_grp.attrs["N"] = len(train_keys) + len(test_keys)
        extra_grp.attrs["Ntrain"] = len(train_keys)
        extra_grp.attrs["Ntest"] = len(test_keys)
        print("n_mnist.hdf5 was created successfully.")

        # else:  # test
        #     extra_grp.create_dataset('train_keys', data=train_keys)
        #     for idx, item in enumerate(train_label_list):
        #         _class_imbalance(extra_grp, 'train_keys_by_label_' +
        #                          str(idx), item)
        #     for idx, item in enumerate(test_label_list):
        #         _class_imbalance(extra_grp, 'test_keys_by_label_' +
        #                          str(idx), item)
        #     extra_grp.create_dataset('test_keys', data=test_keys)
        #     extra_grp.attrs['N'] = len(train_keys) + len(test_keys)
        #     extra_grp.attrs['Ntrain'] = len(train_keys)
        #     extra_grp.attrs['Ntest'] = len(test_keys)
        #     print(f"n_mnist.hdf5 was created successfully.")


# def _class_imbalance(group, header, data):
#     group.create_dataset(header, data)


def nmnist_load_events_from_bin(file_path, max_duration=None):
    timestamps, xaddr, yaddr, pol = load_ATIS_bin(file_path)
    return np.column_stack(
        [
            np.array(timestamps, dtype=np.uint32),
            np.array(pol, dtype=np.uint8),
            np.array(xaddr, dtype=np.uint16),
            np.array(yaddr, dtype=np.uint16),
        ]
    )


def nmnist_get_file_names(dataset_path):
    if not os.path.isdir(dataset_path):
        raise FileNotFoundError(
            "N-MNIST Dataset not found, looked at: {}".format(dataset_path)
        )

    train_files = []
    test_files = []
    for digit in range(10):
        digit_train = glob.glob(
            os.path.join(dataset_path, "Train/{}/*.bin".format(digit))
        )
        digit_test = glob.glob(
            os.path.join(dataset_path, "Test/{}/*.bin".format(digit))
        )
        train_files.append(digit_train)
        test_files.append(digit_test)

    # if balance:
    # We need the same number of train and test samples for each digit,
    # so compute the minimum class size for each split.
    max_n_train = min(map(lambda l_var: len(l_var), train_files))
    max_n_test = min(map(lambda l_var: len(l_var), test_files))

    n_train = max_n_train  # use every available training sample per class
    n_test = max_n_test  # use every available test sample per class

    assert (n_train <= max_n_train) and (
        n_test <= max_n_test
    ), "Requested more samples than present in dataset"

    print(
        f"\nN-MNIST: {n_train*10} train samples and {n_test*10} test samples"
    )

    # Crop extra samples of each digit
    train_files = map(lambda l_var: l_var[:n_train], train_files)
    test_files = map(lambda l_var: l_var[:n_test], test_files)

    return list(train_files), list(test_files)

    # else:  # test
    #     print(f"\nN-MNIST: {60000} train samples and {10000} test samples")
    #     return list(train_files), list(test_files)


def sample(hdf5_file, key, T=300):
    dset = hdf5_file["data"][str(key)]
    label = dset["labels"][()]
    # tend = dset["times"][-1]
    start_time = 0

    # ha = dset["times"][()]
    tmad = get_tmad_slice(
        dset["times"][()], dset["addrs"][()], start_time, T * 1000
    )
    tmad[:, 0] -= tmad[0, 0]
    return tmad, label
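

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): peek at the hdf5
# layout written by ``create_events_hdf5``. The path below is an assumed
# location for demonstration only; the group/attribute names match those
# created above.
# ---------------------------------------------------------------------------
def _example_inspect_hdf5(hdf5_path="data/nmnist/n_mnist.hdf5"):
    with h5py.File(hdf5_path, "r", swmr=True, libver="latest") as f:
        n_train = f["extra"].attrs["Ntrain"]  # number of training samples
        n_test = f["extra"].attrs["Ntest"]  # number of test samples

        # Each sample lives under data/<key> with "times", "addrs", "labels"
        first_key = f["extra"]["train_keys"][0]
        grp = f["data"][str(first_key)]
        times = grp["times"][()]  # event timestamps in microseconds
        addrs = grp["addrs"][()]  # per-event [polarity, x, y] addresses
        label = grp["labels"][()]  # digit label 0-9
    return n_train, n_test, times, addrs, label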