Source code for pynwb

'''This package provides functions, classes, and objects
for reading and writing data in NWB format
'''
import os.path
from pathlib import Path
from copy import deepcopy
import subprocess
import pickle
from warnings import warn
import h5py

from hdmf.spec import NamespaceCatalog
from hdmf.utils import docval, getargs, popargs, get_docval
from hdmf.backends.io import HDMFIO
from hdmf.backends.hdf5 import HDF5IO as _HDF5IO
from hdmf.build import BuildManager, TypeMap
import hdmf.common
from hdmf.common import load_type_config as hdmf_load_type_config
from hdmf.common import get_loaded_type_config as hdmf_get_loaded_type_config
from hdmf.common import unload_type_config as hdmf_unload_type_config


CORE_NAMESPACE = 'core'

from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace  # noqa: E402
from .validate import validate  # noqa: F401, E402

try:
    # see https://effigies.gitlab.io/posts/python-packaging-2023/
    from ._version import __version__
except ImportError:  # pragma: no cover
    # this is a relatively slower method for getting the version string
    from importlib.metadata import version  # noqa: E402

    __version__ = version("pynwb")
    del version


@docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'},
        {'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None},
        is_method=False)
def load_type_config(**kwargs):
    """
    Load the type configuration from the given path into the given TypeMap,
    or into the global TypeMap if none is provided.
    """
    config_path = kwargs['config_path']
    type_map = kwargs['type_map'] or get_type_map()
    hdmf_load_type_config(config_path=config_path, type_map=type_map)

@docval({'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None},
        is_method=False)
def get_loaded_type_config(**kwargs):
    """Return the type configuration loaded into the given TypeMap (or the global TypeMap)."""
    type_map = kwargs['type_map'] or get_type_map()
    return hdmf_get_loaded_type_config(type_map=type_map)

@docval({'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None},
        is_method=False)
def unload_type_config(**kwargs):
    """
    Remove the loaded type configuration, disabling configuration-based validation.
    """
    type_map = kwargs['type_map'] or get_type_map()
    hdmf_unload_type_config(type_map=type_map)

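# Usage sketch (editorial, not part of the module): load a type configuration
# into the global type map, inspect it, then remove it; the config path is
# hypothetical.
#
#     from pynwb import load_type_config, get_loaded_type_config, unload_type_config
#     load_type_config(config_path='my_type_config.yaml')
#     print(get_loaded_type_config())
#     unload_type_config()
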
def __get_resources() -> dict:
    try:
        from importlib.resources import files
    except ImportError:
        # TODO: Remove when python 3.9 becomes the new minimum
        from importlib_resources import files

    __location_of_this_file = files(__name__)
    __core_ns_file_name = 'nwb.namespace.yaml'
    __schema_dir = 'nwb-schema/core'
    cached_core_typemap = __location_of_this_file / 'core_typemap.pkl'
    cached_version_indicator = __location_of_this_file / '.core_typemap_version'

    ret = dict()
    ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name)
    ret['cached_typemap_path'] = str(cached_core_typemap)
    ret['cached_version_indicator'] = str(cached_version_indicator)
    return ret


def _get_resources():
    # LEGACY: Needed to support legacy implementation.
    # TODO: Remove this in PyNWB 3.0.
    warn("The function '_get_resources' is deprecated and will be removed in a future release.",
         DeprecationWarning)
    return __get_resources()


# a global type map
global __TYPE_MAP

__ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)

hdmf_typemap = hdmf.common.get_type_map()
__TYPE_MAP = TypeMap(__ns_catalog)
__TYPE_MAP.merge(hdmf_typemap, ns_catalog=True)

# load the core namespace, i.e. base NWB specification
__resources = __get_resources()

@docval({'name': 'extensions', 'type': (str, TypeMap, list),
         'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps',
         'default': None},
        returns="TypeMap loaded for the given extension or NWB core namespace",
        rtype=TypeMap, is_method=False)
def get_type_map(**kwargs):
    '''
    Get the TypeMap for the given extensions. If no extensions are provided,
    return the TypeMap for the core namespace.
    '''
    extensions = getargs('extensions', kwargs)
    type_map = None
    if extensions is None:
        type_map = deepcopy(__TYPE_MAP)
    else:
        if isinstance(extensions, TypeMap):
            type_map = extensions
        else:
            type_map = deepcopy(__TYPE_MAP)
        if isinstance(extensions, list):
            for ext in extensions:
                if isinstance(ext, str):
                    type_map.load_namespaces(ext)
                elif isinstance(ext, TypeMap):
                    type_map.merge(ext)
                else:
                    raise ValueError('extensions must be a list of paths to namespace specs or TypeMaps')
        elif isinstance(extensions, str):
            type_map.load_namespaces(extensions)
        elif isinstance(extensions, TypeMap):
            type_map.merge(extensions)
    return type_map

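# Usage sketch (editorial, not part of the module): get a copy of the core
# TypeMap, optionally extended from a namespace file; the extension path is
# hypothetical.
#
#     from pynwb import get_type_map
#     tm = get_type_map()  # core namespace only
#     tm_ext = get_type_map(extensions='ndx-my-extension.namespace.yaml')
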
@docval(*get_docval(get_type_map),
        returns="a BuildManager to use for I/O with the given extensions",
        rtype=BuildManager, is_method=False)
def get_manager(**kwargs):
    '''
    Get a BuildManager to use for I/O using the given extensions. If no extensions are provided,
    return a BuildManager that uses the core namespace.
    '''
    type_map = get_type_map(**kwargs)
    return BuildManager(type_map)

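# Usage sketch (editorial, not part of the module): a BuildManager built this
# way can be passed to NWBHDF5IO via its `manager` argument.
#
#     from pynwb import get_manager
#     manager = get_manager()  # uses the core namespace
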
@docval({'name': 'namespace_path', 'type': str,
         'doc': 'the path to the YAML with the namespace definition'},
        returns="the namespaces loaded from the given file", rtype=tuple,
        is_method=False)
def load_namespaces(**kwargs):
    '''
    Load namespaces from file
    '''
    namespace_path = getargs('namespace_path', kwargs)
    return __TYPE_MAP.load_namespaces(namespace_path)

def available_namespaces():
    """Returns all namespaces registered in the namespace catalog"""
    return __TYPE_MAP.namespace_catalog.namespaces

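# Usage sketch (editorial, not part of the module): register an extension
# namespace globally and list what is available; the path is hypothetical.
#
#     from pynwb import load_namespaces, available_namespaces
#     load_namespaces('ndx-my-extension.namespace.yaml')
#     print(available_namespaces())  # includes 'core' plus any loaded extensions
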
def __git_cmd(*args) -> subprocess.CompletedProcess:
    """
    Call git with the package as the directory regardless of cwd.

    Since any folder within a git repo works, don't try to ascend to the top,
    since if we're *not* actually in a git repo we're only guaranteed to know
    about the inner `pynwb` directory.
    """
    parent_dir = str(Path(__file__).parent)
    # use text mode so stdout/stderr are str rather than bytes
    result = subprocess.run(["git", "-C", parent_dir, *args], capture_output=True, text=True)
    return result


def __clone_submodules():
    if __git_cmd('rev-parse').returncode == 0:
        warn('NWB core schema not found in cloned installation, initializing submodules...', stacklevel=1)
        res = __git_cmd('submodule', 'update', '--init', '--recursive')
        if not res.returncode == 0:  # pragma: no cover
            raise RuntimeError(
                'Exception while initializing submodules, got:\n'
                'stdout:\n' + ('-' * 20) + res.stdout + "\nstderr:\n" + ('-' * 20) + res.stderr)
    else:  # pragma: no cover
        raise RuntimeError("Package is not installed from a git repository, can't clone submodules")


def __load_core_namespace(final: bool = False):
    """
    Load the core namespace into __TYPE_MAP, either by loading a pickled version
    or creating one anew and pickling it. We keep a dotfile next to it that tracks
    what version of pynwb created it, so that we invalidate it when the code changes.

    Args:
        final (bool): This function tries again if the submodules aren't cloned,
            but it shouldn't go into an infinite loop. If final is ``True``, don't recurse.
    """
    global __TYPE_MAP
    global __resources

    # if we have a version indicator file and it doesn't match the current version,
    # scrap the cached typemap
    if os.path.exists(__resources['cached_version_indicator']):
        with open(__resources['cached_version_indicator'], 'r') as f:
            cached_version = f.read().strip()
        if cached_version != __version__:
            Path(__resources['cached_typemap_path']).unlink(missing_ok=True)
    else:
        # remove any cached typemap, forcing re-creation
        Path(__resources['cached_typemap_path']).unlink(missing_ok=True)

    # load pickled typemap if we have one
    if os.path.exists(__resources['cached_typemap_path']):
        with open(__resources['cached_typemap_path'], 'rb') as f:
            __TYPE_MAP = pickle.load(f)  # type: TypeMap

    # otherwise make a new one and cache it
    elif os.path.exists(__resources['namespace_path']):
        load_namespaces(__resources['namespace_path'])
        with open(__resources['cached_typemap_path'], 'wb') as f:
            pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open(__resources['cached_version_indicator'], 'w') as f:
            f.write(__version__)

    # otherwise, we don't have the schema and try to initialize from submodules,
    # afterwards trying to load the namespace again
    else:
        try:
            __clone_submodules()
        except (FileNotFoundError, OSError, RuntimeError) as e:  # pragma: no cover
            if 'core' not in available_namespaces():
                warn(
                    "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, "
                    "you need to use the --recurse-submodules flag when cloning. "
                    "See developer installation instructions here: "
                    "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository\n"
                    f"Got exception: \n{e}"
                )
        if not final:
            __load_core_namespace(final=True)


__load_core_namespace()


# a function to register container classes with the global map
@docval({'name': 'neurodata_type', 'type': str, 'doc': 'the neurodata_type to get the spec for'},
        {'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'},
        {"name": "container_cls", "type": type,
         "doc": "the class to map to the specified neurodata_type", 'default': None},
        is_method=False)
def register_class(**kwargs):
    """Register an NWBContainer class to use for reading and writing a neurodata_type from a specification

    If container_cls is not specified, returns a decorator for registering an NWBContainer subclass
    as the class for neurodata_type in namespace.
    """
    neurodata_type, namespace, container_cls = getargs('neurodata_type', 'namespace', 'container_cls', kwargs)

    def _dec(cls):
        __TYPE_MAP.register_container_type(namespace, neurodata_type, cls)
        return cls

    if container_cls is None:
        return _dec
    else:
        _dec(container_cls)

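# Usage sketch (editorial, not part of the module): the decorator form of
# register_class; 'MyType' and 'ndx-my-extension' are hypothetical.
#
#     from pynwb import register_class
#     from pynwb.core import NWBContainer
#
#     @register_class('MyType', 'ndx-my-extension')
#     class MyType(NWBContainer):
#         ...
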
@docval({'name': 'h5py_file', 'type': h5py.File, 'doc': 'An NWB file'},
        rtype=tuple, is_method=False)
def get_nwbfile_version(**kwargs):
    """
    Get the NWB version of the file if it is an NWB file.

    :Returns: Tuple consisting of: 1) the original version string as stored in the file and
        2) a tuple with the parsed components of the version string, consisting of integers
        and strings, e.g., (2, 5, 1, beta). (None, None) will be returned if the file is not
        a valid NWB file or the nwb_version is missing, e.g., in the case when no data has
        been written to the file yet.
    """
    # Get the version string for the NWB file
    h5py_file = getargs('h5py_file', kwargs)
    try:
        nwb_version_string = h5py_file.attrs['nwb_version']
    # KeyError occurs when the file is empty (e.g., when creating a new file nothing has been written)
    # or when the HDF5 file is not a valid NWB file
    except KeyError:
        return None, None
    # Other systems may have written nwb_version as a fixed-length string, resulting in a
    # numpy.bytes_ object on read, rather than a variable-length string. To address this,
    # decode the bytes if necessary.
    if not isinstance(nwb_version_string, str):
        nwb_version_string = nwb_version_string.decode()
    # Parse the version string
    nwb_version_parts = nwb_version_string.replace("-", ".").replace("_", ".").split(".")
    nwb_version = tuple([int(i) if i.isnumeric() else i for i in nwb_version_parts])
    return nwb_version_string, nwb_version

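# Usage sketch (editorial, not part of the module): check the version of a
# file on disk; the file name is hypothetical.
#
#     import h5py
#     from pynwb import get_nwbfile_version
#     with h5py.File('sub-01_ses-01.nwb', 'r') as f:
#         version_string, version_tuple = get_nwbfile_version(f)
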
# a function to register an object mapper for a container class
[docs] @docval({"name": "container_cls", "type": type, "doc": "the Container class for which the given ObjectMapper class gets used"}, {"name": "mapper_cls", "type": type, "doc": "the ObjectMapper class to use to map", 'default': None}, is_method=False) def register_map(**kwargs): """Register an ObjectMapper to use for a Container class type If mapper_cls is not specified, returns a decorator for registering an ObjectMapper class as the mapper for container_cls. If mapper_cls is specified, register the class as the mapper for container_cls """ container_cls, mapper_cls = getargs('container_cls', 'mapper_cls', kwargs) def _dec(cls): __TYPE_MAP.register_map(container_cls, cls) return cls if mapper_cls is None: return _dec else: _dec(mapper_cls)
@docval({'name': 'neurodata_type', 'type': str,
         'doc': 'the neurodata_type to get the NWBContainer class for'},
        {'name': 'namespace', 'type': str,
         'doc': 'the namespace the neurodata_type is defined in'},
        is_method=False)
def get_class(**kwargs):
    """
    Parse the YAML file for a given neurodata_type that is a subclass of NWBContainer and
    automatically generate its python API. This will work for most containers, but is known
    to not work for descendants of MultiContainerInterface and DynamicTable, so these must
    be defined manually (for now). `get_class` infers the API mapping directly from the
    specification. If you want to define a custom mapping, you should not use this function
    and you should define the class manually.

    Examples:

    Generating and registering an extension is as simple as::

        MyClass = get_class('MyClass', 'ndx-my-extension')

    `get_class` defines only the `__init__` for the class. In cases where you want to provide
    additional methods for querying, plotting, etc. you can still use `get_class` and attach
    methods to the class after-the-fact, e.g.::

        def get_sum(self):
            return self.feat1 + self.feat2

        MyClass.get_sum = get_sum
    """
    neurodata_type, namespace = getargs('neurodata_type', 'namespace', kwargs)
    return __TYPE_MAP.get_dt_container_cls(neurodata_type, namespace)

class NWBHDF5IO(_HDF5IO):

    @staticmethod
    def can_read(path: str):
        """Determine whether a given path is readable by this class"""
        if not os.path.isfile(path):  # path is a file that exists
            return False
        try:
            with h5py.File(path, "r") as file:  # path is an HDF5 file
                version_info = get_nwbfile_version(file)
                if version_info[0] is None:
                    warn("Cannot read because the NWB version is missing in the HDF5 file. "
                         "The file is not a valid NWB file.")
                    return False
                elif version_info[1][0] < 2:  # major versions of NWB < 2 are not supported
                    warn("Cannot read because PyNWB supports NWB files version 2 and above.")
                    return False
                else:
                    return True
        except IOError:
            return False

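    # Usage sketch (editorial, not part of the module): a quick pre-flight
    # check before opening a file; the file name is hypothetical.
    #
    #     if NWBHDF5IO.can_read("sub-01_ses-01.nwb"):
    #         ...  # safe to open with NWBHDF5IO
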
    @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
            {'name': 'mode', 'type': str,
             'doc': 'the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x")',
             'default': 'r'},
            {'name': 'load_namespaces', 'type': bool,
             'doc': ('whether or not to load cached namespaces from given path - not applicable in write mode '
                     'or when `manager` is not None or when `extensions` is not None'),
             'default': True},
            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None},
            {'name': 'extensions', 'type': (str, TypeMap, list),
             'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps',
             'default': None},
            *get_docval(_HDF5IO.__init__, "file", "comm", "driver", "aws_region", "herd_path"),)
    def __init__(self, **kwargs):
        path, mode, manager, extensions, load_namespaces, file_obj, comm, driver, aws_region, herd_path = \
            popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces',
                    'file', 'comm', 'driver', 'aws_region', 'herd_path', kwargs)

        # Define the BuildManager to use
        io_modes_that_create_file = ['w', 'w-', 'x']
        if mode in io_modes_that_create_file or manager is not None or extensions is not None:
            load_namespaces = False

        if mode in io_modes_that_create_file and not str(path).endswith('.nwb'):
            warn(f"The file path provided: {path} does not end in '.nwb'. "
                 "It is recommended that NWB files using the HDF5 backend use the '.nwb' extension.",
                 UserWarning)

        if load_namespaces:
            tm = get_type_map()
            super().load_namespaces(tm, path, file=file_obj, driver=driver, aws_region=aws_region)
            manager = BuildManager(tm)

            # XXX: Leaving this here in case we want to revert to this strategy for
            # loading cached namespaces
            # ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
            # super().load_namespaces(ns_catalog, path)
            # tm = TypeMap(ns_catalog)
            # tm.copy_mappers(get_type_map())
        else:
            if manager is not None and extensions is not None:
                raise ValueError("'manager' and 'extensions' cannot be specified together")
            elif extensions is not None:
                manager = get_manager(extensions=extensions)
            elif manager is None:
                manager = get_manager()

        # Open the file
        super().__init__(path, manager=manager, mode=mode, file=file_obj, comm=comm,
                         driver=driver, aws_region=aws_region, herd_path=herd_path)

    @property
    def nwb_version(self):
        """
        Get the version of the NWB file opened via this NWBHDF5IO object.

        :returns: Tuple consisting of: 1) the original version string as stored in the file and
            2) a tuple with the parsed components of the version string, consisting of integers
            and strings, e.g., (2, 5, 1, beta). (None, None) will be returned if the nwb_version
            is missing, e.g., in the case when no data has been written to the file yet.
        """
        return get_nwbfile_version(self._file)

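    # Usage sketch (editorial, not part of the module): a typical read with
    # this class; the file name is hypothetical.
    #
    #     from pynwb import NWBHDF5IO
    #     with NWBHDF5IO('sub-01_ses-01.nwb', mode='r') as io:
    #         nwbfile = io.read()
    #         print(io.nwb_version[0])  # e.g. '2.6.0'
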
    @docval(*get_docval(_HDF5IO.read),
            {'name': 'skip_version_check', 'type': bool, 'doc': 'skip checking of NWB version',
             'default': False})
    def read(self, **kwargs):
        """
        Read the NWB file from the IO source.

        :raises TypeError: If the NWB file version is missing or not supported

        :return: NWBFile container
        """
        # Check that the NWB file is supported
        skip_version_check = popargs('skip_version_check', kwargs)
        if not skip_version_check:
            file_version_str, file_version = self.nwb_version
            if file_version is None:
                raise TypeError("Missing NWB version in file. The file is not a valid NWB file.")
            if file_version[0] < 2:
                raise TypeError("NWB version %s not supported. PyNWB supports NWB files version 2 and above."
                                % str(file_version_str))
        # read the file
        file = super().read(**kwargs)
        return file

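    # Usage sketch (editorial, not part of the module): skip_version_check can
    # bypass the version guard, e.g., for a file with a malformed version
    # attribute (hypothetical scenario).
    #
    #     with NWBHDF5IO('legacy_file.nwb', mode='r') as io:
    #         nwbfile = io.read(skip_version_check=True)
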
    @docval({'name': 'src_io', 'type': HDMFIO,
             'doc': 'the HDMFIO object (such as NWBHDF5IO) that was used to read the data to export'},
            {'name': 'nwbfile', 'type': 'NWBFile',
             'doc': 'the NWBFile object to export. If None, then the entire contents of src_io will be exported',
             'default': None},
            {'name': 'write_args', 'type': dict,
             'doc': 'arguments to pass to :py:meth:`~hdmf.backends.io.HDMFIO.write_builder`',
             'default': None},
            {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file',
             'default': True})
    def export(self, **kwargs):
        """
        Export an NWB file to a new NWB file using the HDF5 backend.

        If ``nwbfile`` is provided, then the build manager of ``src_io`` is used to build the container,
        and the resulting builder will be exported to the new backend. So if ``nwbfile`` is provided,
        ``src_io`` must have a non-None manager property. If ``nwbfile`` is None, then the contents of
        ``src_io`` will be read and exported to the new backend.

        Arguments can be passed in for the ``write_builder`` method using ``write_args``. Some arguments
        may not be supported during export. ``{'link_data': False}`` can be used to copy any datasets
        linked to from the original file instead of creating a new link to those datasets in the
        exported file.

        The exported file will not contain any links to the original file. All links, internal and
        external, will be preserved in the exported file. All references will also be preserved in the
        exported file.

        The exported file will use the latest schema version supported by the version of PyNWB used.
        For example, if the input file uses the NWB schema version 2.1 and the latest schema version
        supported by PyNWB is 2.3, then the exported file will use the 2.3 NWB schema.

        Example usage:

        .. code-block:: python

            with NWBHDF5IO(self.read_path, mode='r') as read_io:
                nwbfile = read_io.read()

                # ...  # modify nwbfile

                nwbfile.set_modified()  # this may be necessary if the modifications are changes to attributes

                with NWBHDF5IO(self.export_path, mode='w') as export_io:
                    export_io.export(src_io=read_io, nwbfile=nwbfile)

        See :ref:`export` and :ref:`modifying_data` for more information and examples.
        """
        nwbfile = popargs('nwbfile', kwargs)
        kwargs['container'] = nwbfile
        super().export(**kwargs)

    @staticmethod
    @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
            {'name': 'file', 'type': [h5py.File, 'S3File'], 'doc': 'a pre-existing h5py.File object',
             'default': None},
            is_method=False)
    def read_nwb(**kwargs):
        """
        Helper factory method for reading an NWB file and returning the NWBFile object
        """
        # Retrieve the filepath
        path = popargs('path', kwargs)
        file = popargs('file', kwargs)
        path = str(path) if path is not None else None

        # Streaming case
        if path is not None and (path.startswith("s3://") or path.startswith("http")):
            import fsspec
            fsspec_file_system = fsspec.filesystem("http")
            fsspec_file = fsspec_file_system.open(path, "rb")
            open_file = h5py.File(fsspec_file, "r")
            io = NWBHDF5IO(file=open_file)
            nwbfile = io.read()
        else:
            io = NWBHDF5IO(path=path, file=file, mode="r", load_namespaces=True)
            nwbfile = io.read()

        return nwbfile

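    # Usage sketch (editorial, not part of the module): one-shot reads from a
    # local path or a URL; both locations are hypothetical.
    #
    #     nwbfile = NWBHDF5IO.read_nwb(path='sub-01_ses-01.nwb')
    #     nwbfile = NWBHDF5IO.read_nwb(path='https://example.com/sub-01_ses-01.nwb')
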
from . import io as __io  # noqa: F401,E402
from .core import NWBContainer, NWBData  # noqa: F401,E402
from .base import TimeSeries, ProcessingModule  # noqa: F401,E402
from .file import NWBFile  # noqa: F401,E402
from . import behavior  # noqa: F401,E402
from . import device  # noqa: F401,E402
from . import ecephys  # noqa: F401,E402
from . import epoch  # noqa: F401,E402
from . import icephys  # noqa: F401,E402
from . import image  # noqa: F401,E402
from . import misc  # noqa: F401,E402
from . import ogen  # noqa: F401,E402
from . import ophys  # noqa: F401,E402
from . import legacy  # noqa: F401,E402
from hdmf.data_utils import DataChunkIterator  # noqa: F401,E402
from hdmf.backends.hdf5 import H5DataIO  # noqa: F401,E402

from ._due import due, BibTeX  # noqa: E402

due.cite(
    BibTeX("""
@article {10.7554/eLife.78362,
    article_type = {journal},
    title = {{The Neurodata Without Borders ecosystem for neurophysiological data science}},
    author = {R\"ubel, Oliver and Tritt, Andrew and Ly, Ryan and Dichter, Benjamin K and Ghosh, Satrajit
              and Niu, Lawrence and Baker, Pamela and Soltesz, Ivan and Ng, Lydia and Svoboda, Karel
              and Frank, Loren and Bouchard, Kristofer E},
    editor = {Colgin, Laura L and Jadhav, Shantanu P},
    volume = {11},
    year = {2022},
    month = {oct},
    pub_date = {2022-10-04},
    pages = {e78362},
    citation = {eLife 2022;11:e78362},
    doi = {10.7554/eLife.78362},
    url = {https://doi.org/10.7554/eLife.78362},
    keywords = {Neurophysiology, data ecosystem, data language, data standard, FAIR data, archive},
    journal = {eLife},
    issn = {2050-084X},
    publisher = {eLife Sciences Publications, Ltd}}
"""),
    description="The Neurodata Without Borders ecosystem for neurophysiological data science",
    path="pynwb/",
    version=__version__,
    cite_module=True,
)
del due, BibTeX