# Source code for pynetcf.base

# Copyright (c) 2023, TU Wien, Department of Geodesy and Geoinformation.
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the Vienna University of Technology,
#     Department of Geodesy and Geoinformation nor the
#     names of its contributors may be used to endorse or promote products
#     derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Base classes for reading and writing time series and images in NetCDF files
using Climate Forecast Metadata Conventions (http://cfconventions.org/).
"""

import os
import time
import numpy as np
import netCDF4
import datetime


class DatasetError(Exception):
    """Exception type declared by this module for dataset-related errors."""


class Dataset:
    """
    NetCDF file wrapper class that makes some things easier.

    Parameters
    ----------
    filename : string
        filename of netCDF file. The file is opened with the given ``mode``
        (read-only by default). If ``mode`` is "a" and the file does not
        exist yet, it is created (the mode is switched to "w").
    name : string, optional
        will be written as a global attribute if the file is a new file
    file_format : string, optional
        file format
    mode : string, optional
        access mode. default "r"
        "r" means read-only; no data can be modified.
        "w" means write; a new file is created, an existing file with the
            same name is deleted.
        "a" and "r+" mean append (in analogy with serial files); an existing
            file is opened for reading and writing.
        Appending s to modes w, r+ or a will enable unbuffered shared access
        to NETCDF3_CLASSIC or NETCDF3_64BIT formatted files. Unbuffered
        access may be useful even if you don't need shared access, since it
        may be faster for programs that don't access data sequentially.
        This option is ignored for NETCDF4 and NETCDF4_CLASSIC
        formatted files.
    zlib : boolean, optional
        Default True
        if set netCDF compression will be used
    complevel : int, optional
        Default 4
        compression level used from 1(low compression) to 9(high compression)
    autoscale : bool, optional
        If disabled data will not be automatically scaled when reading and
        writing
    automask : bool, optional
        If disabled data will not be masked during reading.
        This means Fill Values will be used instead of NaN.

    Raises
    ------
    IOError
        If the underlying netCDF4 library raises a RuntimeError while
        opening the file.
    """

    def __init__(self,
                 filename,
                 name=None,
                 file_format="NETCDF4",
                 mode="r",
                 zlib=True,
                 complevel=4,
                 autoscale=True,
                 automask=True):

        self.dataset_name = name
        self.filename = filename
        self.file = None
        self.file_format = file_format
        self.buf_len = 0

        # Global attributes are collected here and written on flush()/close().
        self.global_attr = {}
        self.global_attr["id"] = os.path.split(self.filename)[1]
        s = "%Y-%m-%d %H:%M:%S"
        self.global_attr["date_created"] = datetime.datetime.now().strftime(s)
        if self.dataset_name is not None:
            self.global_attr["dataset_name"] = self.dataset_name

        self.zlib = zlib
        self.complevel = complevel
        self.mode = mode
        self.autoscale = autoscale
        self.automask = automask

        # Defined before opening so close()/__del__ always find the
        # attribute, even when opening the file fails below.
        self.dataset = None

        # Appending to a file that does not exist yet means creating it.
        if self.mode == "a" and not os.path.exists(self.filename):
            self.mode = "w"
        if self.mode == "w":
            self._create_file_dir()

        try:
            self.dataset = netCDF4.Dataset(self.filename,
                                           self.mode,
                                           format=self.file_format)
        except RuntimeError as err:
            # Keep the original error as the cause for easier debugging.
            raise IOError(f"File {self.filename} does not exist") from err

        self.dataset.set_auto_scale(self.autoscale)
        self.dataset.set_auto_mask(self.automask)

    def _create_file_dir(self):
        """
        Create directory for file to sit in.

        A filename without a directory component refers to the current
        working directory, so nothing has to be created in that case.
        ``exist_ok=True`` avoids a race condition if multiple instances are
        writing files into the same directory; any other OSError (e.g.
        missing permissions) is propagated to the caller.
        """
        path = os.path.dirname(self.filename)
        if path and not os.path.exists(path):
            os.makedirs(path, exist_ok=True)

    def _set_global_attr(self):
        """
        Write the collected global attributes to the NetCDF file and clear
        the pending attribute dictionary.
        """
        self.dataset.setncatts(self.global_attr)
        self.global_attr = {}

    def create_dim(self, name, n):
        """
        Create dimension for NetCDF file if it does not yet exist.

        Parameters
        ----------
        name : str
            Name of the NetCDF dimension.
        n : int
            Size of the dimension.
        """
        if name not in self.dataset.dimensions:
            self.dataset.createDimension(name, size=n)

    def write_var(self,
                  name,
                  data=None,
                  dim=None,
                  attr=None,
                  dtype=None,
                  zlib=None,
                  complevel=None,
                  chunksizes=None,
                  **kwargs):
        """
        Create or overwrite values in a NetCDF variable. The data will be
        written to disk once flush or close is called

        Parameters
        ----------
        name : str
            Name of the NetCDF variable.
        data : np.ndarray, optional
            Array containing the data.
            if not given then the variable will be left empty
        dim : tuple, optional
            A tuple containing the dimension names.
        attr : dict, optional
            A dictionary containing the variable attributes. A "_FillValue"
            entry is used as fill value when the variable is created.
            The dictionary passed in is not modified.
        dtype: data type, string or numpy.dtype, optional
            if not given data.dtype will be used
        zlib: boolean, optional
            explicit compression for this variable
            if not given then the setting of this Dataset instance is used
        complevel: int, optional
            explicit compression level for this variable
            if not given then the setting of this Dataset instance is used
        chunksizes : tuple, optional
            chunksizes can be used to manually specify the
            HDF5 chunksizes for each dimension of the variable.
        **kwargs
            Passed on to ``createVariable`` when the variable is created.

        Raises
        ------
        ValueError
            If neither ``data`` nor ``dtype`` is given.
        """
        # Work on a copy so neither the caller's dict nor a shared default
        # is mutated when the fill value is removed.
        attr = {} if attr is None else dict(attr)
        fill_value = attr.pop("_FillValue", None)

        if dtype is None:
            if data is None:
                raise ValueError(
                    "Either data or dtype has to be given to write a variable")
            dtype = data.dtype

        if zlib is None:
            zlib = self.zlib
        if not np.issubdtype(dtype, np.number):
            # Only numeric data can be compressed
            zlib = False
        if complevel is None:
            complevel = self.complevel

        if name in self.dataset.variables:
            var = self.dataset.variables[name]
        else:
            var = self.dataset.createVariable(name,
                                              dtype,
                                              dim,
                                              fill_value=fill_value,
                                              zlib=zlib,
                                              complevel=complevel,
                                              chunksizes=chunksizes,
                                              **kwargs)

        for attr_name, attr_value in attr.items():
            var.setncattr(attr_name, attr_value)

        var.set_auto_scale(self.autoscale)
        if data is not None:
            var[:] = data

    def append_var(self, name, data, **kwargs):
        """
        append data along unlimited dimension(s) of variable

        If the variable does not exist yet it is created via ``write_var``.

        Parameters
        ----------
        name : string
            Name of variable to append to.
        data : numpy.array
            Numpy array of correct dimension.
        **kwargs
            Passed on to ``write_var`` if the variable has to be created.

        Raises
        ------
        IOError
            if appending to variable without unlimited dimension
        """
        if name not in self.dataset.variables:
            self.write_var(name, data, **kwargs)
            return

        var = self.dataset.variables[name]
        key = []
        has_unlimited = False
        for index, dim in enumerate(var.dimensions):
            if self.dataset.dimensions[dim].isunlimited():
                has_unlimited = True
                # start writing at the current end of this dimension,
                # i.e. [var.shape[index]:]
                key.append(slice(var.shape[index], None, None))
            else:
                # fixed dimensions are addressed completely, i.e. [:]
                key.append(slice(None, None, None))

        # only with unlimited dimensions we can do an append
        if not has_unlimited:
            raise IOError(
                "Cannot append to variable that has no unlimited dimension")

        var[tuple(key)] = data

    def read_var(self, name):
        """
        reads variable from netCDF file

        Parameters
        ----------
        name : string
            name of the variable

        Returns
        -------
        data : array or None
            Content of the variable. None if the file mode is neither "r"
            nor "r+" or if the variable does not exist.
        """
        # NOTE(review): mode "a" is not accepted here although the file is
        # readable in that mode - confirm this is intended.
        if self.mode in ["r", "r+"] and name in self.dataset.variables:
            return self.dataset.variables[name][:]
        return None

    def add_global_attr(self, name, value):
        """
        Add global attribute. It is written to the file on flush or close.

        Parameters
        ----------
        name : str
            Name.
        value : str or number
            Value.
        """
        self.global_attr[name] = value

    def flush(self):
        """
        Write pending global attributes and flush data to disk.

        Only acts if the file is open and the mode is "w" or "r+".
        """
        # NOTE(review): mode "a" on an already existing file is not covered
        # here, so pending global attributes are not written and sync() is
        # not called for it - confirm this is intended.
        if self.dataset is not None and self.mode in ["w", "r+"]:
            self._set_global_attr()
            self.dataset.sync()

    def close(self):
        """
        Flush and close file. Calling it again afterwards is a no-op.
        """
        if self.dataset is not None:
            self.flush()
            self.dataset.close()
            self.dataset = None

    def __enter__(self):
        """
        ContextManager enter.
        """
        return self

    def __exit__(self, value_type, value, traceback):
        """
        ContextManager exit, closes the file.
        """
        self.close()

    def __del__(self):
        """
        Destructor, closes the file if it is still open.
        """
        # The attribute can be missing if __init__ failed very early.
        if hasattr(self, "dataset"):
            self.close()