Source code for pynetcf.base

# Copyright (c) 2023, TU Wien, Department of Geodesy and Geoinformation.
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the Vienna University of Technology,
#     Department of Geodesy and Geoinformation nor the
#     names of its contributors may be used to endorse or promote products
#     derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Base classes for reading and writing time series and images in NetCDF
files following the Climate and Forecast (CF) Metadata Conventions
(http://cfconventions.org/).
"""

import os
import time
import datetime

import numpy as np
import netCDF4


class DatasetError(Exception):
    pass


class Dataset:
    """
    NetCDF file wrapper class that makes some things easier.

    Parameters
    ----------
    filename : string
        Filename of the netCDF file. If the file already exists it will be
        opened read-only unless an append or write mode is set. In mode "w"
        an existing file will be overwritten.
    name : string, optional
        Will be written as a global attribute if the file is a new file.
    file_format : string, optional
        File format.
    mode : string, optional
        Access mode. Default "r".
        "r" means read-only; no data can be modified.
        "w" means write; a new file is created, an existing file with the
        same name is deleted.
        "a" and "r+" mean append (in analogy with serial files); an
        existing file is opened for reading and writing.
        Appending "s" to modes "w", "r+" or "a" will enable unbuffered
        shared access to NETCDF3_CLASSIC or NETCDF3_64BIT formatted files.
        Unbuffered access may be useful even if you don't need shared
        access, since it may be faster for programs that don't access data
        sequentially. This option is ignored for NETCDF4 and
        NETCDF4_CLASSIC formatted files.
    zlib : boolean, optional
        If set, netCDF compression will be used. Default: True
    complevel : int, optional
        Compression level used, from 1 (low compression) to
        9 (high compression). Default: 4
    autoscale : bool, optional
        If disabled, data will not be automatically scaled when reading
        and writing.
    automask : bool, optional
        If disabled, data will not be masked during reading. This means
        fill values will be used instead of NaN.
    """

    def __init__(self, filename, name=None, file_format="NETCDF4",
                 mode="r", zlib=True, complevel=4, autoscale=True,
                 automask=True):

        self.dataset_name = name
        self.filename = filename
        self.file = None
        self.file_format = file_format
        self.buf_len = 0
        self.global_attr = {}
        self.global_attr["id"] = os.path.split(self.filename)[1]

        s = "%Y-%m-%d %H:%M:%S"
        self.global_attr["date_created"] = datetime.datetime.now().strftime(s)

        if self.dataset_name is not None:
            self.global_attr["dataset_name"] = self.dataset_name

        self.zlib = zlib
        self.complevel = complevel
        self.mode = mode
        self.autoscale = autoscale
        self.automask = automask

        if self.mode == "a" and not os.path.exists(self.filename):
            self.mode = "w"

        if self.mode == "w":
            self._create_file_dir()

        try:
            self.dataset = netCDF4.Dataset(self.filename, self.mode,
                                           format=self.file_format)
        except RuntimeError:
            raise IOError(f"File {self.filename} does not exist")

        self.dataset.set_auto_scale(self.autoscale)
        self.dataset.set_auto_mask(self.automask)

    def _create_file_dir(self):
        """
        Create the directory for the file to sit in. Avoids a race
        condition if multiple instances are writing files into the
        same directory.
        """
        path = os.path.dirname(self.filename)

        if not os.path.exists(path):
            try:
                os.makedirs(path)
            except OSError:
                time.sleep(1)
                self._create_file_dir()

    def _set_global_attr(self):
        """
        Write global attributes to the NetCDF file.
        """
        self.dataset.setncatts(self.global_attr)
        self.global_attr = {}
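
    # A minimal usage sketch (the path below is hypothetical, not part of
    # the original source). Mode "w" creates the file and any missing
    # parent directories; mode "a" silently falls back to "w" if the file
    # does not exist yet:
    #
    #   nc = Dataset("/tmp/example.nc", name="demo", mode="w")
    #   nc.add_global_attr("source", "sketch")
    #   nc.close()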

    def create_dim(self, name, n):
        """
        Create a dimension in the NetCDF file if it does not yet exist.

        Parameters
        ----------
        name : str
            Name of the NetCDF dimension.
        n : int
            Size of the dimension.
        """
        if name not in self.dataset.dimensions.keys():
            self.dataset.createDimension(name, size=n)

    def write_var(self, name, data=None, dim=None, attr={}, dtype=None,
                  zlib=None, complevel=None, chunksizes=None, **kwargs):
        """
        Create or overwrite values in a NetCDF variable. The data will be
        written to disk once flush or close is called.

        Parameters
        ----------
        name : str
            Name of the NetCDF variable.
        data : np.ndarray, optional
            Array containing the data. If not given then the variable
            will be left empty.
        dim : tuple, optional
            A tuple containing the dimension names.
        attr : dict, optional
            A dictionary containing the variable attributes.
        dtype : data type, string or numpy.dtype, optional
            If not given, data.dtype will be used.
        zlib : boolean, optional
            Explicit compression for this variable. If not given then the
            global attribute is used.
        complevel : int, optional
            Explicit compression level for this variable. If not given
            then the global attribute is used.
        chunksizes : tuple, optional
            Can be used to manually specify the HDF5 chunk sizes for each
            dimension of the variable.
        """
        fill_value = None
        if "_FillValue" in attr:
            fill_value = attr.pop("_FillValue")

        if dtype is None:
            dtype = data.dtype

        if zlib is None:
            zlib = self.zlib

        if not np.issubdtype(dtype, np.number):
            # only numeric data can be compressed
            zlib = False

        if complevel is None:
            complevel = self.complevel

        if name in self.dataset.variables.keys():
            var = self.dataset.variables[name]
        else:
            var = self.dataset.createVariable(name, dtype, dim,
                                              fill_value=fill_value,
                                              zlib=zlib,
                                              complevel=complevel,
                                              chunksizes=chunksizes,
                                              **kwargs)

        for attr_name in attr:
            attr_value = attr[attr_name]
            var.setncattr(attr_name, attr_value)

        var.set_auto_scale(self.autoscale)

        if data is not None:
            var[:] = data
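
    # A sketch of write_var (hypothetical file path and variable name, not
    # from the original source): create an unlimited "time" dimension, then
    # write a compressed float variable with a fill value taken from the
    # attribute dict.
    #
    #   with Dataset("/tmp/example.nc", mode="w") as nc:
    #       nc.create_dim("time", None)  # size None -> unlimited
    #       nc.write_var("sm", data=np.zeros(10, dtype=np.float32),
    #                    dim=("time",),
    #                    attr={"units": "m^3 m^-3", "_FillValue": -9999.0})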

    def append_var(self, name, data, **kwargs):
        """
        Append data along the unlimited dimension(s) of a variable.

        Parameters
        ----------
        name : string
            Name of the variable to append to.
        data : numpy.array
            Numpy array of correct dimension.

        Raises
        ------
        IOError
            If appending to a variable without an unlimited dimension.
        """
        if name in self.dataset.variables.keys():
            var = self.dataset.variables[name]
            dim_unlimited = []
            key = []

            for index, dim in enumerate(var.dimensions):
                unlimited = self.dataset.dimensions[dim].isunlimited()
                dim_unlimited.append(unlimited)

                if not unlimited:
                    # if the dimension is not unlimited set the slice to :
                    key.append(slice(None, None, None))
                else:
                    # if unlimited, set the slice of this dimension to
                    # append, meaning [var.shape[index]:]
                    key.append(slice(var.shape[index], None, None))

            dim_unlimited = np.array(dim_unlimited)
            nr_unlimited = np.where(dim_unlimited)[0].size
            key = tuple(key)

            # if there are unlimited dimensions we can do an append
            if nr_unlimited > 0:
                var[key] = data
            else:
                raise IOError(
                    "Cannot append to variable that has "
                    "no unlimited dimension")
        else:
            self.write_var(name, data, **kwargs)
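
    # A sketch of append_var (hypothetical names): as long as "time" is an
    # unlimited dimension, each call grows the variable; if the variable
    # does not exist yet, the call falls through to write_var.
    #
    #   nc.append_var("sm", np.ones(5, dtype=np.float32), dim=("time",))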

    def read_var(self, name):
        """
        Read a variable from the netCDF file.

        Parameters
        ----------
        name : string
            Name of the variable.

        Returns
        -------
        numpy.ndarray
            Variable data, or None if the file is not opened for reading
            or the variable does not exist.
        """
        if self.mode in ["r", "r+"]:
            if name in self.dataset.variables.keys():
                return self.dataset.variables[name][:]

    def add_global_attr(self, name, value):
        """
        Add a global attribute.

        Parameters
        ----------
        name : str
            Name.
        value : str or number
            Value.
        """
        self.global_attr[name] = value

    def flush(self):
        """
        Flush data to disk.
        """
        if self.dataset is not None:
            if self.mode in ["w", "r+"]:
                self._set_global_attr()
                self.dataset.sync()

    def close(self):
        """
        Flush and close file.
        """
        if self.dataset is not None:
            self.flush()
            self.dataset.close()
            self.dataset = None

    def __enter__(self):
        """
        Context manager enter.
        """
        return self

    def __exit__(self, value_type, value, traceback):
        """
        Context manager exit.
        """
        self.close()

    def __del__(self):
        """
        Destructor, makes sure an open file is closed.
        """
        if hasattr(self, "dataset"):
            self.close()
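

if __name__ == "__main__":
    # Minimal round-trip sketch (hypothetical temporary path, not part of
    # the original module): write, append and read back a variable using
    # the context manager, which guarantees flush() and close() on exit.
    import tempfile

    fname = os.path.join(tempfile.mkdtemp(), "example.nc")

    with Dataset(fname, name="demo", mode="w") as nc:
        nc.create_dim("time", None)  # unlimited dimension
        nc.write_var("sm", data=np.arange(5, dtype=np.float32),
                     dim=("time",), attr={"units": "m^3 m^-3"})
        nc.append_var("sm", np.arange(5, 10, dtype=np.float32))

    with Dataset(fname, mode="r") as nc:
        print(nc.read_var("sm"))  # [0. 1. 2. ... 9.]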