Geographic Information Systems Asked on December 15, 2021
I have written a code that transforms a NetCDF4 data (it can be downloaded from here) to GeoTIFF file using this link. This code works fine. But it seems that it rotates my data 90 or -90 degrees:
import sys
import numpy as np
from osgeo import gdal, osr, gdal_array
import xarray as xr
filepath = '/home/gpm/3B-DAY.MS.MRG.3IMERG.20170216-S000000-E235959.V06.nc4'
var_name = 'precipitationCal'
src_ds = gdal.Open(filepath)
if src_ds is None:
print('Open failed')
sys.exit()
# if len(src_ds.GetSubDatasets()) > 1:
# if there is more than one variable in the netCDF4
subdataset = 'NETCDF:"' + filepath + '":' + var_name
print('subdataset:', subdataset)
src_ds_sd = gdal.Open(subdataset)
print('opening dataset successful', src_ds_sd)
# getting info of the variable (subdataset)
ndv = src_ds_sd.GetRasterBand(1).GetNoDataValue()
xsize = src_ds_sd.RasterXSize
ysize = src_ds_sd.RasterYSize
geo_t = src_ds_sd.GetGeoTransform()
# geo_t = (geo_t[3], geo_t[5], geo_t[4], geo_t[0], geo_t[2], geo_t[1])
projection = osr.SpatialReference()
projection.ImportFromWkt(src_ds_sd.GetProjectionRef())
# close the subdataset and the whole dataset
# src_ds_sd = None
# src_ds = None
# read data using xrray
xr_ensemble = xr.open_dataset(filepath)
array = xr_ensemble[var_name]
array = np.ma.masked_array(array, mask=array==ndv, fill_value=ndv)
# array = np.transpose(array, (0, 2, 1))
final = (ndv, xsize, ysize, geo_t, projection, array)
datatype = gdal_array.NumericTypeCodeToGDALTypeCode(array.dtype)
if type(datatype) != np.int:
if datatype.startswith('gdal.GDT_')==False:
datatype = eval('gdal.GDT_' + datatype)
new_file_name = var_name +'33'+ '.tif'
zsize = array.shape[0]
# create a driver
driver = gdal.GetDriverByName('GTiff')
# set nans to the original no data value
array[np.isnan(array)] = ndv
# setup the dataset with zsize bands
dataset = driver.Create(new_file_name, xsize, ysize, zsize, datatype)
dataset.SetGeoTransform(geo_t)
spatial_reference = osr.SpatialReference()
spatial_reference.ImportFromEPSG(4326)
dataset.SetProjection(spatial_reference.ExportToWkt())
#%%
# write each slice of the array along the zsize
for i in range(zsize):
dataset.GetRasterBand(i+1).WriteArray(array[i])
dataset.GetRasterBand(i+1).SetNoDataValue(ndv)
dataset.FlushCache()
I am sure that I should change geo_t
in some way. Currently the shape of the array
is: (1, 3600, 1800)
3600 values for lon
, 1800 values for lat
Here is the output geotiff file in QGIS:
. This is the output geotiff file’s properties in QGIS:
Here is the class I wrote that is able to extract the data from the NetCDF4 file and save it as a new GeoTiff file. I compared my result with @snowman2 ' s and they were the same:
from osgeo import gdal, osr, gdal_array
import xarray as xr
import numpy as np
########################################################################
# below classes are used to handle output netcdf files from queires and save them into local machine
########################################################################
# class that is used to convert output netcdf files into geotiff files
class NetcdfHandler(object):
def __init__(self, filepath, var_name):
self.__filepath = filepath
self.__begin_date = ''
self.__var_name = var_name
# def create_required_outputs(self, newRasterfn, rasterOrigin, pixelWidth, pixelHeight, array):
def create_geotiff(self, new_raster_dir):
result = self.get_variable_info()
if result:
print('result is successful, ')
ndv, rows, cols, geo_t, out_raster_srs, data = result
# data = data[::-1]
# print('rows:', rows)
# print('cols:', cols)
raster_origin = (geo_t[0], geo_t[3])
pixel_width = geo_t[1]
pixel_height = geo_t[5]
# the below line was not necessary for the GPM, however, it was used in the gdal website from which
# I got assistance to write this code
# reversed_arr = data[::-1] # reverse array so the tif looks like the array
return self.array2raster(new_raster_dir, raster_origin, pixel_width, pixel_height, data) # converts array to raster
return 'unsuccessful!'
# get some variables that are used in create_geotiff method
def get_variable_info(self):
# print('directory:', self.__filepath)
src_ds = gdal.Open(self.__filepath)
if src_ds is None:
print('Open failed')
return None
if len(src_ds.GetSubDatasets()) > 1:
# if there is more than one variable in the netCDF4
subdataset = 'NETCDF:"' + self.__filepath + '":' + self.__var_name
src_ds_sd = gdal.Open(subdataset)
print('opening dataset successful', src_ds_sd)
# getting info of the variable (subdataset)
ndv = src_ds_sd.GetRasterBand(1).GetNoDataValue()
cols = src_ds_sd.RasterXSize
rows = src_ds_sd.RasterYSize
geo_t = src_ds_sd.GetGeoTransform()
# just for the GPM products we have to change n_rows and n_cols together,
# otherwise it should be removed so that it can automatically detects the geo_t
# as there was a difficulty with this data, I decided to change this manually
geo_t = (-360+geo_t[3], -geo_t[5], geo_t[4], geo_t[0], geo_t[2], geo_t[1])
# this option also should be checked
# geo_t = (-geo_t[3], -geo_t[5], geo_t[4], geo_t[0], geo_t[2], geo_t[1])
# print('geo_t', geo_t)
out_raster_srs = osr.SpatialReference()
out_raster_srs.ImportFromEPSG(4326)
# print('projection ref', src_ds_sd.GetProjectionRef())
# close the subdataset and the whole dataset
src_ds_sd = None
src_ds = None
# read data using xrray
# TODO: use src_ds_sd to get the dataset without using xr_ensemble again
xr_ensemble = xr.open_dataset(self.__filepath)
self.__begin_date = xr_ensemble.BeginDate
data = xr_ensemble[self.__var_name]
data = np.ma.masked_array(data, mask=data==ndv, fill_value=ndv)
# for gpm we have to change 2nd and 3rd axis because when we want to save the array,
# it will rotate -90 degrees otherwise
data = np.array(np.transpose(data, (0, 2, 1))[0])
cols, rows = rows, cols
return ndv, rows, cols, geo_t, out_raster_srs, data
def array2raster(self, new_raster_dir, raster_origin, pixel_width, pixel_height, array):
cols = array.shape[1]
rows = array.shape[0]
origin_x = raster_origin[0]
origin_y = raster_origin[1]
driver = gdal.GetDriverByName('GTiff')
output_path = ''.join([new_raster_dir, self.__var_name, '-', self.__begin_date, '.tif'])
out_raster = driver.Create(output_path, cols, rows, 1, gdal.GDT_Float32)
out_raster.SetGeoTransform((origin_x, pixel_width, 0, origin_y, 0, pixel_height))
outband = out_raster.GetRasterBand(1)
outband.WriteArray(array)
out_raster_srs = osr.SpatialReference()
out_raster_srs.ImportFromEPSG(4326)
out_raster.SetProjection(out_raster_srs.ExportToWkt())
outband.FlushCache()
return 'successful!'
def create_time(self):
date_string = None
@property
def filepath(self):
return self.__filepath
@filepath.setter
def filepath(self, value):
self.__filepath = value
#######################################
# test the class ######################
#######################################
path = '3B-DAY.MS.MRG.3IMERG.20170216-S000000-E235959.V06.nc4'
handler = NetcdfHandler(path, 'precipitationCal')
result = handler.create_geotiff('')
print('handler status:%s' % result)
Answered by Muser on December 15, 2021
For this, I would recommend rioxarray:
For some reason, GDAL seems to interpret x as y and y as x. If you open it up:
import xarray
import rioxarray
rds = rioxarray.open_rasterio(
"3B-DAY.MS.MRG.3IMERG.20170216-S000000-E235959.V06.nc4",
variable="precipitationCal",
)
You can see that y is way out of bounds with values close to 180 and -180.
So, I would recommend just opening with xarray:
xds = xarray.open_dataset(
"3B-DAY.MS.MRG.3IMERG.20170216-S000000-E235959.V06.nc4",
)
then, some tweaks to get things in the right location and metadata setup:
precip = xds.precipitationCal.transpose('time', 'lat', 'lon')
precip.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
precip.rio.write_crs("EPSG:4326", inplace=True)
And finally, write to a raster:
precip.rio.to_raster(new_file_name)
Answered by snowman2 on December 15, 2021
Get help from others!
Recent Questions
Recent Answers
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP