Geographic Information Systems Asked by tetje on December 30, 2020
For my code using rioxarray I am opening quite an amount of raster files. One after another. I even added del to remove those from memory, even though I thought I would not be nessary, because they should overwrite each other in the memory?
Nevertheless the ram usage of the code is high.
The function is as follows.:
def write_normalized_raster(parent_folders: List[Path], band_str, resolution) -> None:
"""
classification bands and raster_paths need to have same order
:param parent_folders:
:param band_str:
:param resolution:
:return: None
"""
# ------
# determine median of each image
# ------
local_raster_paths: List[Path] = []
for parent in parent_folders:
path = parent / "R{}m".format(resolution)
path = [x for x in path.iterdir() if str(x).endswith("_{}_{}m.jp2".format(band_str, resolution))][0]
local_raster_paths.append(path)
medians: Union[Dict[str, np.float32], Dict[str, Dict[int, np.float32]]] = {}
# detect clouds
for raster_path in local_raster_paths:
tmp_raster = rxa.open_rasterio(raster_path).astype("float32")
scl = rxa.open_rasterio(get_scl_path(raster_path)).rio.reproject_match(tmp_raster).data
tmp_data_masked = np.where(np.isin(scl, cloud_classes), np.nan, tmp_raster.data)
tmp_raster.data = tmp_data_masked
# median on non-nan pixels
if len(tmp_raster.data.shape) == 2:
medians.update({str(raster_path): np.nanmedian(tmp_data_masked)})
elif len(tmp_raster.data.shape) == 3:
medians.update({str(raster_path): {0: np.nanmedian(tmp_data_masked[0, :, :]),
1: np.nanmedian(tmp_data_masked[1, :, :]),
2: np.nanmedian(tmp_data_masked[2, :, :])}})
del tmp_raster
del tmp_data_masked
del scl
# ------
# determine mean median of all images
# ------
if len(medians[str(local_raster_paths[0])]) == 1:
# mean of median
avg = np.mean(medians.values())
else:
# mean of median
avg0 = np.mean([x[0] for x in list(medians.values())])
avg1 = np.mean([x[1] for x in list(medians.values())])
avg2 = np.mean([x[2] for x in list(medians.values())])
# ------
# relevel image intensities
# ------
for raster_path in local_raster_paths:
tmp_raster = rxa.open_rasterio(raster_path)
original_type = tmp_raster.dtype
tmp_raster = tmp_raster.astype("float32")
if len(tmp_raster.data.shape) == 2:
# apply median of median to each raster
mean = medians[str(raster_path)]
tmp_raster.data = np.divide(np.multiply(tmp_raster.data, avg), mean)
save_path = raster_path.parents[0] / (str(raster_path.stem) + "_normalized.tif")
tmp_raster.rio.to_raster(save_path)
del tmp_raster
elif len(tmp_raster.data.shape) == 3:
mean = medians[str(raster_path)]
tmp_data = tmp_raster.data
tmp_data[0, :, :] = np.rint(np.divide(np.multiply(tmp_data[0, :, :], avg0), mean[0]))
tmp_data[1, :, :] = np.rint(np.divide(np.multiply(tmp_data[1, :, :], avg1), mean[1]))
tmp_data[2, :, :] = np.rint(np.divide(np.multiply(tmp_data[2, :, :], avg2), mean[2]))
tmp_raster.data = tmp_data
tmp_raster = tmp_raster.astype(original_type)
save_path = raster_path.parents[0] / (str(raster_path.stem) + "_normalized.tif")
tmp_raster.rio.to_raster(save_path)
del tmp_data
del tmp_raster
The I think I am closing/ deleting every larger object after the usage. Didn’t I? Why do still have a memory usage that is rising to ab to 15 gb?
Therefore I profiled the code:
Line # Mem usage Increment Occurences Line Contents
============================================================
199 143.2 MiB 143.2 MiB 1 @profile
200 def write_normalized_raster(parent_folders: List[Path], band_str, resolution) -> None:
201 """
202 classification bands and raster_paths need to have same order
203 :param parent_folders:
204 :param band_str:
205 :param resolution:
206 :return: None
207 """
208
209 # ------
210 # determine median of each image
211 # ------
212 143.2 MiB 0.0 MiB 1 local_raster_paths: List[Path] = []
213 143.2 MiB 0.0 MiB 22 for parent in parent_folders:
214 143.2 MiB 0.0 MiB 21 path = parent / "R{}m".format(resolution)
215 143.2 MiB 0.0 MiB 231 path = [x for x in path.iterdir() if str(x).endswith("_{}_{}m.jp2".format(band_str, resolution))][0]
216 143.2 MiB 0.0 MiB 21 local_raster_paths.append(path)
217
218 143.2 MiB 0.0 MiB 1 medians: Union[Dict[str, np.float32], Dict[str, Dict[int, np.float32]]] = {}
219 # detect clouds
220 15987.7 MiB -14832.1 MiB 22 for raster_path in local_raster_paths:
221 15866.0 MiB 16265.2 MiB 21 tmp_raster = rxa.open_rasterio(raster_path).astype("float32")
222 15987.7 MiB -29038.4 MiB 21 scl = rxa.open_rasterio(get_scl_path(raster_path)).rio.reproject_match(tmp_raster).data
223
224 17367.4 MiB 14141.8 MiB 21 tmp_data_masked = np.where(np.isin(scl, cloud_classes), np.nan, tmp_raster.data)
225 15987.7 MiB -43805.9 MiB 21 tmp_raster.data = tmp_data_masked
226
227 # median on non-nan pixels
228 15987.7 MiB -14832.1 MiB 21 if len(tmp_raster.data.shape) == 2:
229 medians.update({str(raster_path): np.nanmedian(tmp_data_masked)})
230 15987.7 MiB -14832.1 MiB 21 elif len(tmp_raster.data.shape) == 3:
231 15987.7 MiB -14832.0 MiB 21 medians.update({str(raster_path): {0: np.nanmedian(tmp_data_masked[0, :, :]),
232 15987.7 MiB -14832.1 MiB 21 1: np.nanmedian(tmp_data_masked[1, :, :]),
233 15987.7 MiB -14832.1 MiB 21 2: np.nanmedian(tmp_data_masked[2, :, :])}})
234 15987.7 MiB -14832.1 MiB 21 del tmp_raster
235 15987.7 MiB -14832.1 MiB 21 del tmp_data_masked
236 15987.7 MiB -14832.1 MiB 21 del scl
237
238 # ------
239 # determine mean median of all images
240 # ------
241 14424.8 MiB -1562.9 MiB 1 if len(medians[str(local_raster_paths[0])]) == 1:
242 # mean of median
243 avg = np.mean(medians.values())
244
245 else:
246 # mean of median
247 14424.8 MiB 0.0 MiB 24 avg0 = np.mean([x[0] for x in list(medians.values())])
248 14424.8 MiB 0.0 MiB 24 avg1 = np.mean([x[1] for x in list(medians.values())])
249 14424.8 MiB 0.0 MiB 24 avg2 = np.mean([x[2] for x in list(medians.values())])
250
251 # ------
252 # relevel image intensities
253 # ------
254 16694.7 MiB -46947.5 MiB 22 for raster_path in local_raster_paths:
255 15997.0 MiB -62202.0 MiB 21 tmp_raster = rxa.open_rasterio(raster_path)
256 15997.0 MiB -51214.2 MiB 21 original_type = tmp_raster.dtype
257 18071.3 MiB -8179.1 MiB 21 tmp_raster = tmp_raster.astype("float32")
258 18071.3 MiB -51307.5 MiB 21 if len(tmp_raster.data.shape) == 2:
259 # apply median of median to each raster
260 mean = medians[str(raster_path)]
261 tmp_raster.data = np.divide(np.multiply(tmp_raster.data, avg), mean)
262 save_path = raster_path.parents[0] / (str(raster_path.stem) + "_normalized.tif")
263 tmp_raster.rio.to_raster(save_path)
264 del tmp_raster
265
266 18071.3 MiB -51307.5 MiB 21 elif len(tmp_raster.data.shape) == 3:
267 18071.3 MiB -51307.5 MiB 21 mean = medians[str(raster_path)]
268 18071.3 MiB -51307.5 MiB 21 tmp_data = tmp_raster.data
269 18071.3 MiB -51307.2 MiB 21 tmp_data[0, :, :] = np.rint(np.divide(np.multiply(tmp_data[0, :, :], avg0), mean[0]))
270 18071.3 MiB -51307.2 MiB 21 tmp_data[1, :, :] = np.rint(np.divide(np.multiply(tmp_data[1, :, :], avg1), mean[1]))
271 18071.3 MiB -51390.6 MiB 21 tmp_data[2, :, :] = np.rint(np.divide(np.multiply(tmp_data[2, :, :], avg2), mean[2]))
272
273 18071.3 MiB -51390.7 MiB 21 tmp_raster.data = tmp_data
274
275 18416.3 MiB -44147.1 MiB 21 tmp_raster = tmp_raster.astype(original_type)
276 18416.3 MiB -51390.6 MiB 21 save_path = raster_path.parents[0] / (str(raster_path.stem) + "_normalized.tif")
277 18074.4 MiB -55087.9 MiB 21 tmp_raster.rio.to_raster(save_path)
278 16694.7 MiB -75921.3 MiB 21 del tmp_data
279 16694.7 MiB -46947.5 MiB 21 del tmp_raster
To be honest with this report, I am totally unable to find, the problem and the reason, I havin it.
With kind regards.
Get help from others!
Recent Answers
Recent Questions
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP