英文:
Plotting tiles of nighttime lights from multiple h5 files into one big map
问题
I understand your request. Please find the translated code below:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import cartopy.crs as ccrs
import os
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import matplotlib.ticker as mticker
indir = 'D:/PYTHON/NASA/VNP46A1.A2023058.h17v03.001.2023060025350.h5'
data = h5py.File(indir)
list(data.keys())
dnb = data['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb = np.array(dnb)
dnb = np.where(dnb==65535, np.nan, dnb)
lat_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['NorthBoundingCoord']
lat_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['SouthBoundingCoord']
lon_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['WestBoundingCoord']
lon_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['EastBoundingCoord']
lats = np.linspace(lat_max, lat_min, dnb.shape[1])
lons = np.linspace(lon_min, lon_max, dnb.shape[0])
x, y = np.meshgrid(lons, lats)
cmap = plt.get_cmap('cividis')
crs = ccrs.PlateCarree()
fig = plt.figure(figsize=(6,6))
ax = plt.axes(projection=crs)
ax.coastlines(resolution='10m', color='white',alpha=0.5)
ax.pcolormesh(x, y, dnb, cmap = cmap, vmin = 0, vmax = 100)
gl = ax.gridlines(crs=crs, draw_labels=True, alpha=0.5)
gl.xlabels_top = None
gl.ylabels_left = None
xgrid = np.arange(lon_min-2, lon_max+2, 2.)
ygrid = np.arange(lat_min-2, lat_max+2, 2.)
gl.xlocator = mticker.FixedLocator(xgrid.tolist())
gl.ylocator = mticker.FixedLocator(ygrid.tolist())
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'size': 10, 'color': 'black'}
gl.ylabel_style = {'size': 10, 'color': 'black'}
plt.xlim((lon_min,lon_max))
plt.ylim((lat_min,lat_max))
plt.show()
英文:
I am trying to create a map of nighttime lights for Europe. Data source is NASA and I am using their VNP46A1 data for 2022/03/27 (you can find the exact files in my Dropbox, too). For downloading the data, I select the whole EU region which results in about 14 separate files or "tiles". I managed to read in and plot the files individually, but I don't know how to put them together to get one big map of the whole European region. I'm not sure what keywords I should search for because I'm totally unfamiliar with this kind of file format and plotting something like this. Below is my working code for plotting a single file (adapted from this Medium article).
import numpy as np
import matplotlib.pyplot as plt
import h5py
import cartopy.crs as ccrs
import os
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.ticker as mticker
import glob
indir = 'D:/PYTHON/NASA/VNP46A1.A2023058.h17v03.001.2023060025350.h5'
data = h5py.File(indir)
list(data.keys())
dnb = data['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb = np.array(dnb)
dnb = np.where(dnb==65535, np.nan, dnb)
# np.nanmax(dnb)
lat_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['NorthBoundingCoord']
lat_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['SouthBoundingCoord']
lon_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['WestBoundingCoord']
lon_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['EastBoundingCoord']
lats = np.linspace(lat_max, lat_min, dnb.shape[1])
lons = np.linspace(lon_min, lon_max, dnb.shape[0])
x, y = np.meshgrid(lons, lats)
cmap = plt.get_cmap('cividis')
crs = ccrs.PlateCarree()
fig = plt.figure(figsize=(6,6))
ax = plt.axes(projection=crs)
ax.coastlines(resolution='10m', color='white',alpha=0.5)
ax.pcolormesh(x, y, dnb, cmap = cmap, vmin = 0, vmax = 100)
plt.xlim()
gl = ax.gridlines(crs=crs, draw_labels=True, alpha=0.5)
gl.xlabels_top = None
gl.ylabels_left = None
xgrid = np.arange(lon_min-2, lon_max+2, 2.)
ygrid = np.arange(lat_min-2, lat_max+2, 2.)
gl.xlocator = mticker.FixedLocator(xgrid.tolist())
gl.ylocator = mticker.FixedLocator(ygrid.tolist())
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'size': 10, 'color': 'black'}
gl.ylabel_style = {'size': 10, 'color': 'black'}
plt.xlim((lon_min,lon_max))
plt.ylim((lat_min,lat_max))
plt.show()
This is the output I get after running the above code:
My desired end result would be something like this:
I thought about plotting in a loop but that took very long and yielded incorrect results in the end.
答案1
得分: 2
Understood, I will only provide translations of the code parts you provided without additional content:
Step 1 - 获取纬度/经度的最大/最小值:
folder = 'D:/PYTHON/NASA'
h5files = glob.glob(folder + '/*.h5')
n_files = len(h5files)
n_files_lon = 5
n_files_lat = 4
lat_lon_arr = np.empty(shape=(n_files, 4))
for i, h5file in enumerate(h5files):
with h5py.File(h5file) as h5f:
dnb_attr_ds = h5f['HDFEOS/GRIDS/VNP_Grid_DNB']
lat_min = dnb_attr_ds.attrs['SouthBoundingCoord'][0]
lat_max = dnb_attr_ds.attrs['NorthBoundingCoord'][0]
lon_min = dnb_attr_ds.attrs['WestBoundingCoord'][0]
lon_max = dnb_attr_ds.attrs['EastBoundingCoord'][0]
lat_lon_arr[i] = [round(lat_min), round(lat_max), round(lon_min), round(lon_max)]
dnb_ds = h5f['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb_ds_shape = dnb_ds.shape
map_lat_min = np.amin(lat_lon_arr[:, 0])
map_lat_max = np.amax(lat_lon_arr[:, 1])
map_lon_min = np.amin(lat_lon_arr[:, 2])
map_lon_max = np.amax(lat_lon_arr[:, 3])
print(f'map_lat_min= {map_lat_min:.1f}, map_lat_max={map_lat_max:.1f}',
f'map_lon_min={map_lon_min:.1f}, map_lon_max= {map_lon_max:.1f}\n')
Step 2 - 从上述纬度和经度的最小/最大值创建x和y数组:
lats = np.linspace(map_lat_max, map_lat_min, n_files_lat * dnb_ds_shape[1])#.astype(np.float32)
lons = np.linspace(map_lon_min, map_lon_max, n_files_lon * dnb_ds_shape[0])#.astype(np.float32)
x, y = np.meshgrid(lons, lats)
Step 3 - 创建dnb数组并将H5 DNB数据加载到其中:
dnb = np.empty(shape=x.shape, dtype=np.uint16)
cnt_lat, cnt_lon = 0, 0
for h5file in h5files:
with h5py.File(h5file) as h5f:
dnb_ds = h5f['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb[cnt_lat * dnb_ds_shape[0]:(cnt_lat + 1) * dnb_ds_shape[0],
cnt_lon * dnb_ds_shape[1]:(cnt_lon + 1) * dnb_ds_shape[1]] = dnb_ds
cnt_lat += 1
if cnt_lat % n_files_lat == 0:
cnt_lat = 0
cnt_lon += 1
Step 4 - 用于绘制5x4网格的matplotlib/cartopy更改:
fig = plt.figure(figsize=(n_files_lat * 6, n_files_lon * 6))
...
xgrid = np.arange(map_lon_min - 2, map_lon_max + 2, 2.)
ygrid = np.arange(map_lat_min - 2, map_lat_max + 2, 2.)
...
plt.xlim((map_lon_min, map_lon_max))
plt.ylim((map_lat_min, map_lat_max))
英文:
Ok, this was more work than I expected. I tried to write a generic procedure (not dependent on latitude/longitude range. Mapping HDF5 DNB data to the NumPy dnb
array got too complicated. (And, there were a few matplotlib and cartopy surprises along the way.) Code and image are at the end.
First, a clarification about the data. There are 20 files (It's a grid of 5x4 datasets/images.). Each covers a 10 deg x 10 deg grid. Overall lat/lon range of the files is +30 to +70 latitude and -10 to +30 longitude. To plot as one image, the x, y, and dnb
arrays have to be sized to hold all 20 datasets, and the HDF5 DNB datasets have to be mapped to the dnb
array.
Here is the procedure:
- Loop over the files to get the max/min for latitude and longitude. (Since you know the range, you could skip this step and hard code the values.) I also get the DNB dataset type and shape, and use to size the
x, y, and dnb
arrays. - Create the
x and y
arrays using the min/max longitude and latitude from values above. - Loop over over the files (again) to read the DNB data and map to the
dnb
array using slice notation. I hard coded the slicing indices. (It could be done more elegantly.) - Once the data is loaded, the matplotlib / cartopy procedure is almost the same -- only a few lines have to be changed to account for the map extents for
all 20 files.
Code for each step below:
Step 1 - Get the max/min latitude/longitude values:
folder = 'D:/PYTHON/NASA'
h5files = glob.glob(folder+'/*.h5')
n_files = len(h5files)
n_files_lon = 5
n_files_lat = 4
lat_lon_arr = np.empty(shape=(n_files,4))
for i, h5file in enumerate(h5files):
with h5py.File(h5file) as h5f:
dnb_attr_ds = h5f['HDFEOS/GRIDS/VNP_Grid_DNB']
lat_min = dnb_attr_ds.attrs['SouthBoundingCoord'][0]
lat_max = dnb_attr_ds.attrs['NorthBoundingCoord'][0]
lon_min = dnb_attr_ds.attrs['WestBoundingCoord'][0]
lon_max = dnb_attr_ds.attrs['EastBoundingCoord'][0]
lat_lon_arr[i] = [round(lat_min), round(lat_max), round(lon_min), round(lon_max)]
dnb_ds = h5f['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb_ds_shape = dnb_ds.shape
map_lat_min = np.amin(lat_lon_arr[:,0])
map_lat_max = np.amax(lat_lon_arr[:,1])
map_lon_min = np.amin(lat_lon_arr[:,2])
map_lon_max = np.amax(lat_lon_arr[:,3])
print(f'map_lat_min= {map_lat_min:.1f}, map_lat_max={map_lat_max:.1f}',
f'map_lon_min={map_lon_min:.1f}, map_lon_max= {map_lon_max:.1f}\n')
Step 2 - Create the x and y arrays from min/max longitude and latitude values above:
lats = np.linspace(map_lat_max, map_lat_min, n_files_lat*dnb_ds_shape[1])#.astype(np.float32)
lons = np.linspace(map_lon_min, map_lon_max, n_files_lon*dnb_ds_shape[0])#.astype(np.float32)
x, y = np.meshgrid(lons, lats)
Step 3 - Create the dnb array and load H5 DNB data into it:
dnb = np.empty(shape=x.shape,dtype=np.uint16)
cnt_lat, cnt_lon = 0, 0
for h5file in h5files:
with h5py.File(h5file) as h5f:
dnb_ds = h5f['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb[cnt_lat*dnb_ds_shape[0]:(cnt_lat+1)*dnb_ds_shape[0],
cnt_lon*dnb_ds_shape[1]:(cnt_lon+1)*dnb_ds_shape[1]] = dnb_ds
cnt_lat += 1
if cnt_lat % n_files_lat == 0:
cnt_lat = 0
cnt_lon += 1
Step 4 - matplotlib/cartopy changes to plot 5x4 grid:
fig = plt.figure(figsize=(n_files_lat*6,n_files_lon*6))
...
xgrid = np.arange(map_lon_min-2, map_lon_max+2, 2.)
ygrid = np.arange(map_lat_min-2, map_lat_max+2, 2.)
...
plt.xlim((map_lon_min,map_lon_max))
plt.ylim((map_lat_min,map_lat_max))
答案2
得分: 0
import os
import import glob
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cf
import numpy as np
import h5py
dirname = 'D:/PYTHON/NASA'
h5_files = []
i = 0
for file in list(glob.glob(dirname + '/VNP46A1.*.h5')):
data = h5py.File(file)
# Read dataset.
data2D = data['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb = data2D[:, :].astype(np.double)
# Read geolocation dataset.
lat_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['NorthBoundingCoord']
lat_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['SouthBoundingCoord']
lon_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['WestBoundingCoord']
lon_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['EastBoundingCoord']
latitude_max = lat_max[:]
latitude_min = lat_min[:]
longitude_max = lon_max[:]
longitude_min = lon_min[:]
# Retrieve attributes.
attrs = list(data2D.attrs)
aoa = data2D.attrs['add_offset']
add_offset = aoa[0]
fva = data2D.attrs["_FillValue"]
_FillValue = fva[0]
sfa = data2D.attrs["scale_factor"]
scale_factor = sfa[0]
ua = data2D.attrs["units"]
units = ua[0]
dnb[dnb == _FillValue] = np.nan
dnb = (dnb - add_offset) * scale_factor
datam = np.ma.masked_array(dnb, np.isnan(dnb))
if i == 0:
data_m = datam
latitude_max_m = latitude_max
latitude_min_m = latitude_min
longitude_max_m = longitude_max
longitude_min_m = longitude_min
else:
data_m = np.vstack([data_m, datam])
latitude_max_m = np.vstack([latitude_max_m, latitude_max])
latitude_min_m = np.vstack([latitude_min_m, latitude_min])
longitude_max_m = np.vstack([longitude_max_m, longitude_max])
longitude_min_m = np.vstack([longitude_min_m, longitude_min])
i = i + 1
英文:
I couldn't quite figure out how to do what @kcw78 suggested in the comments, so this is the approach I took (largely adapted from this forum). I still don't think it's right. Plotting it doesn't work either.
import os
import glob
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cf
import numpy as np
import h5py
dirname = 'D:/PYTHON/NASA'
h5_files = []
i = 0
for file in list(glob.glob(dirname+'/VNP46A1.*.h5')):
data = h5py.File(file)
# Read dataset.
data2D = data['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/DNB_At_Sensor_Radiance_500m']
dnb = data2D[:,:].astype(np.double)
# Read geolocation dataset.
lat_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['NorthBoundingCoord']
lat_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['SouthBoundingCoord']
lon_min = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['WestBoundingCoord']
lon_max = data['HDFEOS/GRIDS/VNP_Grid_DNB'].attrs['EastBoundingCoord']
latitude_max = lat_max[:]
latitude_min = lat_min[:]
longitude_max = lon_max[:]
longitude_min = lon_min[:]
# Retrieve attributes.
attrs = list(data2D.attrs)
aoa = data2D.attrs['add_offset']
add_offset = aoa[0]
fva = data2D.attrs["_FillValue"]
_FillValue = fva[0]
sfa = data2D.attrs["scale_factor"]
scale_factor = sfa[0]
ua = data2D.attrs["units"]
units = ua[0]
dnb[dnb == _FillValue] = np.nan
dnb = (dnb - add_offset) * scale_factor
datam = np.ma.masked_array(dnb, np.isnan(dnb))
if i == 0 :
data_m = datam
latitude_max_m = latitude_max
latitude_min_m = latitude_min
longitude_max_m = longitude_max
longitude_min_m = longitude_min
else:
data_m = np.vstack([data_m, datam])
latitude_max_m = np.vstack([latitude_max_m, latitude_max])
latitude_min_m = np.vstack([latitude_min_m, latitude_min])
longitude_max_m = np.vstack([longitude_max_m, longitude_max])
longitude_min_m = np.vstack([longitude_min_m, longitude_min])
i = i + 1
答案3
得分: 0
以下是您请求的内容的翻译:
另一种方法是使用GDAL来进行IO管理,其中的一个好处是,例如使用`gdal.BuildVRT`,你可以让GDAL为你执行任何拼接操作。这在一般情况下可能更容易,尤其是当你的输入文件不构成一个完整的网格时(例如,一些瓦片可能缺失)。
你仍然需要手动从元数据中解析出角点坐标,这通常是在处理这些自定义定义时不可避免的。这可能是最容易出错的步骤。
## 输入/输出
```python
from osgeo import gdal
from pathlib import Path
import os
def get_bbox(filename):
"从元数据中读取/解析坐标,然后按照outputBounds的顺序返回"
meta = gdal.Info(os.fspath(filename), format='json')
# 顺序应与GDAL的"outputBounds"匹配
bbox_keys = [
"HDFEOS_GRIDS_VNP_Grid_DNB_WestBoundingCoord",
"HDFEOS_GRIDS_VNP_Grid_DNB_NorthBoundingCoord",
"HDFEOS_GRIDS_VNP_Grid_DNB_EastBoundingCoord",
"HDFEOS_GRIDS_VNP_Grid_DNB_SouthBoundingCoord",
]
return list(map(lambda key: float(meta["metadata"][""][key]), bbox_keys))
data_dir = Path(r'D:\Temp\VIIRS_DNB')
filenames = sorted(data_dir.glob("VNP46A1.A???????.h??v??.001.*.h5"))
ds_templ = "HDF5:{fn}://HDFEOS/GRIDS/VNP_Grid_DNB/Data_Fields/DNB_At_Sensor_Radiance_500m"
gdal_options = dict(
widthPct=5,
heightPct=5,
resampleAlg=gdal.GRA_Average,
format="VRT",
)
ds = gdal.BuildVRT(
"", list(map(
lambda fn: gdal.Translate("", ds_templ.format(fn=fn), outputBounds=get_bbox(fn), **gdal_options),
filenames,
)),
)
data = ds.ReadAsArray()
gt = ds.GetGeoTransform()
ds = None
请注意,我将最终结果重新采样为原始大小的5%,因为否则它对于在小图像上绘制来说可能太大了。你可以删除或更改gdal_options
中的widthPct
和heightPct
以及resampleAlg
。
为了调试或更好地理解,将中间的VRT文件写入磁盘而不是在内存中使用空字符串可能会有帮助。
绘图
import matplotlib.pyplot as plt
import matplotlib as mpl
import cartopy.crs as ccrs
ys, xs = data.shape
mpl_extent = [
gt[0], # 左上角x坐标
gt[0]+gt[1]*xs, # 右下角x坐标
gt[3]+gt[5]*ys, # 右下角y坐标
gt[3], # 左上角y坐标
]
map_proj = ccrs.PlateCarree()
data_proj = ccrs.PlateCarree()
fig, ax = plt.subplots(
figsize=(8,8), dpi=96, facecolor="w", layout="compressed",
subplot_kw=dict(projection=map_proj),
)
ax.imshow(data, extent=mpl_extent, transform=data_proj, cmap="cividis", norm=mpl.colors.LogNorm(vmin=5, vmax=300))
ax.coastlines(color="w", lw=0.3)
ax.gridlines(color="w", draw_labels=True, lw=0.4, alpha=.4)
希望这有帮助!如果您需要更多信息,请告诉我。
<details>
<summary>英文:</summary>
An alternative way would be to use GDAL for managing the IO, a benefit is that with `gdal.BuildVRT` for example you can have GDAL do any mosaicing for you. Which might be easier in general, but especially when your input files don't form a nice filled grid (eg missing some tiles here and there).
You still have to parse the corner coordinates from the metadata manually, which is often inevitable with these custom definitions. That's probably the most error prone step.
## I/O
```python
from osgeo import gdal
from pathlib import Path
import os
def get_bbox(filename):
"Read/parse the coordinates from the metadata, and the return
it in the order outputBounds expects"
meta = gdal.Info(os.fspath(filename), format='json')
# order should match GDAL's "outputBounds" (!!)
bbox_keys = [
"HDFEOS_GRIDS_VNP_Grid_DNB_WestBoundingCoord",
"HDFEOS_GRIDS_VNP_Grid_DNB_NorthBoundingCoord",
"HDFEOS_GRIDS_VNP_Grid_DNB_EastBoundingCoord",
"HDFEOS_GRIDS_VNP_Grid_DNB_SouthBoundingCoord",
]
return list(map(lambda key: float(meta["metadata"][""][key]), bbox_keys))
data_dir = Path(r'D:\Temp\VIIRS_DNB')
filenames = sorted(data_dir.glob("VNP46A1.A???????.h??v??.001.*.h5"))
ds_templ = "HDF5:{fn}://HDFEOS/GRIDS/VNP_Grid_DNB/Data_Fields/DNB_At_Sensor_Radiance_500m"
gdal_options = dict(
widthPct=5,
heightPct=5,
resampleAlg=gdal.GRA_Average,
format="VRT",
)
ds = gdal.BuildVRT(
"", list(map(
lambda fn: gdal.Translate("", ds_templ.format(fn=fn), outputBounds=get_bbox(fn), **gdal_options),
filenames,
)),
)
data = ds.ReadAsArray()
gt = ds.GetGeoTransform()
ds = None
Note that I resample the final result to be 5% of the original size, because it would otherwise be way too large for plotting on a small image. You can remove or change the widthPct
, heightPct
, resampleAlg
in the gdal_options
.
It can be helpful, for debugging or general understanding, to write the intermediate VRT files to disk instead of doing in-memory with the empty ""
.
Plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
import cartopy.crs as ccrs
ys, xs = data.shape
mpl_extent = [
gt[0], # ulx
gt[0]+gt[1]*xs, # lrx
gt[3]+gt[5]*ys, # lry
gt[3], # uly
]
map_proj = ccrs.PlateCarree()
data_proj = ccrs.PlateCarree()
fig, ax = plt.subplots(
figsize=(8,8), dpi=96, facecolor="w", layout="compressed",
subplot_kw=dict(projection=map_proj),
)
ax.imshow(data, extent=mpl_extent, transform=data_proj, cmap="cividis", norm=mpl.colors.LogNorm(vmin=5, vmax=300))
ax.coastlines(color="w", lw=0.3)
ax.gridlines(color="w", draw_labels=True, lw=0.4, alpha=.4)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论