Saving multiple variables in a single NetCDF

Question

I have a text file that contains different columns representing individual data variables. See sample data below:

# NRECS: 5844
# DT: 24
# STARTDATE: 2000-01-01 00:00:00
# ALMA_OUTPUT: 0
# NVARS: 10
# YEAR  MONTH   DAY OUT_SNOW_COVER_BAND_0    OUT_SNOW_COVER_BAND_1   OUT_SNOW_COVER_BAND_2   OUT_SNOW_COVER_BAND_3   OUT_SNOW_COVER_BAND_4   OUT_SURF_TEMP   OUT_RUNOFF  OUT_BASEFLOW    OUT_SWE_BAND_0  OUT_SWE_BAND_1  OUT_SWE_BAND_2  OUT_SWE_BAND_3  OUT_SWE_BAND_4  OUT_EVAP    OUT_PREC
2000    01  01  0.0000   0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  1.7175  0.0000  0.0000  0.0000  0.0000  0.0000  0.2250  0.2250
2000    01  02  1.0000   1.0000  0.0000  0.0000  0.0000  -5.1895     0.0000  1.7042  0.7889  0.4855  0.0000  0.0000  0.0000  0.6311  2.2750
2000    01  03  1.0000   1.0000  0.0000  0.0000  0.0000  -14.5538    0.0000  1.6908  1.7781  1.3975  0.0000  0.0000  0.0000  0.2406  1.3000
2000    01  04  1.0000   1.0000  0.0000  0.0000  0.0000  -12.2395    0.0000  1.6773  8.6144  7.2828  0.0000  0.0000  0.0000  0.5643  6.7750
2000    01  05  1.0000   1.0000  0.0000  0.0000  0.0000  -12.2432    0.0000  1.6638  14.4594     12.3727     0.0000  0.0000  0.0000  0.7788  6.1250
2000    01  06  1.0000   1.0000  0.0000  0.0000  0.0000  -13.6909    0.0000  1.6501  14.4793     12.4350     0.0000  0.0000  0.0000  0.2736  0.2250
2000    01  07  1.0000   1.0000  0.0000  0.0000  0.0000  -12.0328    0.0000  1.6365  15.8497     13.7853     0.0000  0.0000  0.0000  0.2366  2.0250

Normally, when creating a NetCDF file for a single variable (for instance, Snow Cover), I use a Python 2.7 script that picks that variable's data and stores it along with the X-Y coordinates and a daily time-step dimension for each gridded cell/pixel. The result is a gridded NetCDF file for Snow Cover. Now, however, I would like to store multiple Snow Cover variables (see attached image below – highlighted column headers are the variables of interest) in a single NetCDF file.

If this is possible, can someone point me in the right direction?

Update
This is the code that I usually use for creating a NetCDF file. Please note that I got this code from GitHub, and I am still learning how to create NetCDF files, so I don’t know how to modify this code for the purpose described in this question.

#!/usr/bin/env python

#----------------------------------------------------
# Program to convert VIC fluxes files to a NetCDF file.
# It will ask the user which variable to export
# and for which years. Assumes there is data
# for the entire time period, from 1-Jan to 31-Dec.
# SET UP FOR DAILY TIME STEP. FLUX FILE SHOULD NOT
# CONTAIN HOUR RECORD!!
#----------------------------------------------------

#------------------------------------------------
# Written by Daniel de Castro Victoria
# [email protected] or [email protected]
# 03-dec-2004
#
# 13-mar-2018: Code update. Change libraries and treat
# header lines. Changes done by Stuart Smith (smit1770 at purdue dot edu)
#-------------------------------------------------

import os
import sys
# handle dates...
import datetime
# SciPy netCDF and NumPy
from scipy.io.netcdf import *
from numpy import *

# Number of header lines at the top of each flux file. The reading loop
# further down skips this many lines in every file before parsing data.
skip_lines = 6

# The script takes exactly one argument: the directory holding the flux
# files. An empty argument is rejected too, because there would be no last
# character to test for the trailing slash.
print(len(sys.argv))
if len(sys.argv) != 2 or not sys.argv[1]:
    print("Wrong user input")
    print("Convert VIC fluxes files to NetCDF")
    print("usage flux2cdf.py <vic flux dir>")
    print("VIC FLUX DIR SHOULD CONTAIN TRAILING /")
    # Non-zero status so a calling shell or script can detect the failure.
    sys.exit(1)

# File names are appended directly to sys.argv[1] later on, so the
# directory string must end with a path separator.
if not sys.argv[1].endswith("/"):
    print("VIC FLUX DIR SHOULD CONTAIN TRAILING /")
    print("fixing it for you...")
    sys.argv[1] = sys.argv[1] + "/"

print("IMPORTANT: "+sys.argv[1]+" SHOULD CONTAIN ONLY FLUXES FILES!!!")

# List the flux files and derive the grid axes from their names.
# Every name is split on "_"; part 1 is read as the latitude and part 2
# as the longitude, so each name needs at least three "_"-separated parts.
file_list = os.listdir(sys.argv[1])

unique_lats = set()
unique_lons = set()
for fname in file_list:
    parts = fname.split("_")
    unique_lats.add(float(parts[1]))
    unique_lons.add(float(parts[2]))

# Axis order: longitudes ascending (left to right),
# latitudes descending (top to bottom).
lon = sorted(unique_lons)
lat = sorted(unique_lats, reverse=True)

# The temporary sets are not needed once the axes exist.
del unique_lats
del unique_lons

# Ask which output parameter to export, then work out which column of the
# flux files holds it (var), the output file stem (var_txt) and the
# descriptive name stored in the NetCDF file (var_name).
print("Choose output parameter")
print("1 - Snow_Cover_Band")
print("2 - Surface_Temperature")
print("3 - Runoff")
print("4 - Base flow")
print("5 - SWE_Band")
print("6 - Precipitation")
print("7 - Evaporation")
print("8 - Soil Moisture")

# raw_input() + int() rather than input(): in Python 2, input() evaluates
# whatever is typed as a Python expression.
try:
    varini = int(raw_input('Choose output (1 a 8)>'))
except ValueError:
    sys.exit("Output choice must be a whole number from 1 to 8")
if not 1 <= varini <= 8:
    sys.exit("Output choice must be between 1 and 8")

# Column index of the chosen parameter within a data line.
# NOTE(review): this assumes the columns are YEAR MONTH DAY followed by one
# column per menu entry, in menu order. Flux files with several band
# columns per variable, or a different column order, will not line up --
# verify against the header line of the flux files.
if varini < 8:
    var = varini + 2
else:
    # More than one soil layer: the layer number selects the column.
    try:
        camada = int(raw_input('which soil layer?>'))
    except ValueError:
        sys.exit("Soil layer must be a whole number")
    # Layer 0 would give column 9, which is already the Evaporation column.
    # NOTE(review): layers are presumably numbered from 1 -- confirm.
    if camada < 1:
        sys.exit("Soil layer must be 1 or greater")
    var = varini + 1 + camada

# Column index -> (output file stem, descriptive name).
output_names = {
    3: ("Snow_Cover", "Snow_Cover"),
    4: ("Surf_Temp", "Surface_Temperature"),
    5: ("Runoff", "Runoff"),
    6: ("base", "Baseflow"),
    7: ("SWE", "SWE"),
    8: ("Precipitation", "Precipitation"),
    9: ("Evaporation", "Evaporation"),
}

if var in output_names:
    var_txt, var_name = output_names[var]
else:
    # Only reachable for soil moisture, so camada is always set here.
    var_txt = "soil_"+str(camada)
    # "%" formatting gives a string; a comma here would build a tuple,
    # which cannot be concatenated into the variable's long_name later.
    var_name = "Soil moisture, layer %i" % camada

# Export period. Whole calendar years are used: 1 January of the start
# year through 31 December of the end year.
# raw_input() + int() rather than input(): in Python 2, input() evaluates
# whatever is typed as a Python expression.
try:
    start_year = int(raw_input("Enter start year:"))
    end_year = int(raw_input("End year:"))
except ValueError:
    sys.exit("Years must be whole numbers, e.g. 2000")

# A reversed period would give a non-positive number of days and fail
# later with an obscure array-shape error.
if end_year < start_year:
    sys.exit("End year must not be earlier than start year")

inidate = datetime.date(start_year,1,1)
enddate = datetime.date(end_year,12,31)

# Number of daily time steps, first and last day both included.
days = (enddate - inidate).days + 1

print("Go grab a coffe, this could take a while...")

#
# Build one array holding every value that will be written out.
# It starts filled with -9999 (NoData); grid cells that have no flux file
# keep that value. This can get large: days x latitudes x longitudes.
#

all_data = zeros([days,len(lat),len(lon)], float)-9999

c = len(file_list)

# for each file in list
for f in file_list:
    # Grid position of this file, taken from the lat/lon in its name.
    name_parts = f.split("_")
    latitude = float(name_parts[1])
    longitude = float(name_parts[2])
    lat_id = lat.index(latitude)
    lon_id = lon.index(longitude)

    print("%i files to write." % c)
    c = c -1

    dado = []
    # "with" closes the file even when a line fails to parse.
    with open(sys.argv[1]+f, "r") as infile:
        for line_no, l in enumerate(infile):
            # Skip the header lines (count set at the beginning of the code).
            if line_no < skip_lines:
                continue
            # Split on any whitespace so both tab- and space-separated
            # columns work. Splitting on the letter "t" never separates
            # the numeric columns.
            fields = l.split()
            if not fields:
                continue  # blank line
            # Keep only records whose year (first column) is in the period.
            if inidate.year <= int(fields[0]) <= enddate.year:
                print(l)
                dado.append(float(fields[var]))

    # The file must supply exactly one value per day of the period;
    # otherwise a single value would be silently repeated over the whole
    # time axis, and any other count would fail with an unclear shape error.
    if len(dado) != days:
        raise ValueError(
            "%s: expected %i daily records for %i-%i, found %i"
            % (f, days, inidate.year, enddate.year, len(dado)))

    # Lat & lon are fixed for a file, so it fills [:, lat_index, lon_index].
    all_data[:,lat_id,lon_id] = dado

#
# Write everything to the NetCDF file named after the chosen parameter
#

def _add_axis(nc, name, long_name, units, values):
    """Create an "f4" coordinate variable on the dimension of the same name and fill it."""
    axis = nc.createVariable(name, "f4", (name,))
    axis.long_name = long_name
    axis.units = units
    axis[:] = values
    return axis


ncfile = netcdf_file(var_txt+".nc", "w")

# global attributes
ncfile.Conventions = "COARDS"
ncfile.history = "Created using flux2cdf.py. " + datetime.date.today().isoformat()
ncfile.production = "VIC output"

ncfile.start_date = inidate.isoformat()
ncfile.end_date = enddate.isoformat()

# dimensions: longitude (X), latitude (Y) and daily time steps (T)
for dim_name, dim_size in (("X", len(lon)), ("Y", len(lat)), ("T", days)):
    ncfile.createDimension(dim_name, dim_size)

# coordinate variables, one per dimension
latvar = _add_axis(ncfile, "Y", "Latitude", "degrees_north", lat)
lonvar = _add_axis(ncfile, "X", "Longitude", "degrees_east", lon)
# time is stored as a day offset from the first day of the period
timevar = _add_axis(ncfile, "T", "Time",
                    "days since " + inidate.isoformat(), range(0, days))

# the data itself, indexed (time, latitude, longitude)
data_var = ncfile.createVariable(var_txt, "f4", ("T","Y","X"))
data_var.long_name = var_name+" calculated by VIC"
data_var.missing_value = -9999.0
# NOTE(review): the same units literal is written for every parameter --
# confirm it is appropriate for the one being exported.
data_var.units = "milimeters"
data_var[:] = all_data

ncfile.close()

netcdf python

Saving multiple variables in a single NetCDF

Add your own answers!

Ask a Question