-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathread_bin.py
293 lines (240 loc) · 12.6 KB
/
read_bin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 21 15:03:50 2020
@author: mmacferrin
"""
import numpy
import argparse
import re
import os
# 332 rows x 316 cols for Antarctic Polar Stereo data,
# per https://nsidc.org/data/polar-stereo/ps_grids.html
#
# If you are using Arctic data or some other grid, change the DEFAULT_GRID_SHAPE below,
# or just use the optional parameter when you call it.
# For Antarctica, (rows, cols) = (332,316)
# For Arctic, (rows, cols) = (448, 304)
DEFAULT_GRID_SHAPE = (332, 316)
GRIDSIZE_25_N = numpy.array(((5850+5350)/25, (3750+3850)/25), dtype=numpy.long) # (448, 304)
GRIDSIZE_25_S = numpy.array(((4350+3950)/25, (3950+3950)/25), dtype=numpy.long) # (332, 316)
GRIDSIZE_12_5_N = GRIDSIZE_25_N * 2 # (896, 608)
GRIDSIZE_12_5_S = GRIDSIZE_25_S * 2 # (664, 632)
GRIDSIZE_6_25_N = GRIDSIZE_25_N * 4
GRIDSIZE_6_25_S = GRIDSIZE_25_S * 4
def read_NSIDC_bin_file(fname,
grid_shape = DEFAULT_GRID_SHAPE,
header_size=0,
element_size=2,
return_type=float,
signed=False,
multiplier=0.1):
"""Read an SSMI file, return a 2D grid of integer values.
header_size - size, in bytes, of the header. Defaults to zero for
brightness-temperature data, but can be one for other data. For instance,
NSIDC sea-ice concentration data has a 300-byte header on it.
element_size - number of bytes for each numerical element. The brightness-temperature
uses 2-byte little-endian integers (with a multiplier factor to turn them into floating-point values).
NSIDC sea-ice concentration data is just 1-byte integers.
return_type can be "int" or "float", or the numpy equivalent therein.
signed - Whether the data values are signed (True) or unsigned (False) data
multiplier -- A value to multiply the dataset by after it's read. Ingored for integer arrays,
useful in some cases for floating point values that are saved as integers but then
multiplied by 0.1 to get floating-point values.
(Example: value "2731" with a multiplier of 0.1 will return 273.1)
This is ingored if the return type is "int" or a numpy integer type.
"""
# Data is in two-byte, little-endian integers, array of size GRID_SHAPE
with open(fname, 'rb') as fin:
raw_data = fin.read()
fin.close()
# Lop off the header from the start of the byte array.
if header_size > 0:
raw_data = raw_data[header_size:]
# Check to make sure the data is the right size, raise ValueError if not.
# TODO: The NSIDC-0051 data has the rows,cols in the header. We could read it from there,
# although right now we just get the grid size from the paramter.
if int(len(raw_data) / element_size) != int(numpy.product(grid_shape)):
raise ValueError("File {0} has {1} elements, does not match grid size {2}.".format(
fname, int(len(raw_data)/element_size), str(grid_shape)))
# Creating a uint16 array to read the data in
int_array = numpy.empty(grid_shape, dtype=return_type)
int_array = int_array.flatten()
# Read the data. The built_int "from_bytes" function does the work here.
for i in range(0, int(len(raw_data)/element_size)):
int_array[i] = int.from_bytes(raw_data[(i*element_size):((i+1)*element_size)],
byteorder="little",
signed=signed)
# Unflatten the array back into the grid shape
int_array.shape = grid_shape
# If the file is meant to be an integer array, just return it.
if return_type in (int, numpy.int, numpy.uint8, numpy.int8, numpy.uint16, numpy.int16, numpy.uint32, numpy.int32, numpy.uint64, numpy.int64):
return_array = numpy.array(int_array, dtype=return_type)
# Else, if it's meant to be a floating-point array, scale by the multiplier
# and return the floating-point array. If the mutiplier is a float (i.e. 0.1),
# numpy will conver and return an array of floats
else:
return_array = numpy.array( int_array * multiplier, dtype=return_type)
return return_array
def get_hemisphere_and_resolution_from_nsidc_filename(fname):
"""Get the resolution from the filename.
From the file specs on https://nsidc.org/data/nsidc-0001
Will be 12.5 or 25 km.
"""
fbase = os.path.splitext(os.path.split(fname)[1])[0]
# This is the filename structure for NSIDC-0001. TODO: Generalize it for other file name structures.
SSMI_REGEX = r"(?<=\Atb_f\d{2}_\d{8}_v\d_)[ns]\d{2}(?=[vh])"
matches = re.search(fbase, SSMI_REGEX)
if matches is None:
return None, None
match = matches.group(0)
hemisphere = match[0].upper()
frequency = int(match[1:2])
# The resolutions for each frequency in the NSIDC data products.
# Dictionary is "frequency:resolutin" key:value pair.
resolution = {19:25.0,
22:25.0,
37:25.0,
85:12.5,
91:12.5}[frequency]
return hemisphere, resolution
def output_array_to_stdout(array):
"""Output a 2D array to stdout."""
for row in array:
print(" ".join([str(i) for i in row]))
return
def testing():
"""Just a few test functions."""
# Testing this out on a few files, examples using different types of data products.
# An NSIDC-0001 brightness-temperature file, in 2-byte little-endian integers
# converted to floating point. No header.
array1 = read_NSIDC_bin_file("../Tb/nsidc-0001/tb_f08_19870709_v5_s19h.bin",
grid_shape=(332,316),
header_size=0,
element_size=2,
return_type=float,
signed=False,
multiplier=0.1)
print(array1.shape, array1.dtype)
print(array1)
# An NSIDC-0051 sea-ice concentration v1 file, in a 1-byte unsigned integer array with
# a 300-byte header.
# For an Arctic file
array2 = read_NSIDC_bin_file("../Tb/nsidc-0051/nt_20201231_f17_v1.1_n.bin",
grid_shape=(448, 304),
header_size=300,
element_size=1,
return_type=int,
signed=False)
print(array2.shape, array2.dtype)
print(array2)
# For an Antarctic file
array3 = read_NSIDC_bin_file("../Tb/nsidc-0051/nt_20201231_f17_v1.1_s.bin",
grid_shape=(332,316),
header_size=300,
element_size=1,
return_type=int,
signed=False)
print(array3.shape, array3.dtype)
print(array3)
# An NSIDC-0079 sea-ice concentration v3 files, in 2-byte unsigned integer array with
# a 300-byte header.
# For an Arctic file, returning the array in integer values.
array4 = read_NSIDC_bin_file("../Tb/nsidc-0079/bt_20201231_f17_v3.1_n.bin",
grid_shape=(448, 304),
header_size=0,
element_size=2,
return_type=int,
signed=False)
print(array4.shape, array4.dtype)
print(array4)
# For an Antarctic file, alternately returning the array in floating-point values (your choice, just pick the parameter you want.)
array5 = read_NSIDC_bin_file("../Tb/nsidc-0079/bt_20201231_f17_v3.1_s.bin",
grid_shape=(332,316),
header_size=0,
element_size=2,
return_type=float,
signed=False,
multiplier=0.1)
print(array5.shape, array5.dtype)
print(array5)
def read_and_parse_args():
"""Read and parse command-line arguments."""
parser = argparse.ArgumentParser(description="""Reads an NSIDC .bin file and outputs the array contents. Use
'convert_bin_to_gtif.py' to output to a GeoTiff. This will just spit the
numbers onto a screen. In order to output to a space-delimited text file, just
route the stdout into a file. Example:
$ python read_bin.py infile.bin > outfile.txt
Read the NSIDC documentation for your data product in order to
choose the correct parameters listed below.""")
parser.add_argument("src", type=str, help="Source file (.bin)")
parser.add_argument("-resolution", "-r", type=float, default=None, help="Resolution (km): 6.25, 12.5, or 25. If omitted, it is interpreted from the file name. If cannot be interpreted, defaults to 25 km. Check your NSIDC data source documentation.")
parser.add_argument("-hemisphere", type=str, default=None, help="Hemisphere: N or S. If omitted, it is interpreted from the file name. If cannot be interpreted, defaults to 'N'.")
parser.add_argument("-header_size", "-hs", type=int, default=0, help="Size of .bin file header (in bytes.) (Default: 0)")
parser.add_argument("-element_size", "-es", type=int, default=2, help="Size of each numerical .bin data element, in bytes. Most NSIDC files use 1- or 2-byte numbers. Check the documentation of the dataset. (Default: 2)")
parser.add_argument("-output_type", "-ot", default="int", help="Output data type: 'int' or 'float'. Default 'int'.")
parser.add_argument("-multiplier","-m", type=str, default="auto", help="A multiplier to create the output numbers. Any number, or 'auto'. With 'auto', defaults to 1 for integers (no modification) and 0.1 for floating-point (2731 becomes 273.1, e.g.). Or, specify your own multiplier here.")
parser.add_argument("--signed", "-s", action="store_true", default=False, help="Read bin as signed data. Default to unsigned.")
return parser.parse_args()
if __name__ == "__main__":
# Parse the command-line arguments.
args = read_and_parse_args()
if args.output_type.lower() in ("float", "f"):
out_type = float
elif args.output_type.lower() in ("int", "i", "d"):
out_type = int
else:
raise ValueError("Uknown output_type (can be: 'int','i','d','float', or 'f'):", str(args.output_type))
if args.multiplier.lower().strip() != "auto":
multiplier = float(args.multiplier)
else:
multiplier = args.multiplier
# Resolve the resolution, from:
# 1) The command line argument
# 2) The filename, or
# 3) Just use the default (25 km))
if args.resolution is None or args.hemisphere is None:
hemisphere_from_fname, resolution_from_fname = get_hemisphere_and_resolution_from_nsidc_filename(args.src)
if args.resolution is None:
if resolution_from_fname is None:
resolution = 25
else:
resolution = resolution_from_fname
else:
resolution = float(args.resolution)
if args.hemisphere is None:
if hemisphere_from_fname is None:
hemisphere = 'N'
else:
hemisphere = hemisphere_from_fname
else:
hemisphere = args.hemisphere.strip().upper()
if hemisphere == 'N':
if resolution == 25:
gridsize = GRIDSIZE_25_N
elif resolution == 12.5:
gridsize = GRIDSIZE_12_5_N
elif resolution == 6.25:
gridsize = GRIDSIZE_6_25_N
else:
raise ValueError("Unknown resolution: {0}".format(resolution))
elif hemisphere == 'S':
if resolution == 25:
gridsize = GRIDSIZE_25_S
elif resolution == 12.5:
gridsize = GRIDSIZE_12_5_S
elif resolution == 6.25:
gridsize = GRIDSIZE_6_25_S
else:
raise ValueError("Unknown resolution: {0}".format(resolution))
else:
raise ValueError("Unknown hemisphere: {0}".format(hemisphere))
# Read the array
array = read_NSIDC_bin_file(args.src,
grid_shape = gridsize,
header_size=args.header_size,
element_size=args.element_size,
return_type=out_type,
signed=args.signed,
multiplier=args.multiplier)
# Print the array to stdout
output_array_to_stdout(array)