# WATTE_functions.py
# Standard library and third-party imports
import os, sys
import numpy as np
import pandas as pd
import csv
import warnings
import geopandas as gpd
from shapely.geometry import LineString, MultiLineString
from shapely.geometry import Point
import glob
import time
import xarray as xr
from scipy.spatial import cKDTree as KDTree
# GDAL/OGR/OSR for raster and vector spatial analysis
from osgeo import gdal, ogr, osr
from osgeo.gdalconst import *
gdal.AllRegister()  # Register all drivers
gdal.UseExceptions()  # Raise Python exceptions on GDAL errors
import rasterio
# Function to return the x and y coordinates of a point geometry
def getXY(pt):
return (pt.x, pt.y)
# Function to reorder points so the polyline runs from east to west
def E2W(gdf):
    point_start = gdf['East'].iloc[0]
    point_end = gdf['East'].iloc[-1]
    if point_start < point_end:  # The point order is already east to west
        pass
    else:
        gdf = gdf.loc[::-1]  # Reverse the rows when the polyline runs from west to east
    return gdf

# Function to reorder points so the polyline runs from north to south
def N2S(gdf):
    point_start = gdf['North'].iloc[0]
    point_end = gdf['North'].iloc[-1]
    if point_start > point_end:  # The point order is already north to south
        pass
    else:
        gdf = gdf.loc[::-1]  # Reverse the rows when the polyline runs from south to north
    return gdf
# Function to add East and North centroid coordinates and orient the polyline
def sort2polyline(in_shp, target_folder, q):  # q: quadrant flag used to set the polyline direction (04/15/2024)
gdf = gpd.read_file(in_shp)
centroidseries = gdf['geometry'].centroid
East, North = [list(t) for t in zip(*map(getXY, centroidseries))]
gdf['East'] = East
gdf['North'] = North
if q == 12 or q == 34:
gdf = E2W(gdf)
else:
gdf = N2S(gdf)
out_csv = in_shp.split(".")[0].split("/")[-1] + '.csv' # Linux
Output_point_csv = os.path.join(target_folder, out_csv)
gdf.to_csv(Output_point_csv)
return gdf
# Function to smooth the point coordinates with a centered moving average
def moving_gdf(gdf, moving_value, transiton_points, target_folder):
    if moving_value > transiton_points / 3:
        print("Check the size of the window and transition zone")
    # Keep a copy of the raw coordinates so the transition zones can be restored
    original_xy = gdf[['East', 'North']].copy()
    # Perform the rolling mean on the 'East' and 'North' columns
    gdf[['East', 'North']] = gdf[['East', 'North']].rolling(moving_value,
                                                            min_periods=int(transiton_points / 3),
                                                            center=True).mean()
    # Restore the original values at the beginning and end of the line
    n = int(transiton_points)
    gdf.loc[gdf.index[:n], ['East', 'North']] = original_xy.iloc[:n].values
    gdf.loc[gdf.index[-n:], ['East', 'North']] = original_xy.iloc[-n:].values
    out_csv = "Shoreline_Delineation_Points_MA" + str(int(moving_value)) + ".csv"
    Output_point_csv = os.path.join(target_folder, out_csv)
    gdf.to_csv(Output_point_csv)
    return gdf
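# Example (hypothetical values): smooth with a 51-point centered window while
# keeping the first and last 150 points unsmoothed as transition zones.
# smoothed = moving_gdf(gdf, moving_value=51, transiton_points=150, target_folder="output")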
# Function to create a smoothed polyline shapefile from the averaged points
def create_smoothed_polyline(gdf,out_shp):
# Create a LineString geometry from the smoothed points
smoothed_line = LineString(gdf[['East', 'North']].values)
smoothed_gdf = gpd.GeoDataFrame(geometry=[smoothed_line])
smoothed_gdf['ID'] = 1
smoothed_gdf = smoothed_gdf.set_crs(gdf.crs, allow_override=True)
smoothed_gdf.to_file(out_shp)
del smoothed_gdf
# Function to return the row with the longest LineString
def get_max_length_row(gdf):
gdf = gdf.sort_values('Length', ascending=False)
if not gdf.empty:
max_length_gdf = gdf.iloc[[0]] # Select the first row (the maximum Length value)
else:
print("The DataFrame 'gdf' is empty.")
return None
# Create a new GeoDataFrame with only the selected row
max_length_gdf = gpd.GeoDataFrame(max_length_gdf, geometry='geometry', crs=gdf.crs)
return max_length_gdf
# Function to extract point values using a raster file
def extract_raster_value(in_shp, in_raster, out_shp, variable_name, NoData_value):
# Open shp file
pointData = gpd.read_file(in_shp)
x = pointData['x']
y = pointData['y']
with rasterio.open(in_raster) as Raster:
rows_raster = Raster.height
cols_raster = Raster.width
# Vectorized point-to-pixel index conversion
row, col = Raster.index(x, y)
# print(row)
# Convert row and col to NumPy arrays
row = np.array(row)
col = np.array(col)
# Filter out-of-range indices
valid_indices = (0 <= row) & (row < rows_raster) & (0 <= col) & (col < cols_raster)
# Create a masked array to handle invalid values
z = np.ma.masked_array(np.full_like(row, NoData_value), mask=~valid_indices)
# Read pixel values for valid indices
z[valid_indices] = Raster.read(1)[row[valid_indices], col[valid_indices]]
        # Convert the masked array to a plain NumPy array (masked cells keep NoData_value)
        z = np.array(z)
pointData[variable_name] = z
# Set the CRS of the output shapefile to match the input raster
pointData.crs = Raster.crs
# Save the DataFrame as a shape file
pointData.to_file(out_shp)
return pointData
def extract_point_values(raster_path, points_path, indices, no_decay_value):
# Load the shapefile of points
points = gpd.read_file(points_path)
# Load the DEM raster
dem = rasterio.open(raster_path)
# extract xy from point geometry
raster_values = []
array = dem.read(1)
for index, point in zip(indices, points.geometry):
if index: # If the point is within the marsh area, extract the z value
x = point.xy[0][0]
y = point.xy[1][0]
row, col = dem.index(x, y)
# Append the z value to the list of z values
raster_values.append(array[row, col])
else:
# Append no_decay_value to the list of z values
raster_values.append(no_decay_value)
points['DecayC'] = raster_values
points.to_file(points_path, driver='ESRI Shapefile')
del points
return raster_values
# Function to create evenly spaced points along the j-th line of a GeoDataFrame
def create_points(in_gdf, j, distance):
    line = in_gdf['geometry'].iloc[j]
    if j < 1:
        print('\tLine length is:\t', line.length)
    num_points = int(line.length / distance) + 1
    points = [line.interpolate(i * distance) for i in range(num_points)]  # spacing is approximately `distance` in map units
    return points
# Function to convert a Cartesian angle (counterclockwise from east) to a true-north bearing (clockwise from north)
def cartesian2northtrue(car_deg):  # input: Cartesian-system degrees
N_deg = (450 - car_deg) % 360 # Convert angles to North True
return N_deg
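# For example, a Cartesian angle of 0 deg (due east) becomes a 90 deg bearing and
# 90 deg (due north) becomes 0 deg: (450 - 0) % 360 = 90 and (450 - 90) % 360 = 0.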
# Function to create points along a LineString
def points_along_lines(in_shp, out_shp, distance, target_folder):
# Initialize lists to store x and y coordinates
id = [] # id of line
xx = [] # all x coordinates
yy = [] # all y coordinates
dd = [] # A distance from origin in each line
angles_lines = []
in_gdf = gpd.read_file(in_shp)
for j in range(in_gdf['geometry'].shape[0]):
# Create points along the LineString
points = create_points(in_gdf, j, distance)
x = [] # x coordinates of jth line
y = [] # y coordinates of jth line
        for i, p in enumerate(points):
            # Record the line id, the point coordinates, and the distance from the line origin
            id.append(j)
            x.append(p.x)
            y.append(p.y)
            dd.append(distance * i)
# print("x coordinate:", p.x, "y coordinate:", p.y, "distance:", distance * i) # Print the coordinates
# The angles between consecutive points
x1 = np.array(x)
y1 = np.array(y)
xx.extend(x)
yy.extend(y)
# Calculate the angles
angles = np.degrees(np.arctan2(y1[1:] - y1[:-1], x1[1:] - x1[:-1]))
angles_N = cartesian2northtrue(angles)
        angles_N = np.append(angles_N[0], angles_N)  # duplicate the first angle so the array length matches the number of points
angles_lines.extend(angles_N)
# Create a DataFrame
df = pd.DataFrame()
df['id'] = id
df['x'] = xx
df['y'] = yy
df['distance'] = dd
df['angle'] = angles_lines
    # Build the output CSV name from the output shapefile name
    out_csv = out_shp.split(".")[0].split("/")[-1] + '.csv'  # Linux
#out_csv = out_shp.split(".")[0].split("\\")[-1] + '.csv' # Windows
print(out_csv)
Output_point_csv = os.path.join(target_folder, out_csv)
# Create a GeoDataFrame from the DataFrame
out_gdf = gpd.GeoDataFrame(df)
out_gdf.set_geometry(gpd.points_from_xy(out_gdf['x'], out_gdf['y']), inplace=True, crs=in_gdf.crs)
    # Save the GeoDataFrame as a shapefile and a CSV file
out_gdf.to_file(out_shp)
out_gdf.to_csv(Output_point_csv, index=False, float_format='%.12f')
return out_shp
# Function to offset a polyline
def offset_polyline(in_shp, out_shp, dist_offset, join_style): # join_style: 1 = round, 2 = mitred
line = gpd.read_file(in_shp)
offset = line.geometry.iloc[0].parallel_offset(dist_offset, 'right', join_style=join_style) # join_style round
# Create a GeoDataFrame with the offset LineString
offset_gdf = gpd.GeoDataFrame(geometry=[offset])
offset_gdf["Length"] = offset_gdf.length
offset_gdf.crs = line.crs # gdf_crs
offset_gdf.to_file(out_shp)
return out_shp
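# Note: Shapely >= 2.0 deprecates parallel_offset in favor of offset_curve
# (where a negative distance offsets to the right side); the call above follows
# the Shapely 1.x API.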
def MultiLineString2LineString(shapefile_path, number_cells, raster_size):  # split where gaps exceed number_cells * raster_size (e.g. 10 cells)
# Open the shapefile
gdf = gpd.read_file(shapefile_path)
crs = gdf.crs
# Iterate over each geometry in the GeoSeries
for geometry in gdf.geometry:
# Check if the geometry is a MultiLineString
if isinstance(geometry, MultiLineString):
# Iterate over each LineString in the MultiLineString
x_coords = [] # Initialize empty lists for the x and y coordinates
y_coords = []
for line in geometry.geoms:
# Iterate over the coordinates of the line
for x, y in line.coords:
# Add the coordinates to the respective lists
x_coords.append(x)
y_coords.append(y)
            # Calculate the Euclidean distance between consecutive points
distances = [np.sqrt((x_coords[i + 1] - x_coords[i]) ** 2 + (y_coords[i + 1] - y_coords[i]) ** 2) for i
in range(len(x_coords) - 1)]
x_coords = np.array(x_coords)
y_coords = np.array(y_coords)
            # Sort the distances and take the 10 largest
            top_10_distances = np.sort(distances)[-10:]
            print("Top 10 MultiLineString gap distances:", top_10_distances)  # used to decide where to split the line
np.savetxt('top_10_distances.txt', top_10_distances, fmt='%f')
            # Gaps wider than number_cells * raster_size mark where the line is split
            threshold = number_cells * raster_size
            # Convert distances to a numpy array
            distances = np.array(distances)
            # Find the indices exceeding the threshold
            indices = np.where(distances > threshold)[0]
print("Indices exceeding the threshold:", indices)
            # Initialize an empty list for the line segments
            lines = []
            # Cut the merged coordinates at each gap index
            for num, i in enumerate(indices):
                if num == 0:
                    # First segment: from the start of the line up to the first gap
                    line = LineString(zip(x_coords[:i + 1], y_coords[:i + 1]))
                    lines.append(line)
                else:
                    line = LineString(
                        zip(x_coords[indices[num - 1] + 1:i + 1], y_coords[indices[num - 1] + 1:i + 1]))
                    lines.append(line)
            # Keep the final segment after the last gap (or the whole line if no gap was found)
            start = indices[-1] + 1 if len(indices) > 0 else 0
            if len(x_coords) - start >= 2:
                lines.append(LineString(zip(x_coords[start:], y_coords[start:])))
# Create a GeoDataFrame from the LineStrings
gdf = gpd.GeoDataFrame({"ID": range(len(lines))}, geometry=lines, crs=crs)
gdf['Length'] = gdf.length
else:
# # The geometry is not a MultiLineString, so we can proceed as before
# for x, y in geometry.coords:
# # Add the coordinates to the respective lists
# x_coords.append(x)
# y_coords.append(y)
pass
return gdf
# Function to get the bounding extent of a shapefile
def get_extent(shapefile_path):
gdf = gpd.read_file(shapefile_path)
minx, miny, maxx, maxy = gdf.geometry.total_bounds
return minx, miny, maxx, maxy
# Function to create a buffer around a polyline
def create_buffer(shapefile_path, output_file_path, distance):
    # Read the shapefile
    gdf = gpd.read_file(shapefile_path)
    # Create a buffer polygon around each geometry
    gdf['geometry'] = gdf['geometry'].buffer(distance)
# Save the GeoDataFrame with the buffered geometry to a new shapefile
gdf.to_file(output_file_path, driver='ESRI Shapefile')
# Function to create a raster file using GDAL
def create_raster(Rasterdata, Raster_output, data_array, NoData_value, data_type=gdal.GDT_Float32):
# data_array = Rasterdata.GetRasterBand(1).ReadAsArray()
prj = Rasterdata.GetProjection() # Read projection
band = Rasterdata.GetRasterBand(1)
bandnum = Rasterdata.RasterCount
transform = Rasterdata.GetGeoTransform()
gtiff_driver = gdal.GetDriverByName('GTiff') # Use GeoTIFF driver
out_ds = gtiff_driver.Create(Raster_output, # Create a output file
band.XSize, band.YSize, bandnum, data_type)
out_ds.SetProjection(prj)
out_ds.SetGeoTransform(transform)
dst_band = out_ds.GetRasterBand(1)
dst_band.WriteArray(data_array)
    out_ds.GetRasterBand(1).SetNoDataValue(NoData_value)  # Exclude the nodata value
    out_ds.GetRasterBand(1).ComputeStatistics(0)  # Compute statistics for raster pyramids (pyramids speed up raster display)
    if np.any(np.isnan(data_array)):
        print('\tMaximum value in this domain without nan is', np.nanmax(data_array))
    else:
        print('\tMaximum value in this domain is', np.max(data_array))
del out_ds
return Raster_output
def read_raster(Rasterdata, GA_stype):  # GA_stype: GA_ReadOnly (0) or GA_Update (1)
    Read_raster = gdal.Open(Rasterdata, GA_stype)
    if Read_raster is None:
        sys.exit('\tCould not open {0}.'.format(Rasterdata))
    return Read_raster
def read_raster_info(raster_file, band_num, read_info=False):  # TODO: also read transform and projection information by default (March 11, 2024, Jin)
raster = gdal.Open(raster_file)
if raster is None:
sys.exit('\tCould not open {0}.'.format(raster_file))
band_values = raster.GetRasterBand(band_num).ReadAsArray()
if read_info:
transforms = raster.GetGeoTransform()
X = raster.RasterXSize # Read column size
Y = raster.RasterYSize # Read row size (negative value)
prj = raster.GetProjection() # Read projection
return transforms, X, Y, prj, band_values
# transforms[0] # Origin x coordinate
# transforms[1] # Pixel width
# transforms[2] # x pixel rotation (0° if image is north up)
# transforms[3] # Origin y coordinate
# transforms[4] # y pixel rotation (0° if image is north up)
# transforms[5] # Pixel height (negative)
else:
return band_values
def get_epsg_code(prj):
projection = osr.SpatialReference(wkt=prj) # Read projection here prj = raster.GetProjection()
EPSG_code = projection.GetAttrValue('AUTHORITY', 1) # Get the EPSG code
return EPSG_code
def read_vector(Vectordata, GA_stype):  # GA_stype: GA_ReadOnly (0) or GA_Update (1)
    Read_vector = ogr.Open(Vectordata, GA_stype)
    if Read_vector is None:
        sys.exit('\tCould not open {0}.'.format(Vectordata))
    return Read_vector
def degree2radian(degree):
radian = degree * np.pi / 180
return radian
def radian2degree(radian):
degree = radian * 180 / np.pi
return degree
def make_slices(data, win_size):
    """Return a list of slices given a window size.

    data -- two-dimensional array to get slices from
    win_size -- tuple of (rows, columns) for the moving window
    """
    row_num = data.shape[0] - win_size[0] + 1  # Number of rows in each slice
    col_num = data.shape[1] - win_size[1] + 1  # Number of columns in each slice
    slices = []
    for i in range(win_size[0]):
        for j in range(win_size[1]):
            slices.append(data[i:row_num + i, j:col_num + j])
    return slices
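# For example, for a 5x5 array and win_size=(3, 3), make_slices returns nine
# overlapping 3x3 views, so np.mean(make_slices(data, (3, 3)), axis=0) is the
# 3x3 moving-window mean of the interior cells.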
# Dictionary of marsh decay constants; each key encodes inundation depth first
# and marsh density second (e.g. "medium_high" = medium inundation, high density)
guidance_decay_constants_table = {
    "low_low": 0.035,
    "medium_low": 0.021,
    "high_low": 0.001,
    "low_medium": 0.055,
    "medium_medium": 0.030,
    "high_medium": 0.006,
    "low_high": 0.107,
    "medium_high": 0.090,
    "high_high": 0.015,
    "water": 0.0,
    "land": 9999
}
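# Example lookup: an inundation level of 1.5 (the 1-2 "medium" band used in
# set_decay_values2tiff below) over high-density marsh maps to
# guidance_decay_constants_table["medium_high"] = 0.090.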
def set_decay_values2tiff(Inun_level, bio_density, Input_Water_Class,Input_Other_Class, guidance_decay_constants_table):
    # TODO: consider using Marsh_Class instead of the hard-coded codes 16, 23, 32 (16 = low, 23 = medium, 32 = high density)
    print("\n#############################################################\n")
    print("Set decay constant values in the tiff file using the inundation level and marsh density")
print("\n#############################################################")
# Define the masks
marsh_mask_low_low = (Inun_level < 1) & (bio_density == 16)
marsh_mask_medium_low = ((1 <= Inun_level) & (Inun_level < 2)) & (bio_density == 16)
marsh_mask_high_low = (2 <= Inun_level) & (bio_density == 16)
marsh_mask_low_medium = (Inun_level < 1) & (bio_density == 23)
marsh_mask_medium_medium = ((1 <= Inun_level) & (Inun_level < 2)) & (bio_density == 23)
marsh_mask_high_medium = (2 <= Inun_level) & (bio_density == 23)
marsh_mask_low_high = (Inun_level < 1) & (bio_density == 32)
marsh_mask_medium_high = ((1 <= Inun_level) & (Inun_level < 2)) & (bio_density == 32)
marsh_mask_high_high = (2 <= Inun_level) & (bio_density == 32)
mask_water = (bio_density == Input_Water_Class)
mask_other = (bio_density == Input_Other_Class)
    # Retrieve the corresponding values from the dictionary
    key_list = ["water", "low_low", "medium_low", "high_low", "low_medium", "medium_medium", "high_medium", "low_high", "medium_high", "high_high"]
    value_list = [guidance_decay_constants_table.get(key) for key in key_list]
    value_other = guidance_decay_constants_table.get("land")  # cells outside the masks (land/other) take the land value
    # Create a list of masks, one per key and in the same order as key_list
    mask_list = [mask_water, marsh_mask_low_low, marsh_mask_medium_low, marsh_mask_high_low,
                 marsh_mask_low_medium, marsh_mask_medium_medium, marsh_mask_high_medium,
                 marsh_mask_low_high, marsh_mask_medium_high, marsh_mask_high_high,
                 ]
    # Initialize decay_tiff_values with the land/other value
    decay_tiff_values = np.full(bio_density.shape, value_other, dtype=np.float32)
# Iterate over the masks and values
for mask, key, value in zip(mask_list, key_list, value_list):
# Apply the mask to the decay_tiff_values array and set the corresponding elements to the value
print(f"Value for {key}:", value)
decay_tiff_values[mask] = value
return decay_tiff_values
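# Example (hypothetical 1x2 arrays, with Input_Water_Class and Input_Other_Class
# set to codes other than 16 and 32): with Inun_level = np.array([[0.5, 1.5]]) and
# bio_density = np.array([[16, 32]]), the returned array is [[0.035, 0.090]]
# (the "low_low" and "medium_high" constants from the guidance table).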
def check_flag_file(flag_file, file_path):
    if flag_file:
        try:
            assert os.path.exists(file_path), "\tPlease input a valid file path ...{{ (>_<) }}\n"
        except AssertionError:
            print('Flag file was not found')
    else:
        print('Flag file is not set to be used')
def process_local_wave_file(local_wave_file, output_shp_file, buffer_file, ESPG):  # ESPG: CRS string, e.g. 'EPSG:26916'
with xr.open_dataset(local_wave_file) as ds:
        print('Reading the local wave file')
        ds.info()
grid_x_values = ds['grid_x'].values # longitude
grid_y_values = ds['grid_y'].values # latitude
grid_Hs_values = ds['Hs'].values # significant wave height
        grid_Tp_values = ds['Tp'].values.astype('float64') / (1e+9)  # peak period (timedelta64[ns] converted to seconds)
grid_Dir_values = ds['Dir'].values # wave direction
grid_Hs_min, grid_Hs_max, grid_Hs_mean, grid_Hs_median, grid_Hs_std = calculate_array_stats(grid_Hs_values)
grid_Tp_min, grid_Tp_max, grid_Tp_mean, grid_Tp_median,grid_Tp_std = calculate_array_stats(grid_Tp_values)
grid_Dir_min, grid_Dir_max, grid_Dir_mean, grid_Dir_median, grid_Dir_std = calculate_array_stats(grid_Dir_values)
print(f"Significant Wave Height: Min={grid_Hs_min}, Max={grid_Hs_max}, Mean={grid_Hs_mean}, Median={grid_Hs_median}, Std={grid_Hs_std}")
print(f"Peak Period: Min={grid_Tp_min}, Max={grid_Tp_max}, Mean={grid_Tp_mean},Median={grid_Tp_median}, Std={grid_Tp_std}")
print(f"Wave Direction: Min={grid_Dir_min}, Max={grid_Dir_max}, Mean={grid_Dir_mean},Median={grid_Dir_median}, Std={grid_Dir_std}")
df = pd.DataFrame({
'x': grid_x_values.flatten(),
'y': grid_y_values.flatten(),
'Hs': grid_Hs_values.flatten(),
'Tp': grid_Tp_values.flatten(),
'Dir': grid_Dir_values.flatten()
})
# Create a GeoDataFrame with the points
geometry = [Point(xy) for xy in zip(df['x'], df['y'])]
gdf = gpd.GeoDataFrame(df, geometry=geometry)
gdf = gdf.set_crs(ESPG)
# Perform a spatial join between the points and the polygon
gdf_buffer = gpd.read_file(buffer_file)
filter_gdf = gpd.sjoin(gdf, gdf_buffer, predicate='within')
filter_gdf.drop(['index_right', 'ID'], axis=1, inplace=True, errors='ignore') # The dropped columns may differ depending on the input shapefile
filter_gdf = filter_gdf.reset_index(drop=True)
print(f"Output file: {output_shp_file}")
filter_gdf.to_file(output_shp_file)
del ds
return filter_gdf
# Helper used by process_local_wave_file
def calculate_array_stats(array):
min_value = np.min(array)
max_value = np.max(array)
mean_value = np.mean(array)
median_value = np.median(array)
std_value = np.std(array)
return min_value, max_value, mean_value, median_value, std_value
def add_wave_climate2transect_points(Transect_points_file,wave_shp_file,out_shp_file, nearest_wave_file, no_decay_value, scale_factor, leafsize=10):
gdf = gpd.read_file(Transect_points_file)
gdf['Hs'] = float(no_decay_value)
gdf['Dir'] = float(no_decay_value)
gdf['Tp'] = float(no_decay_value)
base_index = gdf[gdf['distance'].values == 0].index
gdf_xy_point = gdf.loc[base_index, ['x', 'y', 'geometry']]
    Wave_base_gdf = get_nearest_variable_index(gdf_xy_point, wave_shp_file, nearest_wave_file, 'd3d', scale_factor, leafsize=leafsize, xy_list=['x', 'y'])
gdf.loc[base_index, 'Hs'] = Wave_base_gdf['Hs'].values
    gdf.loc[base_index, 'Dir'] = (Wave_base_gdf['Dir'].values + 180) % 360  # reverse the direction by 180 degrees (wave-from vs. wave-to convention)
gdf.loc[base_index, 'Tp'] = Wave_base_gdf['Tp'].values
gdf.to_file(out_shp_file)
# Function to pick the index of the nearest variable point
def get_nearest_variable_index(target_gdf, reference_shp, new_shp, str_variable, scale_factor, leafsize=10, xy_list=['x', 'y']):
# Read the shapefiles
reference_gdf = gpd.read_file(reference_shp)
# Use a projected system
xy_reference = reference_gdf[xy_list].to_numpy()
xy_reference = xy_reference / scale_factor
xy_target = target_gdf[xy_list].to_numpy()
xy_target = xy_target / scale_factor
dist_tree = KDTree(xy_reference, leafsize=leafsize)
dist, ix = dist_tree.query(xy_target, 1) # find the nearest neighbor for each point
new_gdf = target_gdf.copy()
new_gdf[str_variable + '_dis'] = dist
new_gdf[str_variable + '_index'] = ix
new_gdf['Hs'] = reference_gdf['Hs'].iloc[ix].values
new_gdf['Dir'] = reference_gdf['Dir'].iloc[ix].values
new_gdf['Tp'] = reference_gdf['Tp'].iloc[ix].values
new_gdf.to_file(new_shp)
return new_gdf
# function for filtering (reliable) wave data
def filter_wave_data(gdf, output_shp_file):
# Read the shapefile into a GeoDataFrame
#gdf = gpd.read_file(wave_shp_file)
print('Before filtering: \t gdf.shape', gdf.shape)
# Get the indices of the rows where 'Hs' is not 0
indices = gdf[gdf['Hs'] != 0.0].index
# Select specific columns
columns = ['x','y','Hs', 'Dir', 'Tp','geometry'] # This part may need to be modified depending on the input shapefile
gdf_filter = gdf.loc[indices, columns]
print('After filtering: \t gdf.shape', gdf_filter.shape)
gdf_filter.to_file(output_shp_file)
return gdf_filter
def calculate_wave_refraction(gdf, n_transect, NP, check_diff_file):
# Convert to list and then to numpy array
Hs_base = np.array(gdf['Hs'].tolist())
Hs_dir_base = np.array(gdf['Dir'].tolist())
Transect_ang = np.array(gdf['angle'].tolist())
# Reshape to calculate the wave attenuation value on each transect
Hs_transect = np.reshape(Hs_base, (n_transect, NP))
Hs_dir_transect = np.reshape(Hs_dir_base, (n_transect, NP))
Transect_ang = np.reshape(Transect_ang, (n_transect, NP))
# Calculate the difference between the first column of Hs_dir_transect and Transect_ang for each row
Hs_transect_base = Hs_transect[:, 0]
# Calculate the absolute difference
raw_diff = np.abs(Hs_dir_transect[:, 0] - Transect_ang[:, 0])
# Use modulo operator to handle the wrap-around at 360 degrees
difference = np.where(raw_diff > 180, 360 - raw_diff, raw_diff)
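    # e.g. directions 350 and 10 give raw_diff = 340, which wraps to 360 - 340 = 20 degrees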
cos_diff = np.cos(difference * np.pi / 180)
# If the angle difference is greater than 90 degrees, set the wave height to zero
Hs_modify = np.where(cos_diff < 0, 0, Hs_transect_base * cos_diff)
# Save the difference to a CSV file
np.savetxt(check_diff_file, difference, delimiter=',')
return Hs_modify
## Use the class-based IDW below; the equivalent standalone function was removed because it was slower
class Invdisttree:
""" inverse-distance-weighted interpolation using KDTree:
invdisttree = Invdisttree( X, z ) -- data points, values
interpol = invdisttree( q, nnear=3, eps=0, p=1, weights=None, stat=0 )
interpolates z from the 3 points nearest each query point q;
For example, interpol[ a query point q ]
finds the 3 data points nearest q, at distances d1 d2 d3
and returns the IDW average of the values z1 z2 z3
(z1/d1 + z2/d2 + z3/d3)
/ (1/d1 + 1/d2 + 1/d3)
= .55 z1 + .27 z2 + .18 z3 for distances 1 2 3
q may be one point, or a batch of points.
eps: approximate nearest, dist <= (1 + eps) * true nearest
p: use 1 / distance**p
weights: optional multipliers for 1 / distance**p, of the same shape as q
stat: accumulate wsum, wn for average weights
How many nearest neighbors should one take ?
a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula
b) make 3 runs with nnear= e.g. 6 8 10, and look at the results --
|interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q).
I find that runtimes don't increase much at all with nnear -- ymmv.
p=1, p=2 ?
p=2 weights nearer points more, farther points less.
In 2d, the circles around query points have areas ~ distance**2,
so p=2 is inverse-area weighting. For example,
(z1/area1 + z2/area2 + z3/area3)
/ (1/area1 + 1/area2 + 1/area3)
= .74 z1 + .18 z2 + .08 z3 for distances 1 2 3
Similarly, in 3d, p=3 is inverse-volume weighting.
Scaling:
if different X coordinates measure different things, Euclidean distance
can be way off. For example, if X0 is in the range 0 to 1
but X1 0 to 1000, the X1 distances will swamp X0;
rescale the data, i.e. make X0.std() ~= X1.std() .
A nice property of IDW is that it's scale-free around query points:
if I have values z1 z2 z3 from 3 points at distances d1 d2 d3,
the IDW average
(z1/d1 + z2/d2 + z3/d3)
/ (1/d1 + 1/d2 + 1/d3)
is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter.
In contrast, the commonly-used Gaussian kernel exp( - (distance/h)**2 )
is exceedingly sensitive to distance and to h.
"""
#leafsize: The number of points at which the algorithm switches over to brute-force. Default: 10.
def __init__(self, X, z, leafsize=10, stat=0): # scale=scaling factor for distances
assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
self.tree = KDTree(X, leafsize=leafsize) # build the tree
self.z = z
self.stat = stat
self.wn = 0
        self.wsum = None
def __call__(self, q, nnear=6, eps=0, p=1, weights=None):
# nnear nearest neighbours of each query point --
q = np.asarray(q)
qdim = q.ndim
if qdim == 1:
q = np.array([q])
if self.wsum is None:
self.wsum = np.zeros(nnear)
self.distances, self.ix = self.tree.query( q, k=nnear, eps=eps ) #scipy.spatial.KDTree.query
interpol = np.zeros((len(self.distances),) + np.shape(self.z[0]))
weight_factors = [] # list to store weight factors
jinterpol = 0
for dist, ix in zip(self.distances, self.ix):
            if nnear == 1:
                weight_factors.append(1.0)
                wz = self.z[ix]
            elif dist[0] < 1e-10:
                weight_factors.append(1.0)  # the query point coincides with a data point
                wz = self.z[ix[0]]
else: # weight z s by 1/dist --
w = 1 / dist ** p
if weights is not None:
w *= weights[ix] # >= 0
w /= np.sum(w)
weight_factors.append(w) # store the weight factors for each query point
wz = np.dot(w, self.z[ix])
if self.stat:
self.wn += 1
self.wsum += w
interpol[jinterpol] = wz
jinterpol += 1
#return interpol, dist if qdim > 1 else interpol[0]
        return (interpol, weight_factors) if qdim > 1 else interpol[0]
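# Minimal usage sketch (hypothetical arrays, not part of the WATTE pipeline):
#   known_xy = np.random.rand(100, 2)   # coordinates of known data points
#   known_z = np.random.rand(100)       # values at those points
#   idw = Invdisttree(known_xy, known_z, leafsize=10)
#   query_xy = np.random.rand(5, 2)
#   interpolated, weights = idw(query_xy, nnear=6, p=2)  # IDW estimates and their weights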
# def create_raster(file, reference_raster, z_array, dtype, no_data_value,stats_flag=False):
# # Create the output raster dataset
# gtiff_driver = gdal.GetDriverByName('GTiff')
# out_ds = gtiff_driver.Create(file, reference_raster.RasterXSize, reference_raster.RasterYSize, reference_raster.RasterCount, dtype) # dtype is e.g. gdal.GDT_Int32 and gdal.GDT_Float32
# out_ds.SetProjection(reference_raster.GetProjection())
# out_ds.SetGeoTransform(reference_raster.GetGeoTransform())
# dst_band = out_ds.GetRasterBand(1)
# dst_band.WriteArray(z_array)
# dst_band.SetNoDataValue(no_data_value) # Exclude nodata value
# stats = dst_band.ComputeStatistics(0)
# min_val, max_val, mean_val, std_dev_val = stats
# if stats_flag:
# print(
# f'Made a raster file. Statistics:\n\tMinimum: {min_val}, Maximum: {max_val}, Mean: {mean_val}, Standard Deviation: {std_dev_val}')
# else:
# print('Made a raster file')
# out_ds = None
# return