From 4ddd4521589812d415e7a77c4821a835783946b1 Mon Sep 17 00:00:00 2001 From: Dipak Shetty Date: Thu, 16 Apr 2026 13:07:45 -0500 Subject: [PATCH 1/4] Updated according to Spatial Extent Refactor changes --- twdhcli.py | 97 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/twdhcli.py b/twdhcli.py index 2767d67..305b728 100644 --- a/twdhcli.py +++ b/twdhcli.py @@ -480,7 +480,7 @@ def patch_fn_clear_spatial_data(ctx,dataset,data): if test_run: return False - remote.action.package_patch( id=dataset.get("id"), gazetteer="" ) + remote.action.package_patch( id=dataset.get("id"), spatial_extent="", spatial_full="" ) except Exception as e: if str(e) == 'Not found': @@ -502,11 +502,7 @@ def patch_fn_clear_spatial_data_full(ctx,dataset,data): if test_run: return False - gazetteer = dataset.get('gazetteer', {}) - if 'spatial_full' in gazetteer: - gazetteer['spatial_full'] = "" - - remote.action.package_patch( id=dataset.get("id"), spatial_simp=gazetteer['spatial_simp'], spatial_full=gazetteer['spatial_full'] ) + remote.action.package_patch( id=dataset.get("id"), spatial_extent="", spatial_full="" ) except Exception as e: if str(e) == 'Not found': @@ -525,16 +521,17 @@ def patch_fn_set_spatial_data(ctx,dataset,data): logecho = ctx.obj['logecho'] test_run = ctx.obj['test_run'] + spatial_extent = data.get('spatial_extent') or data.get('spatial_simp', '{}') + spatial_full = data.get('spatial_full', '{}') + try: - spatial_simp = data.get('spatial_simp', '{}') - parsed_spatial_simp = json.loads(spatial_simp) + json.loads(spatial_extent) except json.JSONDecodeError as e: - logecho(f"JSON parsing error on spatial_simp: {e}, value: {spatial_simp}", 'error') + logecho(f"JSON parsing error on spatial_simp: {e}, value: {spatial_extent}", 'error') try: - spatial_full = data.get('spatial_full', '{}') - parsed_spatial_simp = json.loads(spatial_full) + json.loads(spatial_full) except json.JSONDecodeError as e: logecho(f"JSON parsing error on spatial_full: {e}, value: {spatial_full}",'error') @@ -543,7 +540,7 @@ def patch_fn_set_spatial_data(ctx,dataset,data): if test_run: return False - remote.action.package_patch( id=dataset.get("id"), spatial_simp=spatial_simp, spatial_full=spatial_full ) + remote.action.package_patch( id=dataset.get("id"), spatial_extent=spatial_extent, spatial_full=spatial_full ) except Exception as e: if str(e) == 'Not found': @@ -673,33 +670,36 @@ def restore_spatial(ctx, patch_file, confirm_each): run_patch = True - if 'gazetteer' in dataset: - - spatial_full = dataset['gazetteer'].get('spatial_full', None) - spatial_simp = dataset['gazetteer'].get('spatial_simp', None) - - if spatial_full != None or spatial_simp != None: - - logecho( "Spatial data found for dataset \"{}\"".format(dataset['name']), "info" ) - - if confirm_all: - if click.confirm("🟢 Proceed to patch dataset \"{}\"? ".format(dataset['name']), abort=False, default=True): - run_patch = True - else: - logecho( "Patch cancelled", "warning" ) - run_patch = False - - if run_patch: - if patch_fn_set_spatial_data( ctx, dataset, dataset.get('gazetteer', None)): - logecho( "... patched", "info" ) - else: - logecho( "Error patching dataset \"{}\"".format(dataset['name']), "info" ) - - else: - logecho( "No spatial data found for \"{}\"".format(dataset['name']), "info" ) + spatial_full = None + spatial_extent = None + if not spatial_extent: + spatial_extent = dataset.get('spatial_extent') + if not spatial_full: + spatial_full = dataset.get('spatial_full') + + if spatial_full != None or spatial_extent != None: + + logecho( "Spatial data found for dataset \"{}\"".format(dataset['name']), "info" ) + + if confirm_all: + if click.confirm("🟢 Proceed to patch dataset \"{}\"? ".format(dataset['name']), abort=False, default=True): + run_patch = True + else: + logecho( "Patch cancelled", "warning" ) + run_patch = False + + if run_patch: + if patch_fn_set_spatial_data( ctx, dataset, { + "spatial_extent": spatial_extent, + "spatial_full": spatial_full + }): + logecho( "... patched", "info" ) + else: + logecho( "Error patching dataset \"{}\"".format(dataset['name']), "info" ) else: - logecho( "No gazetteer attribute found for \"{}\"".format(dataset['name']), "info" ) + logecho( "No spatial data found for \"{}\"".format(dataset['name']), "info" ) + @twdhcli.command() @click.option('--new-size', @@ -760,10 +760,12 @@ def update_spatial_simp(ctx, new_size, ids, confirm_each, allow_enlarge, skip_sn return for dataset in datasets: - gazetteer = dataset.get("gazetteer", {}) - if 'spatial_full' in gazetteer and gazetteer['spatial_full'] != None: - if not allow_enlarge and len(dataset["gazetteer"]["spatial_simp"].encode('utf-8')) < new_size: - logecho( "+ {} ({}) spatial_simp = {} already less than {}".format(dataset.get("title"),dataset.get("id"),len(dataset["gazetteer"]["spatial_simp"].encode('utf-8')),new_size), 'info') + spatial_full = dataset.get("spatial_full") + spatial_extent = dataset.get("spatial_extent") + + if spatial_full: + if not allow_enlarge and spatial_extent and len(spatial_extent.encode('utf-8')) < new_size: + logecho( "+ {} ({}) spatial_simp = {} already less than {}".format(dataset.get("title"),dataset.get("id"),len(spatial_extent.encode('utf-8')),new_size), 'info') else: logecho( "About to patch {} ({})".format(dataset.get("title"),dataset.get("id")), 'info') @@ -774,14 +776,15 @@ def update_spatial_simp(ctx, new_size, ids, confirm_each, allow_enlarge, skip_sn logecho( "Update cancelled", "warning" ) continue try: - if len(dataset["gazetteer"]["spatial_full"].encode('utf-8')) < new_size: - logecho( " {} ({}) spatial_full = {} already less than {}, setting spatial_simp = spatial_full".format(dataset.get("title"),dataset.get("id"),len(dataset["gazetteer"]["spatial_simp"].encode('utf-8')),new_size), 'info') - gazetteer['spatial_simp'] = gazetteer['spatial_full'] + if len(spatial_full.encode('utf-8')) < new_size: + new_spatial_extent = spatial_full else: - #logecho( " updating {} ({})".format(dataset.get("title"),dataset.get("id")), 'info') - gazetteer['spatial_simp'] = h.simplify_geojson_by_size(ctx,gazetteer['spatial_full'],new_size) + new_spatial_extent = h.simplify_geojson_by_size(spatial_full, new_size) - if patch_fn_set_spatial_data(ctx,dataset,gazetteer): + if patch_fn_set_spatial_data(ctx,dataset,{ + "spatial_extent": new_spatial_extent, + "spatial_full": spatial_full + }): logecho( "Updated spatial_simp on dataset \"{}\"".format(dataset['name']), "info" ) else: logecho( "Error updating spatial_simp on dataset \"{}\"".format(dataset['name']), "info" ) From f669c569fe73331e4bc1032fdeff0d07900670a1 Mon Sep 17 00:00:00 2001 From: Ben Bright Date: Mon, 27 Apr 2026 10:53:10 -0500 Subject: [PATCH 2/4] Robustification Added check for API key that doesn't validate to prevent hidden failures in snapshot command Added list-patch-functions command --- helpers.py | 39 ++++++++++++++++++++++++++++++++++----- twdhcli.py | 26 +++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/helpers.py b/helpers.py index b279cf7..786dd51 100644 --- a/helpers.py +++ b/helpers.py @@ -3,6 +3,7 @@ import csv import json import subprocess +import traceback from datetime import datetime, date @@ -14,6 +15,21 @@ from shapely.ops import unary_union +def apikey_validates(ctx,apikey): + + twdh = ctx.obj['twdh'] + logecho = ctx.obj['logecho'] + + try: + results = twdh.action.user_list() + logecho('API Key test passed', level='info') + return True + except Exception as e: + logecho('API Key is not valid ', level='error') + #print(traceback.format_exc()) + sys.exit(1) + + def snapshot(ctx,dest): twdh = ctx.obj['twdh'] @@ -142,25 +158,38 @@ def snapshot(ctx,dest): obj_file = '{}/{}.jsonl'.format(snap_dest, obj_type) try: + """ command = "ckanapi dump {obj_type} --apikey={apikey} --all -O {obj_file} -r {url}".format( \ obj_type=obj_type, \ apikey=twdh.apikey, \ obj_file=obj_file, \ url=twdh.address \ ) + """ + + command = [ + "ckanapi", + "dump", "{obj_type}".format(obj_type=obj_type), + "--apikey={apikey}".format(apikey=twdh.apikey), + "--all", + "-O", "{obj_file}".format(obj_file=obj_file), + "-r", "{url}".format(url=twdh.address) + ] + + #breakpoint() #logecho( command, 'info' ) logecho( 'Dumping {}...\n'.format(obj_type), 'info' ) - output = subprocess.getoutput(command) - logecho( output, 'info' ) + subprocess.check_call(command) logecho( 'Created snapshot file: {}'.format(obj_file), 'info' ) - except FileNotFoundError: - logecho( "Unable to write JSONL / Destination not found error", 'error' ) - sys.exit(1) + #except FileNotFoundError: + #logecho( "Unable to write JSONL / Destination not found error", 'error' ) + #sys.exit(1) except Exception as e: logecho( "An error occurred: {}".format(e), 'error' ) + print(traceback.format_exc()) sys.exit(1) diff --git a/twdhcli.py b/twdhcli.py index aba1f0f..8c7bc9b 100644 --- a/twdhcli.py +++ b/twdhcli.py @@ -146,6 +146,7 @@ def logecho(message, level='info'): if apikey == None: logecho("Cannot continue: --apikey parameter not set and APIKEY not found in .env.secrets","error") exit(1) + logecho("apikey set", "detail") if host == None: @@ -169,6 +170,11 @@ def logecho(message, level='info'): ctx.obj['logecho'] = logecho ctx.obj['test_run'] = test_run + if not h.apikey_validates(ctx,apikey): + logecho("Cannot continue: --apikey parameter value is not a valid key","error") + exit(1) + + @twdhcli.command() @click.option('--dest', type=click.Path(), @@ -183,6 +189,24 @@ def snapshot(ctx,dest): h.snapshot(ctx,dest) +@twdhcli.command() +@click.pass_context +def list_patch_functions(ctx ): + """ + Patch datasets + """ + + twdh = ctx.obj['twdh'] + logecho = ctx.obj['logecho'] + test_run = ctx.obj['test_run'] + + patch_fn_dict = get_patch_functions() + + for patch_fn in patch_fn_dict: + logecho( "{}".format(patch_fn), "info" ) + + + @twdhcli.command() @click.option('--patch-fn', required=True, @@ -528,7 +552,7 @@ def patch_fn_set_spatial_data(ctx,dataset,data): json.loads(spatial_extent) except json.JSONDecodeError as e: - logecho(f"JSON parsing error on spatial_simp: {e}, value: {spatial_extent}", 'error') + logecho(f"JSON parsing error on spatial_extent: {e}, value: {spatial_extent}", 'error') try: json.loads(spatial_full) From e6d50bc3ada36612d83a21ee1dd21a6a827325ac Mon Sep 17 00:00:00 2001 From: Ben Bright Date: Mon, 27 Apr 2026 13:08:43 -0500 Subject: [PATCH 3/4] Added `migrate_spatial` command --- twdhcli.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/twdhcli.py b/twdhcli.py index 8c7bc9b..7c91612 100644 --- a/twdhcli.py +++ b/twdhcli.py @@ -644,6 +644,85 @@ def patch_fn_set_app_email(ctx,dataset,data): return True +@twdhcli.command() +@click.option('--patch-file', + required=True, + default=None, + help='JSON file containing patch data') +@click.option('--confirm-each', + default=False, + is_flag=True, + help='Confirm each patch operation instead of just once at the start') +@click.pass_context +def migrate_spatial(ctx, patch_file, confirm_each): + """ + RestoreMigrate spatial data to remove old GZTR model and use new model with dedicated table for `spatial_full` and move `gazettteer.spatial_simp` to `spatial_extent` + """ + + twdh = ctx.obj['twdh'] + logecho = ctx.obj['logecho'] + + try: + with open(patch_file, "r") as file: + patch_data = json.load(file) + except FileNotFoundError: + logecho("Error: The file was not found.", 'error') + sys.exit(1) + except json.JSONDecodeError as e: + logecho(f"Error: Could not decode JSON from '{patch_file}'. Check if the file contains valid JSON.", 'error') + logecho( f"{e}", 'error' ) + sys.exit(1) + except Exception as e: + logecho(f"An unexpected error occurred: {e}", 'error') + sys.exit(1) + logecho( "Restoring spatial data from {} ...".format(patch_file), "info" ) + + if not confirm_each: + logecho( "Hint: Use --confirm-each if you want to confirm one at a time", "note" ) + if click.confirm('🟢 Proceed with all patches from {}? '.format(patch_file)): + logecho( "Proceeding with patches ...", "info" ) + else: + logecho( "Operation cancelled", "warning" ) + sys.exit(0) + confirm_all = False + else: + confirm_all = True + + for dataset in patch_data['results']: + + logecho( "", "divider" ) + + run_patch = True + + if 'gazetteer' in dataset: + + old_spatial_full = dataset['gazetteer'].get('spatial_full', None) + old_spatial_simp = dataset['gazetteer'].get('spatial_simp', None) + + if old_spatial_full != None or old_spatial_simp != None: + + logecho( "Spatial data found for dataset \"{}\"".format(dataset['name']), "info" ) + + if confirm_all: + if click.confirm("🟢 Proceed to patch dataset \"{}\"? ".format(dataset['name']), abort=False, default=True): + run_patch = True + else: + logecho( "Patch cancelled", "warning" ) + run_patch = False + + if run_patch: + if patch_fn_set_spatial_data( ctx, dataset, { + "spatial_extent": old_spatial_simp, + "spatial_full": old_spatial_full + }): + logecho( "... patched", "info" ) + else: + logecho( "Error patching dataset \"{}\"".format(dataset['name']), "info" ) + + else: + logecho( "No spatial data found for \"{}\"".format(dataset['name']), "info" ) + + @twdhcli.command() @click.option('--patch-file', required=True, From e4c1e9dd1fcec8d55cc3881db29c00bc2155d5de Mon Sep 17 00:00:00 2001 From: Ben Bright Date: Wed, 29 Apr 2026 08:57:16 -0500 Subject: [PATCH 4/4] Added ids filter to spatial_migrate() --- twdhcli.py | 66 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/twdhcli.py b/twdhcli.py index 7c91612..e9a518d 100644 --- a/twdhcli.py +++ b/twdhcli.py @@ -545,6 +545,7 @@ def patch_fn_set_spatial_data(ctx,dataset,data): logecho = ctx.obj['logecho'] test_run = ctx.obj['test_run'] + breakpoint() spatial_extent = data.get('spatial_extent') or data.get('spatial_simp', '{}') spatial_full = data.get('spatial_full', '{}') @@ -653,8 +654,12 @@ def patch_fn_set_app_email(ctx,dataset,data): default=False, is_flag=True, help='Confirm each patch operation instead of just once at the start') +@click.option('--ids', + required=False, + default=None, + help='Space-separated list of dataset ids to patch') @click.pass_context -def migrate_spatial(ctx, patch_file, confirm_each): +def migrate_spatial(ctx, patch_file, confirm_each, ids): """ RestoreMigrate spatial data to remove old GZTR model and use new model with dedicated table for `spatial_full` and move `gazettteer.spatial_simp` to `spatial_extent` """ @@ -677,6 +682,12 @@ def migrate_spatial(ctx, patch_file, confirm_each): sys.exit(1) logecho( "Restoring spatial data from {} ...".format(patch_file), "info" ) + if ids is not None: + dataset_filter = ids.split(' ') + logecho( "Limiting migration to the following datasets: {}".format(ids), 'info' ) + else: + dataset_filter = [] + if not confirm_each: logecho( "Hint: Use --confirm-each if you want to confirm one at a time", "note" ) if click.confirm('🟢 Proceed with all patches from {}? '.format(patch_file)): @@ -692,36 +703,43 @@ def migrate_spatial(ctx, patch_file, confirm_each): logecho( "", "divider" ) - run_patch = True + if len(dataset_filter) == 0 or dataset.get('id') in dataset_filter or dataset.get('name') in dataset_filter: + logecho( "Migrating {}".format( dataset['name'] ), 'info' ) - if 'gazetteer' in dataset: + run_patch = True - old_spatial_full = dataset['gazetteer'].get('spatial_full', None) - old_spatial_simp = dataset['gazetteer'].get('spatial_simp', None) + if 'gazetteer' in dataset: - if old_spatial_full != None or old_spatial_simp != None: - - logecho( "Spatial data found for dataset \"{}\"".format(dataset['name']), "info" ) + old_spatial_full = dataset['gazetteer'].get('spatial_full', None) + old_spatial_simp = dataset['gazetteer'].get('spatial_simp', None) - if confirm_all: - if click.confirm("🟢 Proceed to patch dataset \"{}\"? ".format(dataset['name']), abort=False, default=True): - run_patch = True - else: - logecho( "Patch cancelled", "warning" ) - run_patch = False - - if run_patch: - if patch_fn_set_spatial_data( ctx, dataset, { - "spatial_extent": old_spatial_simp, - "spatial_full": old_spatial_full - }): - logecho( "... patched", "info" ) - else: - logecho( "Error patching dataset \"{}\"".format(dataset['name']), "info" ) + if old_spatial_full != None or old_spatial_simp != None: + + logecho( "Spatial data found for dataset \"{}\"".format(dataset['name']), "info" ) + + if confirm_all: + if click.confirm("🟢 Proceed to patch dataset \"{}\"? ".format(dataset['name']), abort=False, default=True): + run_patch = True + else: + logecho( "Patch cancelled", "warning" ) + run_patch = False + + if run_patch: + if patch_fn_set_spatial_data( ctx, dataset, { + "spatial_simp": old_spatial_simp, + "spatial_full": old_spatial_full + }): + logecho( "... patched", "info" ) + else: + logecho( "Error patching dataset \"{}\"".format(dataset['name']), "info" ) + else: + logecho( "No spatial data found in gazetteer attribute for \"{}\"".format(dataset['name']), "info" ) else: - logecho( "No spatial data found for \"{}\"".format(dataset['name']), "info" ) + logecho( "No gazetteer attribute found", "info" ) + else: + logecho( "Skipping because not found in filter: \"{}\"".format(dataset['name']), "info" ) @twdhcli.command() @click.option('--patch-file',