diff --git a/tagbase_server/tagbase_server/test/test_ingest.py b/tagbase_server/tagbase_server/test/test_ingest.py
index 68e5e04..fa1eb1e 100644
--- a/tagbase_server/tagbase_server/test/test_ingest.py
+++ b/tagbase_server/tagbase_server/test/test_ingest.py
@@ -92,6 +92,73 @@ def test_get_dataset_id(self, mock_connect):
         tag_id = pu.get_tag_id(cur, 1)
         assert tag_id, "1"
 
+    @mock.patch("psycopg2.connect")
+    def test_is_only_metadata_change(self, mock_connect):
+        """Check the result for both a matching and a missing submission row."""
+        metadata_hash_stored = ["some_hash"]
+        file_md_hash = "some_other_hash"
+        # result of psycopg2.connect(**connection_stuff)
+        mock_con = mock_connect.return_value
+        # result of con.cursor(cursor_factory=DictCursor)
+        mock_cur = mock_con.cursor.return_value
+        # the function under test reads its row with cur.fetchone()
+        mock_cur.fetchone.return_value = metadata_hash_stored
+        conn = psycopg2.connect(
+            dbname="test",
+            user="test",
+            host="localhost",
+            port="32780",
+            password="test",
+        )
+        cur = conn.cursor()
+
+        # a submission row with a different metadata hash exists
+        # NOTE(review): assumes a truthy return only when a row is fetched; confirm
+        is_only_metadata_change = pu.is_only_metadata_change(
+            cur, metadata_hash_stored[0], file_md_hash
+        )
+        assert is_only_metadata_change
+
+        # no different metadata found
+        mock_cur.fetchone.return_value = None
+        is_only_metadata_change = pu.is_only_metadata_change(
+            cur, metadata_hash_stored[0], file_md_hash
+        )
+        assert not is_only_metadata_change
+
+    @mock.patch("psycopg2.connect")
+    def test_update_submission_metadata(self, mock_connect):
+        """Run update_submission_metadata against a mocked cursor."""
+        submission_id = 1
+        metadata_attributes = [
+            (submission_id, "instrument_name", "some_instrument"),
+            (submission_id, "model", "some_model"),
+        ]
+        # result of psycopg2.connect(**connection_stuff)
+        mock_con = mock_connect.return_value
+        # result of con.cursor(cursor_factory=DictCursor)
+        mock_cur = mock_con.cursor.return_value
+        # return this when calling cur.fetchall()
+        mock_cur.fetchall.return_value = metadata_attributes
+
+        conn = psycopg2.connect(
+            dbname="test",
+            user="test",
+            host="localhost",
+            port="32780",
+            password="test",
+        )
+        cur = conn.cursor()
+        tag_id = 1
+        dataset_id = 1
+        metadata_hash = "some_hash"
+
+        pu.update_submission_metadata(
+            cur, tag_id, metadata_attributes, submission_id, dataset_id, metadata_hash
+        )
+        # the update must have issued at least one statement on the mocked cursor
+        assert mock_cur.execute.called
+
     @mock.patch("psycopg2.connect")
     def test_processing_file_metadata_with_existing_attributes(self, mock_connect):
         metadata_attribs_in_db = [[1, "instrument_name"], [2, "model"]]
diff --git a/tagbase_server/tagbase_server/utils/processing_utils.py b/tagbase_server/tagbase_server/utils/processing_utils.py
index 57fa4c7..d61a06f 100644
--- a/tagbase_server/tagbase_server/utils/processing_utils.py
+++ b/tagbase_server/tagbase_server/utils/processing_utils.py
@@ -262,13 +262,13 @@ def get_dataset_properties(submission_filename):
     )
 
 
-def is_only_metadata_change(cursor, metadata_hash, file_content_hash):
+def is_only_metadata_change(cursor, metadata_hash, file_data_hash):
     logger.debug("Detecting metadata submitted...")
     cursor.execute(
         "SELECT md_sha256 FROM submission WHERE md_sha256 <> %s AND data_sha256 = %s ",
         (
             metadata_hash,
-            file_content_hash,
+            file_data_hash,
         ),
     )
     db_results = cursor.fetchone()
@@ -312,27 +312,30 @@ def update_submission_metadata(
 ):
     # update submission information
     current_time = dt.now(tz=pytz.utc).astimezone(get_localzone())
-    cur.execute(
-        "UPDATE submission SET md_sha256 = '{}', date_time = '{}'"
-        " WHERE tag_id = {} AND dataset_id = {} AND submission_id = {}".format(
+    update_submission_info_query = (
+        "UPDATE submission SET md_sha256 = '{}', date_time = '{}' "
+        "WHERE tag_id = {} AND dataset_id = {} AND submission_id = {}".format(
             metadata_hash, current_time, tag_id, dataset_id, submission_id
         )
     )
+    cur.execute(update_submission_info_query)
     logger.info(
         "Submission_id=%s updated with metadata hash=%s", submission_id, metadata_hash
     )
 
-    # update metadata attributes
-    for x in metadata:
-        submission_id = x[0]
-        attribute_id = x[1]
-        attribute_value = x[2]
-        attribute_value = str(attribute_value).strip('"')
-        cur.execute(
-            "UPDATE metadata SET attribute_value = '{}' WHERE submission_id = {} AND tag_id = {} AND attribute_id = {}".format(
-                attribute_value, submission_id, tag_id, attribute_id
-            )
+    # delete previous metadata since we are going to override it
+    delete_md_query = (
+        "DELETE FROM metadata WHERE submission_id = {} AND tag_id = {}".format(
+            submission_id, tag_id
         )
+    )
+    cur.execute(delete_md_query)
+    logger.debug(
+        "Removed old metadata from submission_id=%s tag_id=%s", submission_id, tag_id
+    )
+
+    # insert new metadata
+    insert_metadata(cur, metadata, submission_id)
 
     logger.info("Updated metadata attributes: %s", metadata)
 
@@ -347,7 +350,6 @@ def process_etuff_file(file, version=None, notes=None):
     conn = connect()
     conn.autocommit = True
 
-    # TODO we should read the file once and return the hashes we need (metadata/content/entire-file)
     (
         instrument_name,
         serial_number,
@@ -359,11 +361,14 @@ def process_etuff_file(file, version=None, notes=None):
         number_global_attributes_lines,
     ) = get_dataset_properties(submission_filename)
     content_hash = make_hash_sha256(file_content)
-    logger.debug("Content Hash: %s", content_hash)
     metadata_hash = make_hash_sha256(metadata_content)
-    logger.debug("MD Hash: %s", metadata_hash)
     entire_file_hash = compute_file_sha256(submission_filename)
-    logger.debug("File Hash: %s", entire_file_hash)
+    logger.debug(
+        "Content Hash: %s\tMetadata Hash: %s\tFile Hash: %s",
+        content_hash,
+        metadata_hash,
+        entire_file_hash,
+    )
 
     with conn:
         with conn.cursor() as cur:
@@ -415,17 +420,11 @@ def process_etuff_file(file, version=None, notes=None):
                 )
                 return 1
 
+            # at this point we have already read from the file all global attribute lines
             proc_obs = []
             variable_lookup = {}
-            # at this point we have already read form the file all global attribute lines
-            # line_counter = number_global_attributes_lines
-            # # TODO we should use the 'content' variable in the following
             s_time = time.perf_counter()
-            # with open(file, "rb") as data:
-            #     lines = [line.decode("utf-8", "ignore") for line in data.readlines()]
-            #     lines_length = len(lines)
-
             num_lines_content = len(file_content)
             logger.debug(
                 "len number_global_atttributes_lines: '%s' len lines_length: '%s'",