From 08540bcc8c2476138444bfc789386bdb49b908c3 Mon Sep 17 00:00:00 2001 From: Ektoras Patrikios Date: Sun, 19 Feb 2023 01:36:22 +0200 Subject: [PATCH 1/3] Issue 1 completed, not/or/and/between --- miniDB/database.py | 3 +- miniDB/misc.py | 21 ++++++++++- miniDB/table.py | 94 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..0c8f5da2 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -358,7 +358,8 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ return table_name._select_where(columns, condition, distinct, order_by, desc, limit) if condition is not None: - condition_column = split_condition(condition)[0] + if split_condition(condition) is not None: + condition_column = split_condition(condition)[0] else: condition_column = '' diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..feb389ff 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -27,13 +27,30 @@ def split_condition(condition): if len(splt)>1: left, right = splt[0].strip(), splt[1].strip() + ''' + Here, we check if an operator exists more than 2 times, so we break it in different pieces, else + code breaks and errors pop up. So now, we have a fixed version and can use in a condition + the same operator multiple times. + ''' + if condition.count(op_key) > 1: + new_cond = condition.split() + new_cond_index = new_cond.index(op_key) + + new_cond1 = new_cond[:new_cond_index] + new_cond2 = new_cond[new_cond_index + 1:] + new_cond1 = ' '.join(new_cond1) + new_cond2 = ' '.join(new_cond2) + + left = new_cond1 + right = new_cond2 + if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. right = right.strip('"') elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.') - if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) - raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') + # if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) + # raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') return left, op_key, right diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..7c8b49bf 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -233,9 +233,16 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by # if condition is None, return all rows # if not, return the rows with values where condition is met for value if condition is not None: - column_name, operator, value = self._parse_condition(condition) - column = self.column_by_name(column_name) - rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + # check the condition status (not,between,or,and) + condition = self.Check_condition(condition, return_cols) + + # check the type of the returned value of the condition. If it's a list , the rows are ready. + if isinstance(condition, list): + rows = condition + else: + column_name, operator, value = self._parse_condition(condition) + column = self.column_by_name(column_name) + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] else: rows = [i for i in range(len(self.data))] @@ -269,6 +276,86 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by return s_table + # Check Condition, in general + def Check_condition(self, condition, cols): + new_condition = condition + if new_condition.startswith("not "): + new_condition = self.Check_NotCondition(condition, cols) + elif " between " in new_condition: + new_condition = self.Check_BetweenCondition(condition, cols); + elif " and " in new_condition: + new_condition = self.Check_AndCondition(condition, cols) + elif " or " in new_condition: + new_condition = self.Check_OrCondition(condition, cols); + + return new_condition + + # Target is to find the operator and reverse it for the `not` condition + def Check_NotCondition(self, condition, cols): + new_condition = condition.replace("not","") + + if "<=" in new_condition: + new_condition = new_condition.replace("<=", ">") + elif ">=" in new_condition: + new_condition = new_condition.replace(">=", "<") + elif "<" in new_condition: + new_condition = new_condition.replace("<", ">=") + elif ">" in new_condition: + new_condition = new_condition.replace(">", "<=") + + return new_condition + + # Target is to find column name and the values between the `and` and then make a new custom condition. + # The new condition follows the `and` condition. + def Check_BetweenCondition(self, condition, cols): + new_condition = condition.replace("between", "") + new_condition = new_condition.replace("and", "") + new_condition = new_condition.split() + + new_condition = new_condition[0] + " >= " + new_condition[1] + " and " + new_condition[0] + "<=" + new_condition[2] + new_condition = self.Check_AndCondition(new_condition, cols) + return new_condition + + # In this condition (`and`), we need to get the common values from the lists created in this. Values between `and` + # could be 2+, so we need to loop through all the possible values. + # in the end a list with all values remaining. + def Check_AndCondition(self, condition, cols): + new_condition = condition.split(" and ") + + final_result = [] + counter = 0 + for x in new_condition: + current_results = self.GetResults_Condition(x, cols) + if counter == 0: + final_result = current_results + counter += 1 + final_result = list(set(final_result)&set(current_results)) + + new_condition = final_result + return new_condition + + # In this condition (`or`), we need to get the all the values from the lists created in this. Values between `or` + # could be 2+, so we need to loop through all the possible values. + # in the end a list with all values remaining. + def Check_OrCondition(self, condition, cols): + new_condition = condition.split(" or ") + + final_result = [] + for x in new_condition: + current_results = self.GetResults_Condition(x, cols) + final_result += current_results + + new_condition = list(set(final_result)) + return new_condition + + # Get a condition as parameter and return the rows + def GetResults_Condition(self, condition, cols): + column_name, operator, value = self._parse_condition(condition) + column = self.column_by_name(column_name) + rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)] + + return rows + def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None): @@ -558,6 +645,7 @@ def _parse_condition(self, condition, join=False): # cast the value with the specified column's type and return the column name, the operator and the casted value left, op, right = split_condition(condition) + if left not in self.column_names: raise ValueError(f'Condition is not valid (cant find column name)') coltype = self.column_types[self.column_names.index(left)] From 775a0b8b8a141fa873fde91ec0891f827ab9da64 Mon Sep 17 00:00:00 2001 From: Ektoras Patrikios Date: Mon, 20 Feb 2023 21:33:05 +0200 Subject: [PATCH 2/3] CREATE TABLE enriched with UNIQUE declaration. BTree index over unique(non-pk) columns is now supported! --- mdb.py | 8 ++++++ miniDB/database.py | 65 +++++++++++++++++++++++++++++++++++----------- miniDB/joins.py | 2 +- miniDB/table.py | 15 +++++++++-- 4 files changed, 72 insertions(+), 18 deletions(-) diff --git a/mdb.py b/mdb.py index a981e5be..9fd87daf 100644 --- a/mdb.py +++ b/mdb.py @@ -97,6 +97,7 @@ def create_query_plan(query, keywords, action): args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1] dic['create table'] = dic['create table'].removesuffix(args).strip() arg_nopk = args.replace('primary key', '')[1:-1] + arg_nopk = arg_nopk.replace('unique', '')[1:-1] arglist = [val.strip().split(' ') for val in arg_nopk.split(',')] dic['column_names'] = ','.join([val[0] for val in arglist]) dic['column_types'] = ','.join([val[1] for val in arglist]) @@ -105,6 +106,13 @@ def create_query_plan(query, keywords, action): dic['primary key'] = arglist[arglist.index('primary')-2] else: dic['primary key'] = None + + # CREATING ENVIROMENT FOR UNIQUE (INCLUDE `UNIQUE` IN DIC) + if 'unique' in args: + arglist = args[1:-1].split(' ') + dic['unique'] = arglist[arglist.index('unique,') - 2] + else: + dic['unique'] = None if action=='import': dic = {'import table' if key=='import' else key: val for key, val in dic.items()} diff --git a/miniDB/database.py b/miniDB/database.py index 0c8f5da2..89d99344 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -101,7 +101,7 @@ def _update(self): self._update_meta_insert_stack() - def create_table(self, name, column_names, column_types, primary_key=None, load=None): + def create_table(self, name, column_names, column_types, primary_key=None, unique=None, load=None): ''' This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name @@ -109,13 +109,14 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= name: string. Name of table. column_names: list. Names of columns. column_types: list. Types of columns. + unique: string. Unique column(if it exists). primary_key: string. The primary key (if it exists). load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) - self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)}) + self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, unique=unique, load=load)}) # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) - # check that new dynamic var doesnt exist already + # check that new dynamic var doesn't exist already # self.no_of_tables += 1 self._update() self.save_database() @@ -352,7 +353,6 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ distinct: boolean. If True, the resulting table will contain only unique rows. ''' - # print(table_name) self.load_database() if isinstance(table_name,Table): return table_name._select_where(columns, condition, distinct, order_by, desc, limit) @@ -367,6 +367,8 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ # self.lock_table(table_name, mode='x') if self.is_locked(table_name): return + + if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] bt = self._load_idx(index_name) @@ -660,14 +662,15 @@ def create_index(self, index_name, table_name, index_type='btree'): table_name: string. Table name (must be part of database). index_name: string. Name of the created index. ''' - if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. Table has no primary key.') + + # if self.tables[table_name].pk_idx is None: # if no primary key, no index + # raise Exception('Cannot create index. Table has no primary key.') if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): # currently only btree is supported. This can be changed by adding another if. if index_type=='btree': - logging.info('Creating Btree index.') + # logging.info('Creating Btree index.') # insert a record with the name of the index and the table on which it's created to the meta_indexes table - self.tables['meta_indexes']._insert([table_name, index_name]) + # self.tables['meta_indexes']._insert([table_name, index_name]) # crate the actual index self._construct_index(table_name, index_name) self.save_database() @@ -681,16 +684,48 @@ def _construct_index(self, table_name, index_name): Args: table_name: string. Table name (must be part of database). index_name: string. Name of the created index. + + Check if a column was specified`table_name(column_name)` + + Actions: + If column is not specified, raise exception + We want both table_name and column_name, to check for PK or Unique column + if one of two exists then btree index is valid ''' bt = Btree(3) # 3 is arbitrary - # for each record in the primary key of the table, insert its value and index to the btree - for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): - if key is None: - continue - bt.insert(key, idx) - # save the btree - self._save_index(index_name, bt) + if "(" in table_name: + brackett = table_name.split("(") + table_name = brackett[0].strip() + column_name = brackett[1].replace(")", "").strip() + table_to_check_all, table_to_check = None, None + + if self.tables[table_name].pk is not None: + if self.tables[table_name].pk == column_name: + table_to_check_all = self.tables[table_name].column_by_name(self.tables[table_name].pk) + table_to_check = list(set(self.tables[table_name].column_by_name(self.tables[table_name].pk))) + if self.tables[table_name].unique is not None: + if self.tables[table_name].unique == column_name: + table_to_check_all = self.tables[table_name].column_by_name(self.tables[table_name].unique) + table_to_check = list(set(self.tables[table_name].column_by_name(self.tables[table_name].unique))) + + if table_to_check_all is not None: + if len(table_to_check_all) == len(table_to_check): + self.tables['meta_indexes']._insert([table_name, index_name]) + + # for each record in the primary key of the table, insert its value and index to the btree + for idx, key in enumerate(table_to_check): + if key is None: + continue + bt.insert(key, idx) + + # save the btree + print("Btree index", index_name, "created successfully!") + self._save_index(index_name, bt) + else: + raise Exception('Cannot create index. Column is not PK or UNIQUE!') + else: + raise Exception('Cannot create index. Column Name was not specified!') def _has_index(self, table_name): diff --git a/miniDB/joins.py b/miniDB/joins.py index 81fd0915..efc63e96 100644 --- a/miniDB/joins.py +++ b/miniDB/joins.py @@ -156,7 +156,7 @@ def join(self): join_table_name = '' join_table_colnames = left_names + right_names join_table_coltypes = self.left_table.column_types + self.right_table.column_types - join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes) + join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types=join_table_coltypes) # Save merged file first. The hypothesis is that the RAM cannot fit the file, thus we have it saved # However we load the file to display it like this, might need to be changed in the future diff --git a/miniDB/table.py b/miniDB/table.py index 7c8b49bf..969110b2 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -26,7 +26,7 @@ class Table: - a dictionary that includes the appropriate info (all the attributes in __init__) ''' - def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): + def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, unique=None, load=None): if load is not None: # if load is a dict, replace the object dict with it (replaces the object with the specified one) @@ -67,7 +67,14 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= else: self.pk_idx = None + if unique is not None: + print("unique_idx", unique, self.column_names.index(unique)) + self.unique_idx = self.column_names.index(unique) + else: + self.unique_idx = None + self.pk = primary_key + self.unique = unique # self._update() # if any of the name, columns_names and column types are none. return an empty table object @@ -465,7 +472,7 @@ class CustomFailException(Exception): join_table_name = '' join_table_colnames = left_names+right_names join_table_coltypes = self.column_types+table_right.column_types - join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes) + join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types=join_table_coltypes) return join_table, column_index_left, column_index_right, operator @@ -620,6 +627,10 @@ def show(self, no_of_rows=None, is_locked=False): if self.pk_idx is not None: # table has a primary key, add PK next to the appropriate column headers[self.pk_idx] = headers[self.pk_idx]+' #PK#' + if self.unique_idx is not None: + # table has unique column, add UNIQUE next to the appropriate column + headers[self.unique_idx] = headers[self.unique_idx] + ' #UNIQUE#' + # detect the rows that are no tfull of nones (these rows have been deleted) # if we dont skip these rows, the returning table has empty rows at the deleted positions non_none_rows = [row for row in self.data if any(row)] From 7c0b5294b357b767ca4dc551066482a17ddb5535 Mon Sep 17 00:00:00 2001 From: kostasbekos Date: Mon, 20 Feb 2023 23:06:13 +0200 Subject: [PATCH 3/3] Added some comments + fixes for CREATE INDEX --- miniDB/database.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/miniDB/database.py b/miniDB/database.py index 89d99344..5f3dd5c2 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -698,30 +698,31 @@ def _construct_index(self, table_name, index_name): brackett = table_name.split("(") table_name = brackett[0].strip() column_name = brackett[1].replace(")", "").strip() - table_to_check_all, table_to_check = None, None + table_to_check_all = None if self.tables[table_name].pk is not None: + # check if column with pk is equal to column specified if self.tables[table_name].pk == column_name: table_to_check_all = self.tables[table_name].column_by_name(self.tables[table_name].pk) - table_to_check = list(set(self.tables[table_name].column_by_name(self.tables[table_name].pk))) + if self.tables[table_name].unique is not None: + # check if unique column with pk is equal to column specified if self.tables[table_name].unique == column_name: table_to_check_all = self.tables[table_name].column_by_name(self.tables[table_name].unique) - table_to_check = list(set(self.tables[table_name].column_by_name(self.tables[table_name].unique))) + if table_to_check_all is not None: - if len(table_to_check_all) == len(table_to_check): - self.tables['meta_indexes']._insert([table_name, index_name]) + self.tables['meta_indexes']._insert([table_name, index_name]) - # for each record in the primary key of the table, insert its value and index to the btree - for idx, key in enumerate(table_to_check): - if key is None: - continue + # for each record in the primary key of the table, insert its value and index to the btree + for idx, key in enumerate(table_to_check_all): + if key is None: + continue bt.insert(key, idx) - # save the btree - print("Btree index", index_name, "created successfully!") - self._save_index(index_name, bt) + # save the btree + print("Btree index", index_name, "created successfully!") + self._save_index(index_name, bt) else: raise Exception('Cannot create index. Column is not PK or UNIQUE!') else: