Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
23 changes: 0 additions & 23 deletions .autopilot.json

This file was deleted.

32 changes: 0 additions & 32 deletions .cursor/rules/after_each_chat.mdc

This file was deleted.

2 changes: 1 addition & 1 deletion ADFAR/src/delete_folder.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

# 设置目标路径
base_dir="/projects/p32013/DNABERT-meta/ADFAR/src/experiments/GUE"
base_dir="ADFAR/src/experiments/GUE"

# 遍历 base_dir 下所有子文件夹
for dir in "$base_dir"/*/; do
Expand Down
2 changes: 1 addition & 1 deletion ADFAR/src/generate_freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def generate_subword_freq(data_dir):


# Generate frequencies for GUE datasets
gue_dir = "/projects/p32013/DNABERT-meta/GUE"
gue_dir = "GUE"
generate_subword_freq(gue_dir)


38 changes: 19 additions & 19 deletions ADFAR/src/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


# Set the base directory for the datasets
base_dir = '/projects/p32013/DNABERT-meta/GUE'
base_dir = 'GUE'

dataset_dirs = ["H3", "H3K14ac", "H3K36me3", "H3K4me1", "H3K4me2", "H3K4me3", "H3K79me3",
"H3K9ac", "H4", "H4ac", "prom_core_all", "prom_core_notata", "prom_core_tata",
Expand All @@ -23,7 +23,7 @@
for dataset_dir in dataset_dirs:
dataset_path = os.path.join(base_dir, dataset_dir, 'cat.csv')
### model ckpt
target_model_path = f"/scratch/hlv8980/Attack_Benchmark/models/{task}/{dataset_dir}/origin"
target_model_path = f"/scratch/anonymous/Attack_Benchmark/models/{task}/{dataset_dir}/origin"

# # Check if the dataset file exists
# if os.path.exists(dataset_path):
Expand All @@ -45,26 +45,26 @@
f'--target_model {task} ' \
f'--target_model_path {target_model_path} ' \
'--max_seq_length 256 --batch_size 32 ' \
f'--counter_fitting_embeddings_path /projects/p32013/DNABERT-meta/TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--counter_fitting_cos_sim_path /projects/p32013/DNABERT-meta/TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
'--USE_cache_path /projects/p32013/DNABERT-meta/TextFooler/tf_cache ' \
f'--counter_fitting_embeddings_path TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--counter_fitting_cos_sim_path TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
'--USE_cache_path TextFooler/tf_cache ' \
f'--nclasses 2 --output_dir adv_results/{task}/{dataset_dir}'


command4 = 'python get_pure_adversaries.py ' \
f'--adversaries_path adv_results/{task}/{dataset_dir}/adversaries.txt ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/attacked_data ' \
f'--output_path GUE/{dataset_dir}/{task}/attacked_data ' \
'--times 1 ' \
'--change 0 ' \
'--txtortsv tsv ' \
'--datasize 9662'

# 1.3 Construct the training data
command5 = 'python combine_data.py ' \
f'--add_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/attacked_data/pure_adversaries.tsv ' \
f'--add_file GUE/{dataset_dir}/{task}/attacked_data/pure_adversaries.tsv ' \
'--change_label 2 ' \
f'--original_dataset /projects/p32013/DNABERT-meta/GUE/{dataset_dir} ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--original_dataset GUE/{dataset_dir} ' \
f'--output_path GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
'--isMR 0'

command6 = 'python run_simplification.py ' \
Expand All @@ -73,17 +73,17 @@
'--syn_num 20 ' \
'--most_freq_num 10 ' \
'--simplify_version random_freq_v1 ' \
f'--cos_sim_file /projects/p32013/DNABERT-meta/TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
f'--counterfitted_vectors /projects/p32013/DNABERT-meta/TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--file_to_simplify /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/train.tsv ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/ ' \
f'--freq_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/subword_frequencies.json'
f'--cos_sim_file TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
f'--counterfitted_vectors TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--file_to_simplify GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/train.tsv ' \
f'--output_path GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/ ' \
f'--freq_file GUE/{dataset_dir}/subword_frequencies.json'

command7 = 'python combine_data.py ' \
f'--add_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/train.tsv ' \
f'--add_file GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/train.tsv ' \
'--change_label 4 ' \
f'--original_dataset /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ --isMR 0 '
f'--original_dataset GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--output_path GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ --isMR 0 '

# Step2. Train our proposed model on the constructed training data
command8 = 'WANDB_DISABLED=true python run_classification_adv.py ' \
Expand All @@ -92,8 +92,8 @@
'--do_train ' \
'--do_eval ' \
'--attention 2 ' \
f'--data_dir /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ ' \
f'--output_dir /projects/p32013/DNABERT-meta/ADFAR/src/experiments/GUE/{dataset_dir}/{task}/4times_adv_double_0-7 ' \
f'--data_dir GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ ' \
f'--output_dir ADFAR/src/experiments/GUE/{dataset_dir}/{task}/4times_adv_double_0-7 ' \
f'--model_name_or_path {target_model_path} ' \
'--per_device_train_batch_size 2 ' \
'--per_device_eval_batch_size 2 ' \
Expand Down
2 changes: 1 addition & 1 deletion ADFAR/src/run.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
tasks=("nt1")

cd /projects/p32013/DNABERT-meta/ADFAR/src
cd ADFAR/src

for task in "${tasks[@]}"
do
Expand Down
38 changes: 19 additions & 19 deletions ADFAR/src/run_DNABERT.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


# Set the base directory for the datasets
base_dir = '/projects/p32013/DNABERT-meta/GUE'
base_dir = 'GUE'

dataset_dirs = ["2", "3"]

Expand All @@ -20,7 +20,7 @@
for dataset_dir in dataset_dirs:
dataset_path = os.path.join(base_dir, dataset_dir, 'cat.csv')
### model ckpt
target_model_path = f"/scratch/hlv8980/Attack_Benchmark/models/{task}/{dataset_dir}/origin"
target_model_path = f"/scratch/anonymous/Attack_Benchmark/models/{task}/{dataset_dir}/origin"

# # Check if the dataset file exists
# if os.path.exists(dataset_path):
Expand All @@ -42,26 +42,26 @@
f'--target_model {task} ' \
f'--target_model_path {target_model_path} ' \
'--max_seq_length 256 --batch_size 32 ' \
f'--counter_fitting_embeddings_path /projects/p32013/DNABERT-meta/TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--counter_fitting_cos_sim_path /projects/p32013/DNABERT-meta/TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
'--USE_cache_path /projects/p32013/DNABERT-meta/TextFooler/tf_cache ' \
f'--counter_fitting_embeddings_path TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--counter_fitting_cos_sim_path TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
'--USE_cache_path TextFooler/tf_cache ' \
f'--nclasses 2 --output_dir adv_results/{task}/{dataset_dir}'


command4 = 'python get_pure_adversaries.py ' \
f'--adversaries_path adv_results/{task}/{dataset_dir}/adversaries.txt ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/attacked_data ' \
f'--output_path GUE/{dataset_dir}/{task}/attacked_data ' \
'--times 1 ' \
'--change 0 ' \
'--txtortsv tsv ' \
'--datasize 9662'

# 1.3 Construct the training data
command5 = 'python combine_data.py ' \
f'--add_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/attacked_data/pure_adversaries.tsv ' \
f'--add_file GUE/{dataset_dir}/{task}/attacked_data/pure_adversaries.tsv ' \
'--change_label 2 ' \
f'--original_dataset /projects/p32013/DNABERT-meta/GUE/{dataset_dir} ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--original_dataset GUE/{dataset_dir} ' \
f'--output_path GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
'--isMR 0'

command6 = 'python run_simplification.py ' \
Expand All @@ -70,17 +70,17 @@
'--syn_num 20 ' \
'--most_freq_num 10 ' \
'--simplify_version random_freq_v1 ' \
f'--cos_sim_file /projects/p32013/DNABERT-meta/TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
f'--counterfitted_vectors /projects/p32013/DNABERT-meta/TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--file_to_simplify /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/train.tsv ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/ ' \
f'--freq_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/subword_frequencies.json'
f'--cos_sim_file TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
f'--counterfitted_vectors TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--file_to_simplify GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/train.tsv ' \
f'--output_path GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/ ' \
f'--freq_file GUE/{dataset_dir}/subword_frequencies.json'

command7 = 'python combine_data.py ' \
f'--add_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/train.tsv ' \
f'--add_file GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/train.tsv ' \
'--change_label 4 ' \
f'--original_dataset /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ --isMR 0 '
f'--original_dataset GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--output_path GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ --isMR 0 '

# Step2. Train our proposed model on the constructed training data
command8 = 'WANDB_DISABLED=true python run_classification_adv.py ' \
Expand All @@ -89,8 +89,8 @@
'--do_train ' \
'--do_eval ' \
'--attention 2 ' \
f'--data_dir /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ ' \
f'--output_dir /projects/p32013/DNABERT-meta/ADFAR/src/experiments/GUE/{dataset_dir}/{task}/4times_adv_double_0-7 ' \
f'--data_dir GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ ' \
f'--output_dir ADFAR/src/experiments/GUE/{dataset_dir}/{task}/4times_adv_double_0-7 ' \
f'--model_name_or_path {target_model_path} ' \
'--per_device_train_batch_size 2 ' \
'--per_device_eval_batch_size 2 ' \
Expand Down
2 changes: 1 addition & 1 deletion ADFAR/src/run_DNABERT.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
tasks=("dnabert")

cd /projects/p32013/DNABERT-meta/ADFAR/src
cd ADFAR/src

for task in "${tasks[@]}"
do
Expand Down
2 changes: 1 addition & 1 deletion ADFAR/src/run_hyena.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
tasks=("hyena")

cd /projects/p32013/DNABERT-meta/ADFAR/src
cd ADFAR/src

for task in "${tasks[@]}"
do
Expand Down
38 changes: 19 additions & 19 deletions ADFAR/src/run_nt1.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


# Set the base directory for the datasets
base_dir = '/projects/p32013/DNABERT-meta/GUE'
base_dir = 'GUE'

dataset_dirs = ["H3K9ac"]

Expand All @@ -20,7 +20,7 @@
for dataset_dir in dataset_dirs:
dataset_path = os.path.join(base_dir, dataset_dir, 'cat.csv')
### model ckpt
target_model_path = f"/scratch/hlv8980/Attack_Benchmark/models/{task}/{dataset_dir}/origin"
target_model_path = f"/scratch/anonymous/Attack_Benchmark/models/{task}/{dataset_dir}/origin"

# # Check if the dataset file exists
# if os.path.exists(dataset_path):
Expand All @@ -42,26 +42,26 @@
f'--target_model {task} ' \
f'--target_model_path {target_model_path} ' \
'--max_seq_length 256 --batch_size 32 ' \
f'--counter_fitting_embeddings_path /projects/p32013/DNABERT-meta/TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--counter_fitting_cos_sim_path /projects/p32013/DNABERT-meta/TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
'--USE_cache_path /projects/p32013/DNABERT-meta/TextFooler/tf_cache ' \
f'--counter_fitting_embeddings_path TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--counter_fitting_cos_sim_path TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
'--USE_cache_path TextFooler/tf_cache ' \
f'--nclasses 2 --output_dir adv_results/{task}/{dataset_dir}'


command4 = 'python get_pure_adversaries.py ' \
f'--adversaries_path adv_results/{task}/{dataset_dir}/adversaries.txt ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/attacked_data ' \
f'--output_path GUE/{dataset_dir}/{task}/attacked_data ' \
'--times 1 ' \
'--change 0 ' \
'--txtortsv tsv ' \
'--datasize 9662'

# 1.3 Construct the training data
command5 = 'python combine_data.py ' \
f'--add_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/attacked_data/pure_adversaries.tsv ' \
f'--add_file GUE/{dataset_dir}/{task}/attacked_data/pure_adversaries.tsv ' \
'--change_label 2 ' \
f'--original_dataset /projects/p32013/DNABERT-meta/GUE/{dataset_dir} ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--original_dataset GUE/{dataset_dir} ' \
f'--output_path GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
'--isMR 0'

command6 = 'python run_simplification.py ' \
Expand All @@ -70,17 +70,17 @@
'--syn_num 20 ' \
'--most_freq_num 10 ' \
'--simplify_version random_freq_v1 ' \
f'--cos_sim_file /projects/p32013/DNABERT-meta/TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
f'--counterfitted_vectors /projects/p32013/DNABERT-meta/TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--file_to_simplify /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/train.tsv ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/ ' \
f'--freq_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/subword_frequencies.json'
f'--cos_sim_file TextFooler/cos_sim_counter_fitting/cos_sim_counter_fitting_{task}.npy ' \
f'--counterfitted_vectors TextFooler/embeddings/subword_{task}_embeddings.txt ' \
f'--file_to_simplify GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/train.tsv ' \
f'--output_path GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/ ' \
f'--freq_file GUE/{dataset_dir}/subword_frequencies.json'

command7 = 'python combine_data.py ' \
f'--add_file /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/train.tsv ' \
f'--add_file GUE/{dataset_dir}/{task}/simplified_data/2times_adv_0-3/train.tsv ' \
'--change_label 4 ' \
f'--original_dataset /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--output_path /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ --isMR 0 '
f'--original_dataset GUE/{dataset_dir}/{task}/combined_data/2times_adv_0-3/ ' \
f'--output_path GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ --isMR 0 '

# Step2. Train our proposed model on the constructed training data
command8 = 'WANDB_DISABLED=true python run_classification_adv.py ' \
Expand All @@ -89,8 +89,8 @@
'--do_train ' \
'--do_eval ' \
'--attention 2 ' \
f'--data_dir /projects/p32013/DNABERT-meta/GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ ' \
f'--output_dir /projects/p32013/DNABERT-meta/ADFAR/src/experiments/GUE/{dataset_dir}/{task}/4times_adv_double_0-7 ' \
f'--data_dir GUE/{dataset_dir}/{task}/combined_data/4times_adv_0-7/ ' \
f'--output_dir ADFAR/src/experiments/GUE/{dataset_dir}/{task}/4times_adv_double_0-7 ' \
f'--model_name_or_path {target_model_path} ' \
'--per_device_train_batch_size 2 ' \
'--per_device_eval_batch_size 2 ' \
Expand Down
2 changes: 1 addition & 1 deletion ADFAR/src/run_nt1.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
tasks=("nt1")

cd /projects/p32013/DNABERT-meta/ADFAR/src
cd ADFAR/src

for task in "${tasks[@]}"
do
Expand Down
Loading