Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 28 additions & 28 deletions ncm.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def pearson_def(x, y):
def createDataSetFromDir(base_dir, bedFile):
for root, dirs, files in os.walk(base_dir):
for file in files:
if not file.endswith(".vcf"):
if not file.endswith(".vcf"):
continue

link = root + '/' + file
Expand Down Expand Up @@ -223,7 +223,7 @@ def createDataSetFromDir(base_dir, bedFile):
sum_file[file] = sum[file]

for key in features:
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(scores[key])
else:
glob_scores[file] = [scores[key]]
Expand All @@ -236,7 +236,7 @@ def createDataSetFromDir(base_dir, bedFile):
for key in features:
if key not in score_set[file]:
score_set[file][key] = 0
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(score_set[file][key])
else:
glob_scores[file] = [score_set[file][key]]
Expand Down Expand Up @@ -406,7 +406,7 @@ def createDataSetFromList(base_list, bedFile):
sum_file[file] = sum[file]

for key in features:
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(scores[key])
else:
glob_scores[file] = [scores[key]]
Expand All @@ -419,7 +419,7 @@ def createDataSetFromList(base_list, bedFile):
for key in features:
if key not in score_set[file]:
score_set[file][key] = 0
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(score_set[file][key])
else:
glob_scores[file] = [score_set[file][key]]
Expand Down Expand Up @@ -481,7 +481,7 @@ def createDataSetFromDir_TEST(base_dir, bedFile,order):
sum_file[file] = sum

for key in features:
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(scores[key])
else:
glob_scores[file] = [scores[key]]
Expand Down Expand Up @@ -542,7 +542,7 @@ def createDataSetFromList_TEST(base_list, bedFile,order):
sum_file[file] = sum

for key in features:
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(scores[key])
else:
glob_scores[file] = [scores[key]]
Expand Down Expand Up @@ -608,7 +608,7 @@ def clustering(K):
sum = sum + max_value
Pos_count = Pos_count + 1
outPOS=str(label[target]) + "\tmatched to\t" + str(label[max_indice])+ "\tscore=\t" + str(max_value)
print outPOS
print(outPOS)
#POS_F.write(outPOS + "\n")
orderCount = orderCount + 1

Expand Down Expand Up @@ -683,8 +683,8 @@ def classify(T):
NEG_F.write(outNEG + "\n")


print sum/Pos_count
print othersum/Neg_count
print(sum/Pos_count)
print(othersum/Neg_count)

POS_F.close()
NEG_F.close()
Expand Down Expand Up @@ -915,8 +915,8 @@ def classifying():
output_matrix = dict()

if out_tag!="stdout":
out_f = open(outdir + "/" + out_tag + "_all.txt","w")
out_matched = open(outdir + "/" + out_tag + "_matched.txt","w")
out_f = open(outdir + "/" + out_tag + "_all.txt","w")
out_matched = open(outdir + "/" + out_tag + "_matched.txt","w")

for i in range(0, len(keyList)):
output_matrix[keyList[i]] = dict()
Expand All @@ -939,7 +939,7 @@ def classifying():
p1V,p1S, p0V, p0S = trainNV(array(trainMatrix),array(trainCategory))
result = classifyNV(samples[i],p0V,p0S, p1V, p1S)
if result[1] == 1:
print str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i]
print(str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i])
predStrength.append(result[0])
else :
for i in range(0,len(samples)):
Expand All @@ -955,13 +955,13 @@ def classifying():
output_matrix[temp[i][0].strip()][temp[i][1].strip()] = samples[i]
output_matrix[temp[i][1].strip()][temp[i][0].strip()] = samples[i]
if out_tag=="stdout":
print str(temp[i][0]) + '\tmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0]) + '\tmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0]) + '\tmatched\t' + str(temp[i][1]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
out_matched.write(str(temp[i][0]) + '\tmatched\t' + str(temp[i][1]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
else:
if out_tag=="stdout":
print str(temp[i][0]) + '\tunmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0]) + '\tunmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0]) + '\tunmatched\t' + str(temp[i][1]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
predStrength.append(result[0])
Expand Down Expand Up @@ -990,8 +990,8 @@ def classifying():

output_matrix_f.close()
if out_tag!="stdout":
out_f.close()
out_matched.close()
out_f.close()
out_matched.close()



Expand Down Expand Up @@ -1070,7 +1070,7 @@ def classifying_test():
p1V,p1S, p0V, p0S = trainNV(array(trainMatrix),array(trainCategory))
result = classifyNV(samples[i],p0V,p0S, p1V, p1S)
if result[1] == 1:
print str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i]
print(str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i])
predStrength.append(result[0])
else :
for i in range(0,len(samples)):
Expand All @@ -1081,13 +1081,13 @@ def classifying_test():
output_matrix[temp[i][0].strip()][temp[i][1].strip()] = samples[i]
output_matrix[temp[i][1].strip()][temp[i][0].strip()] = samples[i]
if out_tag=="stdout":
print str(temp[i][0]) + '\tmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0]) + '\tmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0]) + '\tmatched\t' + str(temp[i][1]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
out_matched.write(str(temp[i][0]) + '\tmatched\t' + str(temp[i][1]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
else:
if out_tag=="stdout":
print str(temp[i][0]) + '\tunmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0]) + '\tunmatched\t',str(temp[i][1]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0]) + '\tunmatched\t' + str(temp[i][1]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
predStrength.append(result[0])
Expand Down Expand Up @@ -1179,12 +1179,12 @@ def run_mpileup():
if "NCM_HOME" in os.environ.keys():
INSTALL_DIR=os.environ['NCM_HOME'] + "/"
else:
print "WARNNING : NCM_HOME is not defined yet. Therefore, program will try to search ncm.conf file from the current directory"
print("WARNNING : NCM_HOME is not defined yet. Therefore, program will try to search ncm.conf file from the current directory")
INSTALL_DIR=""
with open(INSTALL_DIR + "ncm.conf",'r') as F:
for line in F.readlines():
temp = line.split('=')
if temp[0].startswith("SAMTOOLS"):
if temp[0].startswith("SAMTOOLS"):
SAMTOOLS = temp[1].strip()
elif temp[0].startswith("BCFTOOLS"):
BCFTOOLS = temp[1].strip()
Expand All @@ -1203,7 +1203,7 @@ def run_mpileup():
if version_tag.find("Version") != -1:
version_list = version_tag.split(" ")
version = version_list[1]
print version
print(version)

for sample in bam_list:
filename = sample.split("/")
Expand All @@ -1212,7 +1212,7 @@ def run_mpileup():
command = SAMTOOLS + " mpileup -I -uf " + REF + " -l " + bedFile + " " + sample + " | " + BCFTOOLS + " view -cg - > " + outdir + "/" + tag + ".vcf"
else:
command = SAMTOOLS + " mpileup -uf " + REF + " -l " + bedFile + " " + sample + " | " + BCFTOOLS + " call -c > " + outdir + "/" + tag + ".vcf"
print command
print(command)
call(command,shell=True)
# proc = subprocess.Popen(command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
# return_code = proc.wait()
Expand Down Expand Up @@ -1443,7 +1443,7 @@ def output_filter():
get_bam_list()
run_mpileup()
base_dir = outdir
print "Generate Data Set from " + base_dir + "\nusing this bed file : " + bedFile
print("Generate Data Set from " + base_dir + "\nusing this bed file : " + bedFile)
if args.testsamplename != None:
testsamplename = args.testsamplename
createDataSetFromDir_TEST(base_dir,bedFile,"1")
Expand All @@ -1454,7 +1454,7 @@ def output_filter():
classifying()
elif args.VCF_type != False :
if args.datadir != None :
print "Generate Data Set from " + base_dir + "\nusing this bed file : " + bedFile
print("Generate Data Set from " + base_dir + "\nusing this bed file : " + bedFile)
if args.testsamplename != None:
testsamplename = args.testsamplename
createDataSetFromDir_TEST(base_dir,bedFile,"1")
Expand All @@ -1464,7 +1464,7 @@ def output_filter():
createDataSetFromDir(base_dir,bedFile)
classifying()
elif args.datalist != None :
print "Generate Data Set from " + base_list + "\nusing this bed file : " + bedFile
print("Generate Data Set from " + base_list + "\nusing this bed file : " + bedFile)
if args.testsamplename != None:
testsamplename = args.testsamplename
createDataSetFromList_TEST(base_list,bedFile,"1")
Expand All @@ -1487,4 +1487,4 @@ def output_filter():
pdf_tag = out_tag
generate_R_scripts()
run_R_scripts()
# remove_internal_files()
# remove_internal_files()
34 changes: 17 additions & 17 deletions ncm_fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def createDataSetFromDir(base_dir, bedFile):
# sum_file[file] = sum

for key in features:
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(scores[key])
else:
glob_scores[file] = [scores[key]]
Expand Down Expand Up @@ -185,7 +185,7 @@ def createDataSetFromDir_test(base_dir, bedFile,order):
# sum_file[file] = sum

for key in features:
if glob_scores.has_key(file):
if file in glob_scores:
glob_scores[file].append(scores[key])
else:
glob_scores[file] = [scores[key]]
Expand Down Expand Up @@ -298,15 +298,15 @@ def plotROC(predStrengths, classLabels):
plt.title('ROC curves')
ax.axis([0,1,0,1])
plt.show()
print "the Area Under the Curve is: ",ySum*xStep
print("the Area Under the Curve is: ",ySum*xStep)


def run_fastq_version():
INSTALL_DIR=""
if "NCM_HOME" in os.environ.keys():
INSTALL_DIR=os.environ['NCM_HOME'] + "/"
else :
print "WARNNING : NCM_HOME is not defined yet. Therefore, program will try to search ngscheckmate_fastq file from the current directory"
print("WARNNING : NCM_HOME is not defined yet. Therefore, program will try to search ngscheckmate_fastq file from the current directory")
INSTALL_DIR="./"

command = INSTALL_DIR + "ngscheckmate_fastq "
Expand All @@ -326,9 +326,9 @@ def run_fastq_version():
if PE == 1:
command = command + "-1 " + fastq1 + " -2 " + fastq2 +" " + bed_file +" > " + outdir + "/" + temp_out + ".ncm"
if PE == 0:
command = command + "-1 " + fastq1 +" " + bed_file +" > " + outdir + "/" + temp_out + ".ncm"
command = command + "-1 " + fastq1 +" " + bed_file +" > " + outdir + "/" + temp_out + ".ncm"

print command
print(command)

proc = subprocess.Popen(command, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
Expand Down Expand Up @@ -420,7 +420,7 @@ def classifying():
p1V,p1S, p0V, p0S = trainNV(array(trainMatrix),array(trainCategory))
result = classifyNV(samples[i],p0V,p0S, p1V, p1S)
if result[1] == 1:
print str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i]
print(str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i])
predStrength.append(result[0])
# AUCs.append(calAUC(mat(predStrength),classLabel))
# plotROC(mat(predStrength),classLabel)
Expand All @@ -439,13 +439,13 @@ def classifying():
output_matrix[temp[i][0].strip()][temp[i][1].strip()] = samples[i]
output_matrix[temp[i][1].strip()][temp[i][0].strip()] = samples[i]
if out_tag=="stdout":
print str(temp[i][0][:-4]) + '\tmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0][:-4]) + '\tmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0][:-4]) + '\tmatched\t' + str(temp[i][1][:-4]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
out_matched.write(str(temp[i][0][:-4]) + '\tmatched\t' + str(temp[i][1][:-4]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
else:
if out_tag=="stdout":
print str(temp[i][0][:-4]) + '\tunmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0][:-4]) + '\tunmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0][:-4]) + '\tunmatched\t' + str(temp[i][1][:-4]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
#print sum_file[temp[i][0]],sum_file[temp[i][1].strip()]
Expand Down Expand Up @@ -546,7 +546,7 @@ def classifying_test():
p1V,p1S, p0V, p0S = trainNV(array(trainMatrix),array(trainCategory))
result = classifyNV(samples[i],p0V,p0S, p1V, p1S)
if result[1] == 1:
print str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i]
print(str(temp[i][0]) + '\tsample is matched to\t',str(temp[i][1]),'\t', samples[i])
predStrength.append(result[0])
# AUCs.append(calAUC(mat(predStrength),classLabel))
# plotROC(mat(predStrength),classLabel)
Expand All @@ -560,13 +560,13 @@ def classifying_test():
output_matrix[temp[i][0].strip()][temp[i][1].strip()] = samples[i]
output_matrix[temp[i][1].strip()][temp[i][0].strip()] = samples[i]
if out_tag=="stdout":
print str(temp[i][0][:-4]) + '\tmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0][:-4]) + '\tmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0][:-4]) + '\tmatched\t' + str(temp[i][1][:-4]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
out_matched.write(str(temp[i][0][:-4]) + '\tmatched\t' + str(temp[i][1][:-4]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
else:
if out_tag=="stdout":
print str(temp[i][0][:-4]) + '\tunmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2)
print(str(temp[i][0][:-4]) + '\tunmatched\t',str(temp[i][1][:-4]),'\t', round(samples[i],4),'\t',round(depth,2))
else :
out_f.write(str(temp[i][0][:-4]) + '\tunmatched\t' + str(temp[i][1][:-4]) + '\t'+ str(round(samples[i],4)) + '\t' + str(round(depth,2)) + '\n')
#print sum_file[temp[i][0]],sum_file[temp[i][1].strip()]
Expand Down Expand Up @@ -881,9 +881,9 @@ def output_filter():
temp_out = temp[1]
run_fastq_version()
else:
print "Input File Error: Each line should be contain one or two fastq files name with tab delimited"
print line.strip()
print "upper format is invalid"
print("Input File Error: Each line should be contain one or two fastq files name with tab delimited")
print(line.strip())
print("upper format is invalid")

# set directories
base_dir = outdir
Expand Down Expand Up @@ -920,12 +920,12 @@ def output_filter():

if args.testsamplename != None:
testsamplename = args.testsamplename
print "Generate Data Set from " + outdir + "\nusing this bed file : " + bedFile
print("Generate Data Set from " + outdir + "\nusing this bed file : " + bedFile)
createDataSetFromDir_test(outdir,bedFile,"1")
createDataSetFromDir_test(outdir,bedFile,"2")
classifying_test()
else:
print "Generate Data Set from " + outdir + "\nusing this bed file : " + bedFile
print("Generate Data Set from " + outdir + "\nusing this bed file : " + bedFile)
createDataSetFromDir(outdir,bedFile)
classifying()

Expand Down
Loading