diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..9414382 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +Dockerfile diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..7a3a289 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,29 @@ +name: Build and push docker images + +on: + push: + branches: [main] + +jobs: + build-and-push: + name: Build image and push to Docker Hub + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + name: Checkout source code + - name: Build the Docker image + run: | + repo_name_lower=$(echo ${{ github.event.repository.name }}| tr [:upper:] [:lower:]) + tag="synbiohub/${repo_name_lower}:snapshot" + echo $tag + docker build . --tag $tag + - uses: azure/docker-login@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Push the image to Docker Hub + run: | + repo_name_lower=$(echo ${{ github.event.repository.name }}| tr [:upper:] [:lower:]) + tag="synbiohub/${repo_name_lower}:snapshot" + echo $tag + docker push $tag diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..82adb58 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +venv diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8ac627d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM synbiohub/docker-base-python:snapshot + +COPY . . + +EXPOSE 5000 + +RUN pip3 install -r requirements.txt + +ENV FLASK_APP=app.py + +CMD ["waitress-serve", "--port=5000", "app:app" ] diff --git a/Input_Query.txt b/Input_Query.txt new file mode 100644 index 0000000..dcf8215 --- /dev/null +++ b/Input_Query.txt @@ -0,0 +1,29 @@ +PREFIX rdf: +PREFIX dcterms: +PREFIX dc: +PREFIX sbh: +PREFIX prov: +PREFIX sbol: +PREFIX xsd: +PREFIX rdfs: +PREFIX purl: + +SELECT +?def +?displayId +?title +(count(?def) as ?count) +?role +WHERE { +FILTER regex(?role, "identifiers") +?s sbol:component ?comp . 
+?comp sbol:definition . +?s sbol:component ?comp2 . +?comp2 sbol:definition ?def . +OPTIONAL {?def sbol:role ?role} . +OPTIONAL {?def sbol:displayId ?displayId} . +OPTIONAL {?def dcterms:title ?title} . + } +ORDER BY DESC (?count) + +#OFFSET 0 LIMIT 50 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2cf06ef --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2020, Zach Zundel +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Looking b/Looking deleted file mode 100644 index ebb82af..0000000 --- a/Looking +++ /dev/null @@ -1,34 +0,0 @@ -> import plotly - -data = dict( - type='sankey', - node = dict( - pad = 15, - thickness = 20, - line = dict( - color = "black", - width = 0.5 - ), - label = ['A1', - 'A2', - 'B1', - 'B2', - 'C1', - 'C2'], - color = ["blue", "blue", "blue", "blue", "blue", "blue"] - ), - link = dict( - source = [0,1,0,2,3,3], - target = [2,3,3,4,4,5], - value = [8,4,2,8,4,2] - )) - -layout = dict( - title = "Basic Sankey Diagram", - font = dict( - size = 10 - ) -) - -fig = dict(data=[data], layout=layout) -plotly.offline.plot(fig, filename = 'issue.html', auto_open=True)< diff --git a/Most_Used_By_Type_Query.txt b/Most_Used_By_Type_Query.txt new file mode 100644 index 0000000..064bba0 --- /dev/null +++ b/Most_Used_By_Type_Query.txt @@ -0,0 +1,27 @@ +PREFIX rdf: +PREFIX dcterms: +PREFIX dc: +PREFIX sbh: +PREFIX prov: +PREFIX sbol: +PREFIX xsd: +PREFIX rdfs: +PREFIX purl: + +SELECT + ?def + ?displayId + ?title + (count(?def) as ?count) + ?role +WHERE { +FILTER (?role = ) . + ?s sbol:component ?comp . + ?comp sbol:definition ?def . + OPTIONAL {?def sbol:role ?role} . + OPTIONAL {?def sbol:displayId ?displayId} . + OPTIONAL {?def dcterms:title ?title} . + } +ORDER BY DESC (?count) + +#OFFSET 0 LIMIT 50 diff --git a/Most_Used_Query.txt b/Most_Used_Query.txt new file mode 100644 index 0000000..b942054 --- /dev/null +++ b/Most_Used_Query.txt @@ -0,0 +1,27 @@ +PREFIX rdf: +PREFIX dcterms: +PREFIX dc: +PREFIX sbh: +PREFIX prov: +PREFIX sbol: +PREFIX xsd: +PREFIX rdfs: +PREFIX purl: + +SELECT + ?def + ?displayId + ?title + (count(?def) as ?count) + ?role +WHERE { + ?s sbol:component ?comp . + ?comp sbol:definition ?def . + filter (regex(?role, 'http://identifiers.org/so/SO:')) . + OPTIONAL {?def sbol:role ?role} . + OPTIONAL {?def sbol:displayId ?displayId} . + OPTIONAL {?def dcterms:title ?title} . 
+ } +ORDER BY DESC (?count) + +#OFFSET 0 LIMIT 50 diff --git a/Preceding_Percent_Query.txt b/Preceding_Percent_Query.txt new file mode 100644 index 0000000..8fc667a --- /dev/null +++ b/Preceding_Percent_Query.txt @@ -0,0 +1,49 @@ +PREFIX rdf: +PREFIX dcterms: +PREFIX dc: +PREFIX sbh: +PREFIX prov: +PREFIX sbol: +PREFIX xsd: +PREFIX rdfs: +PREFIX purl: + +select + #?s + ?def2 + (COUNT(?def2) AS ?count) + ?role + ?title + ?displayId + (AVG(xsd:integer(?preceeding)) AS ?average_preceeding) # 1 means all preceeding, 0 means all following, is percentage preceeding + #?preceeding #if negative then component ends before start of queried component + #?start + #?end + #?startb + #?endb + #?def +where { + ?s sbol:component ?comp . + ?s sbol:component ?comp2 . + ?comp sbol:definition ?def . + ?comp2 sbol:definition ?def2 . + ?s sbol:sequenceAnnotation ?sa . + ?sa sbol:component ?comp2 . + ?sa sbol:location ?loc . + #?loc sbol:start ?start . + ?loc sbol:end ?end . + ?s sbol:sequenceAnnotation ?sb . + ?sb sbol:component ?comp . + ?sb sbol:location ?locb . + ?locb sbol:start ?startb . + #?locb sbol:end ?endb . + OPTIONAL {?def2 sbol:role ?role} . + OPTIONAL {?def2 dcterms:title ?title} . + OPTIONAL {?def2 sbol:displayId ?displayId} . + BIND(if(xsd:integer(?end)-xsd:integer(?startb) < 0, "0", "1") AS ?preceeding) . + filter(?def = ) . + filter(?def2 != ) . + filter (regex(?role, 'http://identifiers.org/so/SO:')) . + #filter (?role = || (?role = )|| (?role = )|| (?role = )) . + } +ORDER BY DESC(?count) diff --git a/README.md b/README.md new file mode 100644 index 0000000..a52f4a0 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# INSTALL +## Docker +Run `docker run --publish 8080:5000 --detach --name component-use-plug synbiohub/plugin-visual-component-use` Check it is up using localhost:8080/sankey/status + +## Python +Using python run `pip install -r requirements.txt` to install the requirements. +Then run `FLASK_APP=app python -m flask run`. 
+A flask module will run at localhost:5000/sankey/. diff --git a/Toggle_Switch_html.txt b/Toggle_Switch_html.txt new file mode 100644 index 0000000..e1a008f --- /dev/null +++ b/Toggle_Switch_html.txt @@ -0,0 +1,39 @@ + + + + + +
+

By type

+
+ +
+

Frequency

+
+ + + + + diff --git a/app.py b/app.py index 76bbe16..e0e9c19 100644 --- a/app.py +++ b/app.py @@ -1,39 +1,184 @@ -from flask import Flask, request +#FLASK_APP=app.py flask run +from flask import Flask, request, abort +from input_data import input_data +from find_role_name import find_role_name +from sankey import sankey +from sankey_graph import sankey_graph +from retrieve_html import retrieve_html + +from most_used_bar import most_used_bar +from bar_plot import bar_plot +from most_used_by_type_bar import most_used_by_type_bar +from toggle_bars import toggle_bars + +import tempfile, os, shutil + app = Flask(__name__) -# flask run --host=0.0.0.0 +#flask run --host=0.0.0.0 +@app.route("/sankey/status") +def Sankey_Status(): + return("The sankey plugin is up and running") + +@app.route("/sankey/evaluate", methods=["POST"]) +def Sankey_Evaluate(): + data = request.get_json(force=True) + rdf_type = data['type'] + + ########## REPLACE THIS SECTION WITH OWN RUN CODE ################# + #uses rdf types + accepted_types = {'Component'} + + acceptable = rdf_type in accepted_types + + # #to ensure it shows up on all pages + # acceptable = True + ################## END SECTION #################################### + + if acceptable: + return f'The type sent ({rdf_type}) is an accepted type', 200 + else: + return f'The type sent ({rdf_type}) is NOT an accepted type', 415 + + +@app.route("/sankey/run", methods=["POST"]) +def Sankey_Run(): + data = request.get_json(force=True) + + top_level_url = data['top_level'] + complete_sbol = data['complete_sbol'] + instance_url = data['instanceUrl'] + size = data['size'] + rdf_type = data['type'] + shallow_sbol = data['shallow_sbol'] + + url = complete_sbol.replace('/sbol','') + + try: + + #instance_ur = 'https://synbiohub.org/' + #url = 'https://synbiohub.org/public/igem/BBa_B0012/1' + #top_level_url = 'https://dev.synbiohub.org/public/igem/BBa_B0012/1' + + #retrieve information about the poi + self_df, display_id, title, role, count = 
input_data(top_level_url, instance_url) + + #print("Find role name") + #Find the role name in the ontology of the part of interest + role_link = find_role_name(role, plural = False) + + #create data for the sankey diagram and format it correctly + df_sankey = sankey(url, top_level_url, title, instance_url) + + sankey_title = "Parts Co-Located with "+ title + " (a "+role_link+")" + + #create a temporary directory + temp_dir = tempfile.TemporaryDirectory() + + #name file + filename = os.path.join(temp_dir.name, "Sankey.html") + + + #create the sankey diagram + sankey_graph(filename, df_sankey, 'Node, Label', + 'Link', 'Color', 'Source','Target', 'Value', + 'Link Color', sankey_title, url_not_name=False) + + + #obtain the html from the sankey diagram + result = retrieve_html(filename) + + return result + except Exception as e: + print(e) + abort(400) + +#flask run --host=0.0.0.0 +@app.route("/bar/status") +def Bar_Status(): + return("The bar plugin is up and running") + + +@app.route("/bar/evaluate", methods=["POST"]) +def Bar_Evaluate(): + data = request.get_json(force=True) + rdf_type = data['type'] + + ########## REPLACE THIS SECTION WITH OWN RUN CODE ################# + #uses rdf types + accepted_types = {'Component'} + + acceptable = rdf_type in accepted_types + + # #to ensure it shows up on all pages + # acceptable = True + ################## END SECTION #################################### + + if acceptable: + return f'The type sent ({rdf_type}) is an accepted type', 200 + else: + return f'The type sent ({rdf_type}) is NOT an accepted type', 415 + + +@app.route("/bar/run", methods=["POST"]) +def Bar_Run(): + data = request.get_json(force=True) + + top_level_url = data['top_level'] + complete_sbol = data['complete_sbol'] + instance_url = data['instanceUrl'] + size = data['size'] + rdf_type = data['type'] + shallow_sbol = data['shallow_sbol'] + + url = complete_sbol.replace('/sbol','') + + try: -@app.route("/jet") -def hello(): - return render("Blah!") + #create 
input data + self_df, display_id, title, role, count = input_data(top_level_url, instance_url) + + #create and format data for the most_used barchart + bar_df = most_used_bar(top_level_url, instance_url, display_id, title, role, + count) + + #graph title for most used barchart + graph_title = f'Top Ten Parts by Number of Uses Compared to {title}' + #create a temporary directory + temp_dir = tempfile.TemporaryDirectory() + + #name file + filename1 = os.path.join(temp_dir.name, "Most_Used.html") -def render(url): - return url + #create the most used barchart + bar_plot('title','count','color',bar_df, graph_title, filename1, 'deff') -def empty(): - pass + #retrieve html + most_used = retrieve_html(filename1) -@app.route("/status") -def imdoingfine(): - return("I'm doing fine") + #find poi role ontology link + role_link = find_role_name(role, plural = False) -@app.route("/run", methods=["GET", "POST"]) -def wrapper(): - data = request.json + bar_df = most_used_by_type_bar(top_level_url,instance_url, display_id, title, + role, count) + + #graph title for most used barchart + graph_title = f'Top Ten {role_link} by Number of Uses Compared to {title}' - print(data['complete_sbol']) - # Use the `requests` package to GET this URL -> it will return SBOL - # Load this SBOL into PySBOL to manipulate and interact with - # Build SPARQL queries and post them to /sparql - # Get the results back and save things to the static/ directory here + #name file + filename2 = os.path.join(temp_dir.name, "Most_Used_Type.html") + + #create the most used barchart + bar_plot('title','count','color',bar_df, graph_title, filename2, 'deff') - htmlToReturn = "" - htmlToReturn += '' + #retrieve html + by_role = retrieve_html(filename2) - something = False - if something: - htmlToReturn = "WOW!" 
+ #create bar toggle html + toggle_display = toggle_bars(most_used,by_role) - print("Hi, I'm not broken yet") - return htmlToReturn + return toggle_display + except Exception as e: + print(e) + abort(400) diff --git a/bar_plot.py b/bar_plot.py new file mode 100644 index 0000000..177bfdb --- /dev/null +++ b/bar_plot.py @@ -0,0 +1,81 @@ +import plotly +import plotly.graph_objs as go + +def bar_plot (names,freq, colour, bar_df, graph_title, filename, url_link): + """ + Uses the plotly library to plot a bargraph with the colour determined by one input + and the names linking out to the appropriate pages. + + Requirements + ------- + plotly + plotly.graph_objs as go + + Parameters + ---------- + names : string + the column name that contains the 'names' of each of the bars, i.e. the text to display below each bar + freq : string + the column name that contains the frequency or count for each bar (determines bar height) + colour : string + name of the column in bar_df which contains the colours for each of the in bars + url_link : string + name of the column in bar_df which contains the urls which the bar labels will link out to when clicked + bar_df : pandas dataframe, shape (n, 4 ) + Dataframe with the bar graph information. It needs at least 4 columns with the names provided in names (string), + freq (number), colour (string e.g. 'rgba(4,187,61,1)'), and url_link (string) + graph_title: string + Title to put on the bar graph + filename : string + File location where the html document containing the bargraph is saved (e.g. 
'current_working_directory//bar_graph_name.html') + + Returns + ------- + Nothing is returned but an image is created at the address given by the filename + + + Example + -------- + import pandas as pd + import os + d = {'bar_labels': ['cats', 'dogs'], 'frequency': [3, 4], 'colours':[ 'rgba(0, 140, 255, 1)', ' rgba(226, 47, 129, 1)'], + 'urls':['https://en.wikipedia.org/wiki/Cat','https://en.wikipedia.org/wiki/Dog']} + df = pd.DataFrame(data=d) + + cwd = os.getcwd() + filename = os.path.join(cwd, 'Cats_versus_Dogs.html' + bar_plot ('bar_labels','frequency', 'colours', df, graph_title, filename, 'urls') + """ + + #drop any blank names or links + xnames = bar_df[names].dropna(axis=0, how='any') + xlinks = bar_df[url_link].dropna(axis=0, how='any') + + #create a label with the title of the part displayed linking out to + #the link specified by url_link + sourcethings = ''+xnames+'' + + #format data as needed + #https://plotly.github.io/plotly.py-docs/generated/plotly.graph_objects.bar.html + + data = go.Bar( + x= sourcethings, #names for each of the bars + y=bar_df[freq].dropna(axis=0, how='any'), #heights for each of the bars + hoverinfo = "y", #if you hover over the bar the information from the y axis is given, i.e. 
the count + marker=dict(color=bar_df[colour].dropna(axis=0, how='any')), #colours for bars + ) + + #provide data in the appopriate format + data = [data] + + #https://plotly.github.io/plotly.py-docs/generated/plotly.graph_objects.layout.title.html + layout = go.Layout(title = graph_title) #set graph title + + #put all of the information together + fig = go.Figure(data=data, layout=layout) + + #create the graph + #need filename =filename not just filename, otherwise a random filename is generated + plotly.offline.plot(fig, filename = filename, auto_open=False) + + return diff --git a/capitalise_each_word.py b/capitalise_each_word.py new file mode 100644 index 0000000..4728317 --- /dev/null +++ b/capitalise_each_word.py @@ -0,0 +1,38 @@ +def capitalise_each_word(str): + """ + Capitalises each character after a space, and the first character, of a string + + Requirements + ------- + None + + Parameters + ---------- + str : string + the string to be capitalised + Returns + ------- + newstr: string + The newly capitalised string + + Example + -------- + intro_str = 'hello world. how are you today?' + new_intro = capitalise_each_word(intro_str) + + Output: 'Hello World. How Are You Today?' + """ + #a is a list of the indexes of spaces in the string + a = [index for index, character in enumerate(str) if character == ' '] + + #initiate an empty string + newstr = "" + + #for first letter or letters after spaces capitalise the letter + for i in range(0, len(str)): + if i-1 in a or i == 0: + newstr = newstr + str[i].capitalize() + else: + newstr = newstr + str[i] + + return(newstr) diff --git a/find_role_name.py b/find_role_name.py new file mode 100644 index 0000000..07d51a7 --- /dev/null +++ b/find_role_name.py @@ -0,0 +1,71 @@ +import requests +from bs4 import BeautifulSoup +from capitalise_each_word import capitalise_each_word + +def find_role_name(role_number, plural = False): + """ + Use the role_number (e.g. 
0000167) to find the human readable name for the role and create an html link to the ontology + explaining the human readable name + + Requirements + ------- + import requests + from bs4 import BeautifulSoup + from capitalise_each_word import capitalise_each_word + + Parameters + ---------- + role_number : string + the sequence ontology number for the role (e.g. 0000167) + plural : boolean, default:False + Returns the html for a url with the visible text in plural (e.g. if plural=True returns Promoters instead of Promoter) + + Returns + ------- + role_link: string + a piece of html with a clickable link (e.g. Role) + + Example + -------- + number = '0000316' + role_link = find_role_name(number, plural = True) + + Output: "CDSs" + """ + + #url to get the part name + url = 'http://www.ontobee.org/ontology/SO?iri=http://purl.obolibrary.org/obo/SO_'+role_number + + #get the part type ontology page information + response = requests.get(url) + + # parse html + page = str(BeautifulSoup(response.content, "lxml")) + + #find the location of the start of the particular page + a = page.find('\n + +
+ +
+ + + """ + + #open the htmllink file + fl = open(html_link, "r") + + #read in the html + read_html = fl.read() + + return (read_html) diff --git a/sankey.py b/sankey.py new file mode 100644 index 0000000..3571f6b --- /dev/null +++ b/sankey.py @@ -0,0 +1,287 @@ +import requests +import json +import pandas as pd +from pandas.io.json import json_normalize +from uri_to_url import uri_to_url + +def sankey(url, uri, title, instance): + """ + This function creates the table needed to make the sankey diagram + to create the sankey diagram two linked tables are needed + 1) about nodes: indexes, names and colours + 2) about the links: from node a (index), to node b (index), width, colour + + Requirements + ------- + import requests + import json + import pandas as pd + from pandas.io.json import json_normalize + Preceding_Percent_Query.txt + + + Parameters + ---------- + url : string + the url that links to the part, note that due to spoofing it may not be the same as the uri + e.g. url = 'https://dev.synbiohub.org/public/igem/BBa_E0040/1' (uri may be https://synbiohub.org/public/igem/BBa_E0040/1) + uri : string + the unique identifier of a part, note that due to spoofing it may not be the same as the url + e.g. uri = 'https://synbiohub.org/public/igem/BBa_E0040/1' (url may be https://dev.synbiohub.org/public/igem/BBa_E0040/1) + title: string + The human readable name of the poi e.g. 'GFP' + instance : string + the synbiohub instance where information is to be retrieved from (where the sparql query is to be run) + e.g. 'https://synbiohub.org/' + + Returns + ------- + skey: pandas dataframe, shape(n, 7) + Dataframe with the columns: 'Source' (integer, from here), 'Target' (integer, to here), 'Value' (integer, width of link), + 'Color' (string, node colour (hex) e.g. #04BB3D), 'Node, Label' (str, name of the node e.g. GFP), + 'Link' (str, link for the node e.g. https://synbiohub.org/public/igem/BBa_R0040/1), + 'Link Color' (string, (hex) e.g. 
rgba(4,187,61,0.5) + + Example + -------- + uri = 'https://synbiohub.org/public/igem/BBa_E0040/1' + url = 'https://dev.synbiohub.org/public/igem/BBa_E0040/1' + instance = 'https://dev.synbiohub.org/' + title = 'GFP' + + skey = sankey(url, uri, title, instance) + """ + + #read in the sparql query to perform + fl = open("Preceding_Percent_Query.txt", "r") + sparqlquery = fl.read() + + #substitute in the name of the particular part + sparqlquery = sparqlquery.replace('https://synbiohub.org/public/igem/BBa_E0040/1',uri) + r = requests.post(instance+"sparql", data = {"query":sparqlquery}, headers = {"Accept":"application/json"}) + + #reformat query results + d = json.loads(r.text) + order_df = json_normalize(d['results']['bindings']) + + #rename columns + rename_dict = {'average_preceeding.datatype':'ad', 'average_preceeding.type':'at', 'average_preceeding.value':'centfol', 'count.datatype':'cd', 'count.type':'ct', 'count.value':'count', 'def2.type':'dt', 'def2.value':'deff', 'displayId.type':'dt1', 'displayId.value':'displayId', 'role.type':'rt', 'role.value':'roletog', 'title.type':'tt', 'title.value':'title'} + order_df.columns = [rename_dict[col] for col in order_df.columns] + + #drop unneeded columns + order_df = order_df.drop(['ad', 'at', 'cd', 'ct', 'dt', 'dt1', 'rt', 'tt'], axis=1) + + """ + #columns left are: + #'centfol' - percentage of the instance of co-occurance where + #this part follows the part of interest (0 means all instances + #have the poi following this part) + #'count' - total number of part co-occurances + #'deff'- uri of the part + #'displayId' - display id of the part + #'roletog' - role of the part (e.g. 
http://identifiers.org/so/SO:0000141) + #'title' - human name of the part + """ + + #change number columns from strings to number type + order_df['count'] = order_df['count'].apply(pd.to_numeric) + order_df['centfol'] = order_df['centfol'].apply(pd.to_numeric) + + #makes sure uris point to the correct instance (even for dev.synbiohub.org) + #if spoofing is happening the uri instance is different than the instance + spoofed_instance = uri[:uri.find('/', 8)+1] + order_df['deff'] = uri_to_url(order_df['deff'], instance, spoofed_instance) + + #parts which have no title have the title field filled in using the displayId field + order_df.title[order_df.title.isnull()] = order_df.displayId[order_df.title.isnull()] + + #Break the dataframe down into dataframes for: + #promoters, rbs, cds, terminators, and other parts + + #df with just promoters + prom = order_df.loc[order_df['roletog'] == 'http://identifiers.org/so/SO:0000167'] + + #df with just rbs + rbs = order_df.loc[order_df['roletog'] == 'http://identifiers.org/so/SO:0000139'] + + #df with just cds + cds = order_df.loc[order_df['roletog'] == 'http://identifiers.org/so/SO:0000316'] + + #df with just terminators + term = order_df.loc[order_df['roletog'] == 'http://identifiers.org/so/SO:0000141'] + + #remove all parts that were in any of the other part type data frames + other = order_df[order_df['roletog'] != 'http://identifiers.org/so/SO:0000167'] + other = other[other['roletog'] != 'http://identifiers.org/so/SO:0000139'] + other = other[other['roletog'] != 'http://identifiers.org/so/SO:0000316'] + other = other[other['roletog'] != 'http://identifiers.org/so/SO:0000141'] + + #create a list containing the previous data frames + part_array = [prom, rbs, cds, term, other] + + #array of the lengths of the above + part_array_lengths = [len(prom), len(rbs), len(cds), len(term), len(other)] + + #set up colours for links and nodes + #slightly transparent ones(green, purple, blue, red, orange) + link_colours = 
["rgba(4,187,61,0.5)", "rgba(149,110,219,0.5)", "rgba(119,157,205,0.5)", "rgba(202,58,32,0.5)", "rgba(255, 128,0,0.5)"] + + #fully coloured ones(green, purple, blue, red, orange) + node_colours = ["#04BB3D", "#956EDB", "#779DCC", "#CA3A20", "#FF8000"] + + #colour of the node for the part itself (black) + part_colour = "#000000" + + #initialise lists + source = [] + list_target = [] + value = [] + percent = [] + multiplier = [] + list_link_colour = [] + len_part_type = [] + len_part_type = [] + target = 5 #target starts at 5 as the node with index 5 is the first promoter + # (after preceeding __ nodes for each type) + + #list of node colours starts with the five colours as the first + #five nodes are the outflow ones for preceeding in each category + list_node_colour = node_colours + + #first five node names + node_label = ["Preceeding Promoters", "Preceeding RBS", "Preceeding CDS", "Preceeding Terminator", "Preceeding Other"] + + #the first five nodes don't have a url link + node_url = ["NA", "NA","NA","NA", "NA"] + + + """part type to preceding parts""" + for part_type_index in range(0,5): + #make sure no length used is greater than 10 + len_part_type += [min(part_array_lengths[part_type_index], 10)] + + #iterate over the parts upto ten in the array prom, rbs, cds, term, other + for index in range(0,len_part_type[part_type_index]): + #add a link colour for the link from preceeding label to node in this list + list_link_colour.append(link_colours[part_type_index]) + + #add a node colour for the node of this list + list_node_colour.append(node_colours[part_type_index]) + + #add the label to the list of node labels (e.g. 
p(tetR)) + node_label.append(part_array[part_type_index]['title'].iloc[index]) + + #add the uri to the list of node uris + node_url.append(part_array[part_type_index]['deff'].iloc[index]) + + #add the percentage following to the list of percentages + percent.append(part_array[part_type_index]['centfol'].iloc[index]) + + #add the count of co-occurances to the list of counts + multiplier.append(part_array[part_type_index]['count'].iloc[index]) + + + #add the counts*(1-percent) to the list of link widths + value.append((part_array[part_type_index]['count'].iloc[index])*(1-part_array[part_type_index]['centfol'].iloc[index])) + + #the links all start from index associated with the preceeding number + source.append(part_type_index) + + #add target to list of targets + list_target.append(target) + + #increment target by one + target += 1 + + + + """Preceding parts to POI""" + #tells the number of links made so far + #how many promoters, rbs, cds, terminators, and other parts preceed the poi + num_colocated_parts = len(list_link_colour) + + #as the links to and from preceeding parts + #all must have the same colour in the same order + #just double the list of the link colours + list_link_colour += list_link_colour + + #for the same reason add the values currently in values to the end + #of the values link again (links are the same width) + value += value + + #the targets from preceeding must become the source going to the poi + #so add the targets to the end of the source list + source += list_target + + #the target for each of them is the poi so add that + list_target += [target]*num_colocated_parts + + """Add POI node""" + node_label.append(title) + + #add the uri/url link + node_url.append(url) + + #add the colour of the part node + list_node_colour.append(part_colour) + + """POI to following""" + #source will be from poi (and the source is used as many times as there were parts + #found colocated with the poi, upto 50) + source += [target]*num_colocated_parts + + 
#target list will be the same length as the original number of nodes added + #and continous from there + list_target += list(range(target+1, target+num_colocated_parts+1)) + + #add link colours (same as for preceeding inbound links) + list_link_colour += list_link_colour[:num_colocated_parts] + + #add link width based on count and the percentage not preceeding + value += [(p)*m for p,m in zip(percent,multiplier)] + + + #add node labels same as the preceeding ones + node_label += node_label[5:-1] + + #add node colours (same as for preceeding colocated parts) + list_node_colour += list_node_colour[5:-1] + + #add node uris (same as for preceeding colocated parts) + node_url += node_url[5:-1] + + """Following to Following labels""" + """link colour, link width, source, target, node labels, node colours, node uris""" + #source will be from colocated parts to following parts groups + #(e.g. following terminators) + source += list(range(target+1, target+num_colocated_parts+1)) + + #target list will be based on the original groups of the preceeding parts + #but shifted to take into account the current point in the list + list_target += [x +target+num_colocated_parts+1 for x in source[:num_colocated_parts]] + + #add link colours (same as for preceeding inbound links) + list_link_colour += list_link_colour[:num_colocated_parts] + + #add link width based on count and the percentage not preceeding + value += [(p)*m for p,m in zip(percent,multiplier)] + + #add node labels for following groups + node_label += ["Following Promoters", "Following RBS", "Following CDS", "Following Terminator", "Following Other"] + + #final node colours are based on the groups + list_node_colour += ["#04BB3D", "#956EDB", "#779DCC", "#CA3A20", "#FF8000"] + + #final group nodes have no uri/urls + node_url += ["NA", "NA","NA","NA", "NA"] + + + """Make DataFrame""" + skey = pd.DataFrame(data =[source, list_target, value, list_node_colour, + node_label, node_url, list_link_colour], + index =["Source", 
import plotly

def sankey_graph(filename, component_df, node_label_col, url_col,
                 node_colour_col, source_col, target_col, value_col,
                 link_colour_col, graph_title, url_not_name=True):
    """
    Uses the plotly library to plot a sankey graph with the colour and width
    of links determined by the input, as well as the colour of the nodes.

    Requirements
    -------
    import plotly

    Parameters
    ----------
    filename : string
        File location where the html document containing the sankey graph is
        saved (e.g. 'current_working_directory//sankey_name.html')
    component_df : pandas dataframe, shape(n, 7)
        Dataframe with the columns: 'Source' (integer, from here),
        'Target' (integer, to here), 'Value' (integer, width of link),
        'Color' (string, node colour (hex) e.g. #04BB3D),
        'Node, Label' (str, name of the node e.g. GFP),
        'Link' (str, url for the node
        e.g. https://synbiohub.org/public/igem/BBa_R0040/1),
        'Link Color' (string, e.g. rgba(4,187,61,0.5))
    node_label_col : string
        the component_df column name that contains the 'names' of each of the
        nodes, i.e. the text to display above each node
    url_col : string
        the component_df column name that contains the urls the nodes link
        out to (only used when url_not_name is True - though currently there
        is a bug in plotly so the links don't work reliably)
    node_colour_col : string
        the component_df column name that contains the colour of each node
    source_col : string
        the component_df column name that contains the 'sources' of each of
        the links (the link goes from the node referenced here to the node
        referenced by the target in the same row)
    target_col : string
        the component_df column name that contains the 'targets' of each of
        the links (the link goes to the node referenced here from the node
        referenced by the source in the same row)
    value_col : string
        the component_df column name that contains the width of each link
        (for the link described by the source and target in the same row)
    link_colour_col : string
        the component_df column name that contains the colour of each link
        (for the link described by the source and target in the same row)
    graph_title : string
        Title to put on the sankey graph
    url_not_name : Boolean, default: True
        When True nodes are labelled with clickable links rather than just
        names (currently there is a bug in plotly so this doesn't work)

    Returns
    -------
    Nothing is returned but an html file is created at `filename`.
    """
    # the node columns are shorter than the link columns, so the frame is
    # ragged - drop the padding NAs from each column before use
    xnames = component_df[node_label_col].dropna(axis=0, how='any')
    xlinks = component_df[url_col].dropna(axis=0, how='any')

    if url_not_name:
        # label each node with an html anchor that displays the name but
        # links out to the node's url
        # NOTE(review): the anchor markup was lost in extraction and has
        # been reconstructed here - confirm against the rendered output
        sourcethings = '<a href="' + xlinks + '">' + xnames + '</a>'
    else:
        sourcethings = xnames

    # the data package for plotly to use
    # https://plotly.github.io/plotly.py-docs/generated/plotly.graph_objects.Sankey.html
    data_trace = dict(
        type='sankey',
        domain=dict(
            x=[0, 1],   # plot stretches from x = 0 to x = 1
            y=[0, 1]),  # plot stretches from y = 0 to y = 1
        orientation="h",     # h = horizontal sankey
        valueformat=".0f",   # d3 mini-language: integer value labels
        # nodes can only move perpendicular to the direction of flow,
        # preserving the 'columns' of the diagram
        arrangement="perpendicular",
        node=dict(
            pad=10,        # padding (px) between the nodes
            thickness=30,  # nodes are 30 pixels wide
            line=dict(     # outline nodes with a black 0.5 line
                color="black",
                width=0.5),
            label=sourcethings,  # labels to use for the nodes
            color=component_df[node_colour_col].dropna(axis=0, how='any'),),
        link=dict(
            source=component_df[source_col].dropna(axis=0, how='any'),
            target=component_df[target_col].dropna(axis=0, how='any'),
            value=component_df[value_col].dropna(axis=0, how='any'),
            color=component_df[link_colour_col].dropna(axis=0, how='any'),))

    layout = dict(
        title=graph_title,       # figure title
        height=772,              # figure height
        width=950,               # figure width
        font=dict(size=10),)     # fontsize 10

    fig = dict(data=[data_trace], layout=layout)

    # filename must be passed by keyword, otherwise plotly generates a
    # random filename
    plotly.offline.plot(fig, filename=filename, auto_open=False)
    return
def toggle_bars(bar1, bar2, save_html=False, filename=None):
    """
    Combines two pieces of html and shows one or the other depending on the
    toggle state.

    Requirements
    -------
    Toggle_Switch_html.txt (the toggle template, read from the working
    directory)

    Parameters
    ----------
    bar1 : string
        An html formatted string for the content shown when the toggle is
        not selected (the default setting).  It must contain the standard
        chart page header/footer markup so it can be stripped before
        embedding.
    bar2 : string
        An html formatted string for the content shown when the toggle is
        selected.  Same markup requirement as bar1.
    save_html : boolean, default: False
        if True then the html toggle document will be saved, if False no
        document with the html string will be saved
    filename : string, default: None
        File location where the toggled html document is saved; only used
        if save_html is True
        (e.g. 'current_working_directory\\\\toggle_display.html')

    Returns
    -------
    display : string
        An html string which contains a toggle element and the two pieces
        of html to vary between
    """
    # standalone-page wrapper emitted around each chart.
    # NOTE(review): the exact header/footer markup was garbled during
    # extraction - confirm these literals against the html actually
    # produced by the chart plots before relying on them.
    chart_header = '\n\n'
    chart_footer = '\n\n\n\n '

    # strip the page wrapper from both charts so they can be embedded as
    # fragments inside the toggle template
    htmls = [bar_html.replace(chart_header, '').replace(chart_footer, '')
             for bar_html in (bar1, bar2)]

    # read in the toggle template, closing the handle promptly
    with open("Toggle_Switch_html.txt", "r") as template_file:
        display = template_file.read()

    # swap the template placeholders for the two bar graphs.
    # TODO(review): the placeholder markup (around 'Frequency' and
    # 'By type') was also garbled in extraction - restore the exact
    # strings from Toggle_Switch_html.txt.
    display = display.replace('\nFrequency\n', htmls[0])
    display = display.replace('\nBy type\n', htmls[1])

    # optionally save the combined html (with-block guarantees the file
    # is flushed and closed)
    if save_html:
        with open(filename, "w") as html_file:
            html_file.write(display)

    return display
import pandas as pd

def uri_to_url(data, instance, spoofed_instance):
    """
    Change the instance that is being referred to in the data to the
    instance given by `instance`, if the current instance being referenced
    is the `spoofed_instance`.

    Unlike a naive in-place edit, the caller's Series is never mutated:
    a new Series (or string) is returned.

    Requirements
    -------
    import pandas as pd

    Parameters
    ----------
    data : pandas series OR string
        The input with urls that must be changed. The cells/string should
        contain string(s) of the format:
        'https://something_to_be_replaced/something_to_keep'
    instance : string
        the synbiohub instance where information is to be retrieved from
        e.g. 'https://synbiohub.org/' (note it must have the https:// at
        the start and the / at the end)
    spoofed_instance : string
        the synbiohub instance that it is pretending to be
        e.g. 'https://dev.synbiohub.org/' pretends to be
        'https://synbiohub.org/'

    Returns
    -------
    data : pandas series OR string
        The column/string with uris converted to urls of the format:
        'instance' + 'something_to_keep',
        e.g. 'https://synbiohub.org/something_to_keep'

    Example
    --------
    import pandas as pd

    lst = ['https://shouldnt_change.org/public/igem/BBa_E1010/1',
           'https://synbiohub.org/public/igem/BBa_C0051/1',
           'https://synbiohub.org/public/igem/BBa_C0040/1']
    series = pd.Series(lst)

    new_series = uri_to_url(series, 'https://dev.synbiohub.org/',
                            'https://synbiohub.org/')

    Output:
    0    https://shouldnt_change.org/public/igem/BBa_E1010/1
    1    https://dev.synbiohub.org/public/igem/BBa_C0051/1
    2    https://dev.synbiohub.org/public/igem/BBa_C0040/1
    """
    # no changes need to be made when the instances already agree
    if spoofed_instance == instance:
        return data

    def _swap_instance(uri):
        # the instance is the scheme + host portion up to (and including)
        # the first '/' after 'https://' (search starts at index 8 to skip
        # the '//' of the scheme)
        uri_instance = uri[:uri.find('/', 8) + 1]
        if uri_instance == spoofed_instance:
            # slice rather than str.replace so that a later occurrence of
            # the host inside the path can never be substituted by mistake
            return instance + uri[len(uri_instance):]
        return uri

    # case that it is a column of a pandas dataframe (a Pandas Series):
    # map returns a new Series, leaving the caller's data untouched
    if isinstance(data, pd.Series):
        return data.map(_swap_instance)

    # case that it is a simple string
    if isinstance(data, str):
        return _swap_instance(data)

    # any other type is passed through unchanged (matches original behavior)
    return data