Workshopping some names.

#Cytogenetic
#Ideogram
#Karyotype
#Structural
#Variant

Download Wikimedia chromosomes, download the NCBI genome assembly table with bands, apply band locations to pixel heights (by percentage), cut and paste chromosomes around according to the karyotype, and maybe change the color too.

Send an HTTP request to the server and save the HTTP response in a response object called r.

Set up environment

mkdir ~/autoideo
cd ~/autoideo
conda create -n autoideo
conda activate autoideo
pip3 install wget svgpathtools pandas cairosvg seaborn plotly

brew install imagemagick -i; ./configure --disable-osx-universal-binary --prefix=/opt/homebrew/Cellar/imagemagick/7.1.1-3 --disable-silent-rules --with-x11; make install

import pandas as pd
import wget
import seaborn as sns
import os
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import colorsys
# Work from the project directory created in the setup notes (~/autoideo).
# Expanding "~" instead of hard-coding one user's home keeps the script runnable on other accounts.
os.chdir(os.path.expanduser('~/autoideo'))

"""Color set up"""
def rgb_to_hex(r, g, b):
    """Return the ``#rrggbb`` string for integer red, green and blue channel values.

    Each channel is rendered as lowercase hexadecimal, zero-padded to at least
    two digits.
    """
    return '#' + ''.join('{:02x}'.format(channel) for channel in (r, g, b))

def lighten_bands(band_color,lighten=False):
    """Convert a list of HLS colour triples to hex strings, optionally scaled by band density.

    The list is rewritten in place (each triple is replaced by its ``#rrggbb``
    string) and the same list object is returned.

    band_color -- list of (hue, lightness, saturation) triples, one per band.
    lighten    -- when True, the lightness of entry ``i`` is replaced by
                  ``1 - lightness * density / 100``, where ``density`` is read
                  from row ``i`` (by position) of the module-level ``iscn``
                  table. When False the colour is converted unchanged.
    """
    for row_pos, entry in enumerate(band_color):
        hls = list(entry)
        hue, lightness, saturation = hls[0], hls[1], hls[2]
        if lighten == True:
            # Density comes from the global iscn table, matched by row position.
            density = iscn.loc[iscn.index[row_pos], 'density']
            lightness = 1 - (lightness * density / 100)
        rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
        # colorsys works in 0-1 floats; rgb_to_hex expects integer channel values.
        channels = [int(value * 255) for value in rgb]
        band_color[row_pos] = rgb_to_hex(channels[0], channels[1], channels[2])
    return band_color

def set_chr_colors(iscn,pallet="husl"):
    """Add per-band ``chr_color`` and ``band_color`` hex columns to the band table.

    iscn   -- band table with a ``#chromosome`` column; it is modified in place
              and also returned.
    pallet -- seaborn palette name; one colour is drawn per unique chromosome.

    ``chr_color`` is the unmodified chromosome colour and ``band_color`` is the
    same colour after ``lighten_bands(..., lighten=True)``.
    NOTE(review): lighten_bands reads density from the module-level ``iscn``
    table rather than from this argument -- confirm they are always the same table.
    """
    chrom_names = [str(name) for name in iscn['#chromosome'].unique()]
    palette_rgb = sns.color_palette(pallet, len(chrom_names))
    # Colours are kept as HLS so lightness can be adjusted before hex conversion.
    chr_color = {
        name: colorsys.rgb_to_hls(rgb[0], rgb[1], rgb[2])
        for name, rgb in zip(chrom_names, palette_rgb)
    }
    band_chroms = list(iscn["#chromosome"])
    # lighten_bands rewrites its input list, so each call gets a fresh list.
    iscn["chr_color"] = lighten_bands([chr_color[c] for c in band_chroms], lighten=False)
    iscn["band_color"] = lighten_bands([chr_color[c] for c in band_chroms], lighten=True)
    return iscn

"""Input reference data"""
#Band to genomic loci data set from NCBI: ftp://ftp.ncbi.nlm.nih.gov/pub/gdp/ideogram_9606_GCF_000001305.14_850_V1
def download_geneloc_to_band():
    """Download the NCBI chromosome band table into the current directory if it is not already there.

    The file name is the last path component of the FTP URL. When a file of that
    name is already listed in the working directory, a message is printed and
    nothing is downloaded.
    """
    table_ftp = "ftp://ftp.ncbi.nlm.nih.gov/pub/gdp/ideogram_9606_GCF_000001305.14_850_V1"
    table_name = table_ftp.rsplit("/", 1)[-1]
    if table_name in os.listdir():
        print(table_name+" Found! Proceeding...")
        return
    wget.download(table_ftp)

"""Karyotype Building Functions"""
def get_sub_chr(chrom,arm,band,subband):
    """Return the rows of one chromosome up to and including a breakpoint band.

    Rows are taken from the module-level ``iscn`` table.

    chrom   -- value matched against the ``#chromosome`` column.
    arm     -- "q" returns the whole p arm plus the q-arm rows up to the
               breakpoint; any other value returns only the p-arm rows up to
               the breakpoint.
    band    -- breakpoint band, compared as an integer with ``large_band``.
    subband -- breakpoint sub-band, compared as an integer with ``sub_band``.

    A row is kept when its large band is below ``band``, or equal to ``band``
    with a sub-band no greater than ``subband``.
    """
    def up_to_breakpoint(arm_rows):
        # Keep bands strictly before the breakpoint band, plus the sub-bands of
        # the breakpoint band up to and including the breakpoint sub-band.
        large = arm_rows["large_band"].astype(int)
        sub = arm_rows["sub_band"].astype(int)
        before = large < int(band)
        within = (large == int(band)) & (sub <= int(subband))
        return arm_rows[before | within].copy()

    subchr = iscn[iscn["#chromosome"] == chrom].copy()
    p_arm = subchr[subchr["arm"] == "p"].copy()
    if arm != "q":
        return up_to_breakpoint(p_arm)
    q_arm = subchr[subchr["arm"] == "q"].copy()
    return pd.concat([p_arm, up_to_breakpoint(q_arm)])
    # TODO: think this through to grab the proper sections

def _parse_breakpoint(locus):
    """Split one breakpoint such as ``q34.1`` (or dotless ``q341``) into (arm, band, subband)."""
    locus = locus.strip()
    if len(locus) < 3:
        raise ValueError("Cannot parse breakpoint '" + locus + "': expected an arm followed by a band, e.g. q34.1")
    arm, position = locus[0], locus[1:]
    if "." in position:
        # Standard notation: everything before the dot is the band, the rest the sub-band.
        band, subband = position.split(".", 1)
    else:
        # Dotless shorthand: first two digits are the band, any remaining digits the sub-band.
        band, subband = position[:2], position[2:]
    # Right-pad with zeros so the sub-band has the same three-digit precision
    # as the ``sub_band`` column it is compared against.
    return arm, band, subband.ljust(3, '0')


def get_translocation_chr(N):
    """Build the band rows for a translocation written as ``t(9;22)(q34.1;q11.2)``.

    N -- translocation string: ``t(`` + ``;``-separated chromosomes + ``)(`` +
         ``;``-separated breakpoints + ``)``. Breakpoints may be written with
         the decimal point (``q34.1``) or without it (``q341``).

    For each chromosome/breakpoint pair the rows returned by ``get_sub_chr``
    are collected, and all pieces are concatenated in the order given.

    Raises ValueError when the number of breakpoints differs from the number
    of chromosomes, or when a breakpoint is too short to parse.
    """
    chr_in = N.split("t(")[1].split(")")[0].split(";")
    chr_in_boundaries = N.split("(")[2].split(")")[0].split(";")
    if len(chr_in) != len(chr_in_boundaries):
        raise ValueError(
            "Translocation '" + N + "' lists " + str(len(chr_in)) + " chromosome(s) but "
            + str(len(chr_in_boundaries)) + " breakpoint(s)"
        )
    # Parse every breakpoint up front so malformed input fails before any table work.
    parsed = [_parse_breakpoint(locus) for locus in chr_in_boundaries]
    pieces = [
        get_sub_chr(chrom.strip(), arm, band, subband)
        for chrom, (arm, band, subband) in zip(chr_in, parsed)
    ]
    if not pieces:
        return pd.DataFrame()
    return pd.concat(pieces)

#def get_derivative_chr(N):
#"""Function to generate derivative chr, of format +der(22)t(9;22)(q34.1;q11.2)"""

#def get_i_chr: isochromosome (mirrored arm) chr

def set_up_chr(N):
    """Return the band rows for one karyotype entry.

    N -- either a chromosome name present in the ``#chromosome`` column of the
         module-level ``iscn`` table, or a translocation string starting with
         "t" (handled by ``get_translocation_chr``).

    Returns a copy of the matching ``iscn`` rows for a plain chromosome, or the
    assembled translocation rows.

    Raises ValueError for any other entry; previously such entries fell through
    both branches and failed with an unbound-variable error.
    """
    if N in iscn['#chromosome'].unique():
        return iscn[iscn['#chromosome'] == N].copy()
    if N.startswith("t"):
        return get_translocation_chr(N)
    raise ValueError(
        "Unsupported karyotype entry '" + str(N) + "': expected a chromosome name or a translocation t(...)(...)"
    )

def set_up_karyotype(kary):
    """Build one long table of band rows for every entry of a karyotype.

    kary -- iterable of karyotype entries (chromosome names or translocation
            strings); each is converted with ``str`` and passed to ``set_up_chr``.

    Every entry's rows get a ``uniq_chr`` column of the form ``<entry>_<position>``
    so that repeated chromosomes stay distinguishable. Returns the concatenation
    of all entries in order, or an empty DataFrame when ``kary`` is empty.
    """
    frames = []
    # Iterate the argument itself; the previous version ignored ``kary`` and
    # looped over the module-level ``karyotype`` list instead.
    for position, N in enumerate(kary):
        out_N = set_up_chr(str(N))
        out_N['uniq_chr'] = str(N) + "_" + str(position)
        frames.append(out_N)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

### Set up reference Data ###
#Make sure the NCBI band table is available locally, then read it in
download_geneloc_to_band()
iscn=pd.read_table("ideogram_9606_GCF_000001305.14_850_V1")
#The ISCN band description includes stain information (including density), so bands can be scaled by density and then recolored
#Rows with no density value are given density 0 (the gneg rows, per the original note)
no_density=iscn['density'].isnull()
iscn.loc[no_density,['density']] = 0
#Band size is the difference between the iscn_stop and iscn_start columns
iscn['size']=iscn['iscn_stop']-iscn['iscn_start']
#Split the band label at the "." into the large band and the sub-band
band_text=[str(i) for i in iscn['band']]
iscn['large_band']=[b.split(".")[0] for b in band_text]
iscn['sub_band']=[b.split(".")[1].ljust(3,'0') for b in band_text] #right-pad with 0 to three characters
#Band name shown on hover: chromosome + arm + band
band_suffix=iscn["band"].astype('string')
iscn["band_name"]=iscn["#chromosome"]+iscn["arm"]+band_suffix

### Set band colors based on user pallet ###
iscn=set_chr_colors(iscn,pallet="Spectral") #supply chr color palette you want

### Build Karyotype ###
#Input is standard ISCN karyotype
#Example from cydas 47<2n>,XY,-7,+8,t(9;22)(q34.1;q11.2),i(17)(q10),+der(22)t(9;22)(q34.1;q11.2)
#Convert shorthand to long format
#NOTE: the translocation breakpoints below are written without the decimal point (q341 rather than q34.1)
karyotype=["t(9;22)(q341;q112)",1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,'X','Y']
karyotype=[str(i) for i in karyotype]
kary=set_up_karyotype(kary=karyotype)

#Map each unique x-axis key (entry plus position suffix) back to the karyotype entry, for tick labels
kary_labels=dict(zip(kary['uniq_chr'].unique(),karyotype))

###

#plotly express creates one trace per unique band_name, so colors must be keyed by band name.
#A row-ordered color list drifts out of step as soon as a chromosome appears twice in the karyotype.
band_fill=dict(zip(kary["band_name"],kary["band_color"]))
band_outline=dict(zip(kary["band_name"],kary["chr_color"]))

fig = px.bar(kary, x='uniq_chr', y='size', color="band_name",color_discrete_map=band_fill,hover_name="band_name",hover_data=['#chromosome','bp_start','bp_stop'])
fig=fig.update_yaxes(showgrid=False, zeroline=False, autorange="reversed")
fig=fig.update_xaxes(ticktext=list(kary_labels.values()),
		tickvals=list(kary_labels))
fig=fig.update_traces(marker_line_width=1.5, opacity=0.4)
#Outline each band's bars with the color of the chromosome the band comes from (trace name is the band name)
fig=fig.for_each_trace(lambda trace: trace.update(marker=dict(line=dict(color=band_outline.get(trace.name)))))
fig=fig.update_layout(showlegend=False)
fig.show()


#for separation of chr, might have to make chr groups and make subplots per chr group.



###Insert space between translocations, insert i and der, and read karyotype into long format. Also reciprocal t; fix karyotype order