diff --git a/divis/genetic_groups.py b/divis/genetic_groups.py deleted file mode 100644 index 6691db1607b0c164b435175c40e9d1c782f79eb7..0000000000000000000000000000000000000000 --- a/divis/genetic_groups.py +++ /dev/null @@ -1,114 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/semantic_cluster_coordinates6.xlsx') -df1 = pd.read_excel('data/input_data_file.xlsx') -df2 = pd.read_excel(df) -#quantitatives variables -quantitative =['Name (original)','Number of flowers per inflorescence'] -#qualitatives variables -qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - -df_clus = df1[['Name (original)','Genetic group']] -df_clus = df_clus.fillna(0) -df_clus["Genetic group"]= df_clus["Genetic group"].astype(int) -df_quali = df1[qualitative] -df_quanti = df1[quantitative] - -#make the dataframe that contain only the qualitatives variables -dataframe_quali = df_quali.merge(df_clus) -dataframe_quali = dataframe_quali.fillna("missing values") - -#make the dataframe that contain only the quantitatives variables -dataframe_quanti = df_quanti.merge(df_clus) -dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) -#make the qualitative analysis -sdqualitative = sdquali(dataframe_quali, qualitative, 'Genetic group', 0.05) -sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') -quali_a = quali_analysis(dataframe_quali, qualitative, 'Genetic group') -cm = clamod(quali_a,'Genetic group') -mc = modcla(quali_a,'Genetic group') -g = globa(quali_a) -pv = pvalue(quali_a,'Genetic group') -test_value = vtest(quali_a,'Genetic group',0.05) -test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') -w = variable_weight(quali_a) -w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - -#make the quantitative analysis for each quantitative variable -sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'Genetic group') -sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') -quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'Genetic group',0.05,0.05) -quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - -#out : -#create the new path for the result -if not os.path.exists('results/Genetic groups') : - os.makedirs('results/Genetic groups') -path = 'results/Genetic groups/' - -#name the files -file_name_x2 = 'x2_GG.xlsx' -file_name_qualitative = 'qualitative_analysis_GG.xlsx' -file_name_weight = 'weight_GG.xlsx' -file_name_anova = 'anova_GG.xlsx' -file_name_quantitative = 'quantitative_analysis_GG.xlsx' - -#create the excel files -write_excel(file_name_x2,'sheet', sdqualitative, idx=True) -write_excel(file_name_qualitative,'sheet',test_value,idx=True) -write_excel(file_name_weight,'sheet', w,idx=True) -write_excel(file_name_anova,'sheet', sd,idx=True) -write_excel(file_name_quantitative,'sheet', quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -title = 'Proportions of modalities in each genetic group with Semantic distance' -df = pd.read_excel(data) -legend='' -for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='Genetic group' : - legend= legend+ str(df['Genetic group'][i])+' : '+str(round(df['global'][i],2))+'%' -sunburst = px.sunburst(df, path=['Genetic group', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) -sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) -sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) -sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) -sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) -sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) diff --git a/divis/gower5.py b/divis/gower5.py deleted file mode 100644 index c061ba63d9cf6214609b35ef700c430a006286fc..0000000000000000000000000000000000000000 --- a/divis/gower5.py +++ /dev/null @@ -1,124 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/gower_cluster_coordinates5.xlsx') -sheets = df.sheet_names -df1 = pd.read_excel('data/input_data_file.xlsx') -for sheet in tqdm(sheets) : - df2 = pd.read_excel(df, sheet) - #quantitatives variables - quantitative =['Name (original)','Number of flowers per inflorescence'] - #qualitatives variables - qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - - df_quali = df1[qualitative] - df_quanti = df1[quantitative] - - #take the variable cluster from the second table - #with a merge from df1 to df2 - #merge : df1 = Name(original) ; df2 = Unnamed: 0 - #rename the df2 columns from Unnamed: 0 to Name(original) to make the merge - df2.rename(columns={'Unnamed: 0' : 'Name (original)'}, inplace = True) - columns_df2 = ['Name (original)','cluster'] - df2 = df2[columns_df2] - - #make the dataframe that contain only the qualitatives variables - dataframe_quali = df_quali.merge(df2) - dataframe_quali = dataframe_quali.fillna('missing values') - - #make the dataframe that contain only the quantitatives variables - dataframe_quanti = df_quanti.merge(df2) - dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) - - #make the qualitative analysis - sdqualitative = sdquali(dataframe_quali, qualitative, 'cluster', 0.05) - sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quali_a = quali_analysis(dataframe_quali, qualitative, 'cluster') - cm = clamod(quali_a,'cluster') - mc = modcla(quali_a,'cluster') - g = globa(quali_a) - pv = pvalue(quali_a,'cluster') - test_value = vtest(quali_a,'cluster',0.05) - test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - w = variable_weight(quali_a) - w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #make the quantitative analysis for each quantitative variable - sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster') - sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster',0.05,0.05) - quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #out : - #create the new path for the result - if not os.path.exists('results/gower/cluster5') : - os.makedirs('results/gower/cluster5') - path = 'results/gower/cluster5/' - - #name the files - file_name_x2 = 'x2_gower_cluster5.xlsx' - file_name_qualitative = 'qualitative_analysis_gower_cluster5.xlsx' - file_name_weight = 'weight_gower_cluster5.xlsx' - file_name_anova = 'anova_gower_cluster5.xlsx' - file_name_quantitative = 'quantitative_analysis_gower_cluster5.xlsx' - - #create the excel files - write_excel(file_name_x2, sheet, sdqualitative, idx=True) - write_excel(file_name_qualitative, sheet, test_value,idx=True) - write_excel(file_name_weight, sheet, w,idx=True) - write_excel(file_name_anova, sheet, sd,idx=True) - write_excel(file_name_quantitative, sheet, quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -sheets = data.sheet_names -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Gower distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) diff --git a/divis/gower6.py b/divis/gower6.py deleted file mode 100644 index 4b647a47d84c1b93d29541dafea0b5a3b044218c..0000000000000000000000000000000000000000 --- a/divis/gower6.py +++ /dev/null @@ -1,127 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/gower_cluster_coordinates6.xlsx') -sheets = df.sheet_names -df1 = pd.read_excel('data/input_data_file.xlsx') -for sheet in tqdm(sheets) : - df2 = pd.read_excel(df, sheet) - #quantitatives variables - quantitative =['Name (original)','Number of flowers per inflorescence'] - #qualitatives variables - qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - - df_quali = df1[qualitative] - df_quanti = df1[quantitative] - - #take the variable cluster from the second table - #with a merge from df1 to df2 - #merge : df1 = Name(original) ; df2 = Unnamed: 0 - #rename the df2 columns from Unnamed: 0 to Name(original) to make the merge - df2.rename(columns={'Unnamed: 0' : 'Name (original)'}, inplace = True) - columns_df2 = ['Name (original)','cluster'] - df2 = df2[columns_df2] - - #make the dataframe that contain only the qualitatives variables - dataframe_quali = df_quali.merge(df2) - dataframe_quali = dataframe_quali.fillna('missing values') - - #make the dataframe that contain only the quantitatives variables - dataframe_quanti = df_quanti.merge(df2) - dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) - - #make the qualitative analysis - sdqualitative = sdquali(dataframe_quali, qualitative, 'cluster', 0.05) - sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quali_a = quali_analysis(dataframe_quali, qualitative, 'cluster') - cm = clamod(quali_a,'cluster') - mc = modcla(quali_a,'cluster') - g = globa(quali_a) - pv = pvalue(quali_a,'cluster') - test_value = vtest(quali_a,'cluster',0.05) - test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - w = variable_weight(quali_a) - w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #make the quantitative analysis for each quantitative variable - sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster') - sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster',0.05,0.05) - quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #out : - #create the new path for the result - if not os.path.exists('results/gower/cluster6') : - os.makedirs('results/gower/cluster6') - path = 'results/gower/cluster6/' - - #name the files - file_name_x2 = 'x2_gower_cluster6.xlsx' - file_name_qualitative = 'qualitative_analysis_gower_cluster6.xlsx' - file_name_weight = 'weight_gower_cluster6.xlsx' - file_name_anova = 'anova_gower_cluster6.xlsx' - file_name_quantitative = 'quantitative_analysis_gower_cluster6.xlsx' - - #create the excel files - write_excel(file_name_x2, sheet, sdqualitative, idx=True) - write_excel(file_name_qualitative, sheet, test_value,idx=True) - write_excel(file_name_weight, sheet, w,idx=True) - write_excel(file_name_anova, sheet, sd,idx=True) - write_excel(file_name_quantitative, sheet, quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -sheets = data.sheet_names -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Gower distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) - - - diff --git a/divis/gower7.py b/divis/gower7.py deleted file mode 100644 index 4e2d5f7c37471668d554047aade836be33d3770e..0000000000000000000000000000000000000000 --- a/divis/gower7.py +++ /dev/null @@ -1,124 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/gower_cluster_coordinates7.xlsx') -sheets = df.sheet_names -df1 = pd.read_excel('data/input_data_file.xlsx') -for sheet in tqdm(sheets) : - df2 = pd.read_excel(df, sheet) - #quantitatives variables - quantitative =['Name (original)','Number of flowers per inflorescence'] - #qualitatives variables - qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - - df_quali = df1[qualitative] - df_quanti = df1[quantitative] - - #take the variable cluster from the second table - #with a merge from df1 to df2 - #merge : df1 = Name(original) ; df2 = Unnamed: 0 - #rename the df2 columns from Unnamed: 0 to Name(original) to make the merge - df2.rename(columns={'Unnamed: 0' : 'Name (original)'}, inplace = True) - columns_df2 = ['Name (original)','cluster'] - df2 = df2[columns_df2] - - #make the dataframe that contain only the qualitatives variables - dataframe_quali = df_quali.merge(df2) - dataframe_quali = dataframe_quali.fillna('missing values') - - #make the dataframe that contain only the quantitatives variables - dataframe_quanti = df_quanti.merge(df2) - dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) - - #make the qualitative analysis - sdqualitative = sdquali(dataframe_quali, qualitative, 'cluster', 0.05) - sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quali_a = quali_analysis(dataframe_quali, qualitative, 'cluster') - cm = clamod(quali_a,'cluster') - mc = modcla(quali_a,'cluster') - g = globa(quali_a) - pv = pvalue(quali_a,'cluster') - test_value = vtest(quali_a,'cluster',0.05) - test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - w = variable_weight(quali_a) - w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #make the quantitative analysis for each quantitative variable - sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster') - sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster',0.05,0.05) - quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #out : - #create the new path for the result - if not os.path.exists('results/gower/cluster7') : - os.makedirs('results/gower/cluster7') - path = 'results/gower/cluster7/' - - #name the files - file_name_x2 = 'x2_gower_cluster7.xlsx' - file_name_qualitative = 'qualitative_analysis_gower_cluster7.xlsx' - file_name_weight = 'weight_gower_cluster7.xlsx' - file_name_anova = 'anova_gower_cluster7.xlsx' - file_name_quantitative = 'quantitative_analysis_gower_cluster7.xlsx' - - #create the excel files - write_excel(file_name_x2, sheet, sdqualitative, idx=True) - write_excel(file_name_qualitative, sheet, test_value,idx=True) - write_excel(file_name_weight, sheet, w,idx=True) - write_excel(file_name_anova, sheet, sd,idx=True) - write_excel(file_name_quantitative, sheet, quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -sheets = data.sheet_names -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Gower distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) diff --git a/divis/semantic5.py b/divis/semantic5.py deleted file mode 100644 index 5e65e883df967a1f372858d5c30034a60a13bb45..0000000000000000000000000000000000000000 --- a/divis/semantic5.py +++ /dev/null @@ -1,124 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/semantic_cluster_coordinates5.xlsx') -sheets = df.sheet_names -df1 = pd.read_excel('data/input_data_file.xlsx') -for sheet in tqdm(sheets) : - df2 = pd.read_excel(df, sheet) - #quantitatives variables - quantitative =['Name (original)','Number of flowers per inflorescence'] - #qualitatives variables - qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - - df_quali = df1[qualitative] - df_quanti = df1[quantitative] - - #take the variable cluster from the second table - #with a merge from df1 to df2 - #merge : df1 = Name(original) ; df2 = Unnamed: 0 - #rename the df2 columns from Unnamed: 0 to Name(original) to make the merge - df2.rename(columns={'Unnamed: 0' : 'Name (original)'}, inplace = True) - columns_df2 = ['Name (original)','cluster'] - df2 = df2[columns_df2] - - #make the dataframe that contain only the qualitatives variables - dataframe_quali = df_quali.merge(df2) - dataframe_quali = dataframe_quali.fillna('missing values') - - #make the dataframe that contain only the quantitatives variables - dataframe_quanti = df_quanti.merge(df2) - dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) - - #make the qualitative analysis - sdqualitative = sdquali(dataframe_quali, qualitative, 'cluster', 0.05) - sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quali_a = quali_analysis(dataframe_quali, qualitative, 'cluster') - cm = clamod(quali_a,'cluster') - mc = modcla(quali_a,'cluster') - g = globa(quali_a) - pv = pvalue(quali_a,'cluster') - test_value = vtest(quali_a,'cluster',0.05) - test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - w = variable_weight(quali_a) - w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #make the quantitative analysis for each quantitative variable - sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster') - sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster',0.05,0.05) - quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #out : - #create the new path for the result - if not os.path.exists('results/semantic/cluster5') : - os.makedirs('results/semantic/cluster5') - path = 'results/semantic/cluster5/' - - #name the files - file_name_x2 = 'x2_semantic_cluster5.xlsx' - file_name_qualitative = 'qualitative_analysis_semantic_cluster5.xlsx' - file_name_weight = 'weight_semantic_cluster5.xlsx' - file_name_anova = 'anova_semantic_cluster5.xlsx' - file_name_quantitative = 'quantitative_analysis_semantic_cluster5.xlsx' - - #create the excel files - write_excel(file_name_x2, sheet, sdqualitative, idx=True) - write_excel(file_name_qualitative, sheet, test_value,idx=True) - write_excel(file_name_weight, sheet, w,idx=True) - write_excel(file_name_anova, sheet, sd,idx=True) - write_excel(file_name_quantitative, sheet, quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -sheets = data.sheet_names -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Semantic distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) diff --git a/divis/semantic6.py b/divis/semantic6.py deleted file mode 100644 index 7d197bf7b8710f4ef991af714b96e085ca2bd12e..0000000000000000000000000000000000000000 --- a/divis/semantic6.py +++ /dev/null @@ -1,124 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/semantic_cluster_coordinates6.xlsx') -sheets = df.sheet_names -df1 = pd.read_excel('data/input_data_file.xlsx') -for sheet in tqdm(sheets) : - df2 = pd.read_excel(df, sheet) - #quantitatives variables - quantitative =['Name (original)','Number of flowers per inflorescence'] - #qualitatives variables - qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - - df_quali = df1[qualitative] - df_quanti = df1[quantitative] - - #take the variable cluster from the second table - #with a merge from df1 to df2 - #merge : df1 = Name(original) ; df2 = Unnamed: 0 - #rename the df2 columns from Unnamed: 0 to Name(original) to make the merge - df2.rename(columns={'Unnamed: 0' : 'Name (original)'}, inplace = True) - columns_df2 = ['Name (original)','cluster'] - df2 = df2[columns_df2] - - #make the dataframe that contain only the qualitatives variables - dataframe_quali = df_quali.merge(df2) - dataframe_quali = dataframe_quali.fillna('missing values') - - #make the dataframe that contain only the quantitatives variables - dataframe_quanti = df_quanti.merge(df2) - dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) - - #make the qualitative analysis - sdqualitative = sdquali(dataframe_quali, qualitative, 'cluster', 0.05) - sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quali_a = quali_analysis(dataframe_quali, qualitative, 'cluster') - cm = clamod(quali_a,'cluster') - mc = modcla(quali_a,'cluster') - g = globa(quali_a) - pv = pvalue(quali_a,'cluster') - test_value = vtest(quali_a,'cluster',0.05) - test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - w = variable_weight(quali_a) - w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #make the quantitative analysis for each quantitative variable - sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster') - sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster',0.05,0.05) - quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #out : - #create the new path for the result - if not os.path.exists('results/semantic/cluster6') : - os.makedirs('results/semantic/cluster6') - path = 'results/semantic/cluster6/' - - #name the files - file_name_x2 = 'x2_semantic_cluster6.xlsx' - file_name_qualitative = 'qualitative_analysis_semantic_cluster6.xlsx' - file_name_weight = 'weight_semantic_cluster6.xlsx' - file_name_anova = 'anova_semantic_cluster6.xlsx' - file_name_quantitative = 'quantitative_analysis_semantic_cluster6.xlsx' - - #create the excel files - write_excel(file_name_x2, sheet, sdqualitative, idx=True) - write_excel(file_name_qualitative, sheet, test_value,idx=True) - write_excel(file_name_weight, sheet, w,idx=True) - write_excel(file_name_anova, sheet, sd,idx=True) - write_excel(file_name_quantitative, sheet, quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -sheets = data.sheet_names -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Semantic distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) diff --git a/divis/semantic7.py b/divis/semantic7.py deleted file mode 100644 index cc93fe6f48c13af2e911de919c63e1b95a96153e..0000000000000000000000000000000000000000 --- a/divis/semantic7.py +++ /dev/null @@ -1,124 +0,0 @@ -# Data analysis and manipulation library -import pandas as pd -import plotly.express as px -import os - -# System library to manipulate the file system -from os import path -from scripts.utils import write_excel -import shutil -from tqdm import tqdm -from scripts.quads import * - -df = pd.ExcelFile('data/semantic_cluster_coordinates7.xlsx') -sheets = df.sheet_names -df1 = pd.read_excel('data/input_data_file.xlsx') -for sheet in tqdm(sheets) : - df2 = pd.read_excel(df, sheet) - #quantitatives variables - quantitative =['Name (original)','Number of flowers per inflorescence'] - #qualitatives variables - qualitative = ['Name (original)', - 'Breeding period', - 'Geographic origin', - 'Horticultural group', - 'Ploidy', - 'Bush height', - 'Shape', - 'Quantity of prickles', - 'Perfume intensity', - 'Repeat flowering', - 'Quantity of bristles by branch', - 'Shine of upper face', - 'Corolla form', - 'Corolla size', - 'Color repartition', - 'Duplicature', - 'Petal color'] - - df_quali = df1[qualitative] - df_quanti = df1[quantitative] - - #take the variable cluster from the second table - #with a merge from df1 to df2 - #merge : df1 = Name(original) ; df2 = Unnamed: 0 - #rename the df2 columns from Unnamed: 0 to Name(original) to make the merge - df2.rename(columns={'Unnamed: 0' : 'Name (original)'}, inplace = True) - columns_df2 = ['Name (original)','cluster'] - df2 = df2[columns_df2] - - #make the dataframe that contain only the qualitatives variables - dataframe_quali = df_quali.merge(df2) - dataframe_quali = dataframe_quali.fillna('missing values') - - #make the dataframe that contain only the quantitatives variables - dataframe_quanti = df_quanti.merge(df2) - dataframe_quanti = dataframe_quanti.rename(columns = {'Number of flowers per inflorescence' : 'Number_of_flowers_per_inflorescence'}) - - #make the qualitative analysis - sdqualitative = sdquali(dataframe_quali, qualitative, 'cluster', 0.05) - sdqualitative=sdqualitative.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quali_a = quali_analysis(dataframe_quali, qualitative, 'cluster') - cm = clamod(quali_a,'cluster') - mc = modcla(quali_a,'cluster') - g = globa(quali_a) - pv = pvalue(quali_a,'cluster') - test_value = vtest(quali_a,'cluster',0.05) - test_value=test_value.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - w = variable_weight(quali_a) - w=w.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #make the quantitative analysis for each quantitative variable - sd = sdquanti(dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster') - sd = sd.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - quanti_a = quanti_analysis(sd, dataframe_quanti,'Number_of_flowers_per_inflorescence', 'cluster',0.05,0.05) - quanti_a = quanti_a.rename_axis('file : 20220615_florhige_synthese_english, code : 20220615_quads') - - #out : - #create the new path for the result - if not os.path.exists('results/semantic/cluster7') : - os.makedirs('results/semantic/cluster7') - path = 'results/semantic/cluster7/' - - #name the files - file_name_x2 = 'x2_semantic_cluster7.xlsx' - file_name_qualitative = 'qualitative_analysis_semantic_cluster7.xlsx' - file_name_weight = 'weight_semantic_cluster7.xlsx' - file_name_anova = 'anova_semantic_cluster7.xlsx' - file_name_quantitative = 'quantitative_analysis_semantic_cluster7.xlsx' - - #create the excel files - write_excel(file_name_x2, sheet, sdqualitative, idx=True) - write_excel(file_name_qualitative, sheet, test_value,idx=True) - write_excel(file_name_weight, sheet, w,idx=True) - write_excel(file_name_anova, sheet, sd,idx=True) - write_excel(file_name_quantitative, sheet, quanti_a, idx=True) - -#make the visualisations -data = pd.ExcelFile(file_name_qualitative) -sheets = data.sheet_names -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Semantic distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - -#move the files in the good directory -shutil.move(file_name_x2,path+file_name_x2) -shutil.move(file_name_qualitative,path+file_name_qualitative) -shutil.move(file_name_weight,path+file_name_weight) -shutil.move(file_name_anova,path+file_name_anova) -shutil.move(file_name_quantitative,path+file_name_quantitative) diff --git a/divis/visu_genetic_groups.py b/divis/visu_genetic_groups.py deleted file mode 100644 index e96eeba0082f5acd94c5c391f154019584ad3e7e..0000000000000000000000000000000000000000 --- a/divis/visu_genetic_groups.py +++ /dev/null @@ -1,51 +0,0 @@ -import plotly.express as px -import pandas as pd - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() - - -file_name_qualitative = 'results/Genetic groups/qualitative_analysis_GG.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -title = 'Proportions of modalities in each genetic group with semantic distance' -df = pd.read_excel(data) -legend='' -for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='Genetic group' : - legend= legend+ str(df['Genetic group'][i])+' : '+str(round(df['global'][i],2))+'%' -sunburst = px.sunburst(df, path=['Genetic group', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - -sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=13),showarrow=False) -sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) -sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) -sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) -sunburst.show() - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - - diff --git a/divis/visu_gower5.py b/divis/visu_gower5.py deleted file mode 100644 index 7ec0eb09d0f3a79153402f8f75b893a99a0ad92a..0000000000000000000000000000000000000000 --- a/divis/visu_gower5.py +++ /dev/null @@ -1,55 +0,0 @@ -import plotly.express as px -import pandas as pd - - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() -file_name_qualitative = 'results/gower/cluster5/qualitative_analysis_gower_cluster5.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -sheets = data.sheet_names -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Gower distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - - - - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - - diff --git a/divis/visu_gower6.py b/divis/visu_gower6.py deleted file mode 100644 index eeaf9d824c370b03b6629698529b0888dfd1db7f..0000000000000000000000000000000000000000 --- a/divis/visu_gower6.py +++ /dev/null @@ -1,54 +0,0 @@ -import plotly.express as px -import pandas as pd - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() - - -file_name_qualitative = 'results/gower/cluster6/qualitative_analysis_gower_cluster6.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -sheets = data.sheet_names -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Gower distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - - diff --git a/divis/visu_gower7.py b/divis/visu_gower7.py deleted file mode 100644 index 58241817c5ff4fb38a2b6dbc627a68614a9995c4..0000000000000000000000000000000000000000 --- a/divis/visu_gower7.py +++ /dev/null @@ -1,53 +0,0 @@ -import plotly.express as px -import pandas as pd - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() - - -file_name_qualitative = 'results/gower/cluster7/qualitative_analysis_gower_cluster7.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -sheets = data.sheet_names -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Gower distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - - diff --git a/divis/visu_semantic5.py b/divis/visu_semantic5.py deleted file mode 100644 index c93a48fb1a46ffcbca31ebbe0b3dd47727360722..0000000000000000000000000000000000000000 --- a/divis/visu_semantic5.py +++ /dev/null @@ -1,55 +0,0 @@ -import plotly.express as px -import pandas as pd - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() - - -file_name_qualitative = 'results/semantic/cluster5/qualitative_analysis_semantic_cluster5.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -sheets = data.sheet_names -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Semantic distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - - - - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - - diff --git a/divis/visu_semantic6.py b/divis/visu_semantic6.py deleted file mode 100644 index 9446f872df5e03fc6a27327ca71243d7d0b7f97c..0000000000000000000000000000000000000000 --- a/divis/visu_semantic6.py +++ /dev/null @@ -1,52 +0,0 @@ -import plotly.express as px -import pandas as pd - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() - -file_name_qualitative = 'results/semantic/cluster6/qualitative_analysis_semantic_cluster6.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -sheets = data.sheet_names -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Semantic distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - - diff --git a/divis/visu_semantic7.py b/divis/visu_semantic7.py deleted file mode 100644 index 975228bacdc52ea223eb90a0ee5d5f1a8852e0db..0000000000000000000000000000000000000000 --- a/divis/visu_semantic7.py +++ /dev/null @@ -1,52 +0,0 @@ -import plotly.express as px -import pandas as pd - -#ind_ov = df[df['signification'] == 'overrepresented'].index -#ind_und = df[df['signification'] == 'underrepresented'].index -#ind_np = df[df['signification'] == 'Not present'].index -#ind_ns = df[df['signification'] == 'Not significant'].index - -#Overrepresented modalities data -#df_ov = df.copy() -#df_ov.drop(ind_und,inplace=True) -#df_ov.drop(ind_np,inplace=True) -#df_ov.drop(ind_ns,inplace=True) -#sunburst_over = px.sunburst(df_ov, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_over.show() - -#Underrepresented modalities data -#df_und = df.copy() -#df_und.drop(ind_ov,inplace=True) -#df_und.drop(ind_np,inplace=True) -#df_und.drop(ind_ns,inplace=True) -#sunburst_under = px.sunburst(df_und, path=['cluster', 'variables', 'modalities'], values='cla/mod') -#sunburst_under.show() - -file_name_qualitative = 'results/semantic/cluster7/qualitative_analysis_semantic_cluster7.xlsx' -data = pd.ExcelFile(file_name_qualitative) -col = {'overrepresented' : 'red', 'underrepresented' : 'blue', 'Not significant': 'grey'} -sheets = data.sheet_names -for sheet in sheets : - title = 'Proportions of modalities in each clusters with Semantic distance and '+sheet+' method' - df = pd.read_excel(data, sheet) - legend='' - for i in range (len(df)): - if legend == '' : - pass - else : - legend = legend+' ; ' - if df['variables'][i] =='cluster' : - legend= legend+ str(df['cluster'][i])+' : '+str(round(df['global'][i],2))+'%' - sunburst = px.sunburst(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=title, color = 'signification',color_discrete_map=col) - sunburst.add_annotation(x=0,y=1.1,text=legend,font = dict(color='black',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=1,text= 'Overrepresented',font = dict(color='red',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.95,text= 'Underrepresented',font = dict(color='blue',size=14),showarrow=False) - sunburst.add_annotation(x=0.2,y=0.9,text= 'Not significant',font = dict(color='grey',size=14),showarrow=False) - sunburst.show() - - #treemap = px.treemap(df, path=['cluster', 'variables', 'modalities'],values='mod/cla',title=legend, color = 'signification') - #treemap.update_traces(root_color="lightgrey") - #treemap.update_layout(margin = dict(t=50, l=25, r=25, b=25)) - #treemap.show() - -