import numpy as np
import pandas as pd
import json
import os,sys
# from numba import jit
def init_P_matrix(*shape):
    """
    Generates a normalized probability matrix.

    Parameters:
    -----------
    shape: int or tuple of ints
        Dimension of the probability matrix

    Returns:
    -------
    numpy.ndarray
        A random probability matrix normalized along the last axis, so each
        slice along that axis sums to 1.
    """
    P = np.random.rand(*shape)
    # keepdims=True keeps the summed axis as size 1 so the division broadcasts
    # directly, replacing the original manual reshape of the sums.
    return P / P.sum(axis=-1, keepdims=True)
# Helper to render a pandas DataFrame as an RST grid table
def df_to_rst_table(df, title=None):
    """
    Converts a pandas DataFrame to a RST table format.

    Parameters:
    -----------
    df: pandas.DataFrame
        The DataFrame to convert to a table.
    title: str, default: None
        The title of the table.

    Returns:
    -------
    A string with the RST table format.
    """
    cols = df.columns
    # Each column is as wide as its header or its longest cell, whichever is larger.
    widths = [max(len(str(c)), df[c].astype(str).str.len().max()) for c in cols]

    def _rule(ch):
        # Horizontal rule line such as "+-----+----+" ("=" for the header rule).
        return '+' + '+'.join(ch * (w + 2) for w in widths) + '+'

    def _cells(values):
        # Content line such as "| a   | bb |", padding every cell to its column width.
        return '|' + '|'.join(f' {v:{w}} ' for v, w in zip(values, widths)) + '|'

    body = []
    for _, record in df.iterrows():
        # Every data row is followed by its own "-" rule.
        body.append(_cells([str(v) for v in record]) + "\n" + _rule('-'))

    rendered = '\n'.join([_rule('-'), _cells(cols), _rule('=')] + body)
    if title:
        return f"\n{title}\n\n" + rendered
    return rendered
def add_codes(layer, col_name):
    """
    Adds to the df property a column with an id of the value of the col_name column.

    Parameters
    -----------
    layer: node_layer, BiNet
        Objects with a df attribute
    col_name: str
        Name of the column in the df attribute that an integer id will be assigned to every different value of the column.
        This id is an integer that goes from 0 to N-1, where N is the number of different values in the col_name column.

    Return
    -----------
    dict_codes: dict
        Dictionary where each key is a different value of the col_name column and the value is the id integer that was assigned.
    """
    column = layer.df[col_name]
    codes = pd.Categorical(column).codes
    # Walk the column in row order, pairing each raw value with its categorical code.
    dict_codes = {}
    for value, code in zip(column, codes):
        dict_codes[value] = code
    layer.df[col_name + "_id"] = codes
    return dict_codes
def finished(A, A_old, tol):
    """
    Returns True if A and A_old are similar, by checking whether the mean
    absolute difference between A and A_old is lower than the tolerance tol.
    """
    # Return the comparison directly instead of the flag-variable pattern;
    # bool(...) converts numpy.bool_ into a plain Python bool for callers.
    return bool(np.mean(np.abs(A - A_old)) < tol)
def save_MMSBM_parameters(BiNet, dir=".", matrix_format="npy", BiNet_json=False):
    """
    It saves the parameters into matrices in the dir directory.

    Parameters:
    -----------
    BiNet: BiNet object
        Bipartite network with the MMSBM initialized
    dir: str, default: "."
        Directory where the files with the MMSBM parameters will be saved
    matrix_format: str, default: npy
        Format that the matrices parameters will be saved. It can be npy or npz.
    BiNet_json: boolean, default: False
        If it is True, the information of the BiNet class will be saved into a json.

    Raises
    ------
    ValueError
        If matrix_format is neither "npy" nor "npz".
    """
    na = BiNet.nodes_a
    nb = BiNet.nodes_b
    if matrix_format == "npy":
        save_func = np.save
    elif matrix_format == "npz":
        save_func = np.savez
    else:
        # The original left save_func undefined here and crashed later with a
        # NameError; fail fast with a clear message instead.
        raise ValueError(
            "matrix_format must be 'npy' or 'npz', got {!r}".format(matrix_format)
        )
    save_func(dir + "/pkl." + matrix_format, BiNet.pkl)
    # Per-layer parameter saves
    for layer, str_layer in [(na, "a"), (nb, "b")]:
        save_func(dir + "/theta_{}.".format(str_layer) + matrix_format, layer.theta)
        # inclusive metadata saves
        for meta in layer.meta_inclusives.values():
            save_func(dir + "/zeta_{}.".format(str(meta)) + matrix_format, meta.zeta)
            save_func(dir + "/q_k_tau_{}.".format(str(meta)) + matrix_format, meta.q_k_tau)
        # exclusive metadata saves
        for meta in layer.meta_exclusives.values():
            save_func(dir + "/qka_{}.".format(str(meta)) + matrix_format, meta.qka)
    # BiNet json
    if BiNet_json:
        save_BiNet_dict(BiNet, dir=dir)
def save_nodes_layer_dict(layer, dir="."):
    """
    It saves some information, including the dict_codes from the layer, into a
    json file called layer_<layer>_data.json.

    Parameters:
    -----------
    layer: node_layer
        Nodes layer object
    dir: str
        Directory where the files with the MMSBM parameters will be saved
    """
    def _meta_entry(meta, inclusive):
        # Build one json-serializable record per metadata object; inclusive
        # metadata carries the extra Tau and separator fields.
        entry = {
            "meta_name": str(meta),
            "lambda_val": meta.lambda_val,
            "N_atts": len(meta),
            "dict_codes": {str(k): str(v) for k, v in meta.dict_codes.items()},
        }
        if inclusive:
            entry["Tau"] = meta.Tau
            entry["separator"] = meta._separator
        return entry

    dict_info = {
        "dict_codes": {str(k): str(v) for k, v in layer.dict_codes.items()},
        "nodes_name": str(layer),
        "N_nodes": len(layer),
        "N_metas": layer.N_meta,
        "K": layer.K,
        "N_metas_exclusives": str(layer.N_meta_exclusive),
        "N_metas_inclusives": str(layer.N_meta_inclusive),
        "metadata_exclusives": [_meta_entry(m, False) for m in layer.meta_exclusives.values()],
        "metadata_inclusives": [_meta_entry(m, True) for m in layer.meta_inclusives.values()],
    }
    with open(dir + f'/layer_{str(layer)}_data.json', 'w') as outfile:
        json.dump(dict_info, outfile)
def save_BiNet_dict(BiNet, dir="."):
    """
    It saves some information, including the dict_codes from each layer, into a
    json file called BiNet_data.json. The same dictionary is also attached to
    the network object as BiNet.info.

    Parameters:
    -----------
    BiNet: BiNet object
        Bipartite network object
    dir: str
        Directory where the files with the MMSBM parameters will be saved
    """
    na = BiNet.nodes_a
    nb = BiNet.nodes_b
    # Other values from the EM and MMSBM saves
    dict_info = {
        "dict_codes": {str(k): str(v) for k, v in BiNet.dict_codes.items()},
        "links_label": BiNet.labels_name,
        "nodes_a_name": str(na),
        "nodes_b_name": str(nb),
        "Ka": na.K,
        "dict_codes_a": {str(k): str(v) for k, v in na.dict_codes.items()},
        "Kb": nb.K,
        "separator": BiNet._separator,
        "dict_codes_b": {str(k): str(v) for k, v in nb.dict_codes.items()},
    }
    # Keep the info dict on the object so callers can inspect it without
    # re-reading the json from disk.
    BiNet.info = dict_info
    with open(dir + '/BiNet_data.json', 'w') as outfile:
        json.dump(dict_info, outfile)
def load_EM_parameters(BiNet, directory="."):
    """
    It loads the parameters from matrices in the directory.

    Parameters:
    -----------
    BiNet: BiNet object
        Bipartite network with the MMSBM initialized
    directory: str, default: "."
        Directory where the files with the MMSBM parameters will be loaded

    Raises
    ------
    FileNotFoundError
        If no pkl.<format> parameter file exists for any supported format.
    """
    na = BiNet.nodes_a
    nb = BiNet.nodes_b
    if directory[-1] != "/":
        directory += "/"
    # Detect the saved format by probing for the pkl file. The original tested
    # the same path twice (copy-paste bug) and crashed with a NameError when no
    # file matched; fail with an explicit error instead.
    matrix_format = None
    for f in ["npy", "npz", "txt", "dat"]:
        if os.path.isfile(directory + "pkl." + f):
            matrix_format = f
            break
    if matrix_format is None:
        raise FileNotFoundError(
            "No pkl parameter file found in {!r}".format(directory)
        )
    BiNet.pkl = np.load(directory + "pkl." + matrix_format)
    # Per-layer parameter loads
    for layer, str_layer in [(na, "a"), (nb, "b")]:
        layer.theta = np.load(directory + "theta_{}.".format(str_layer) + matrix_format)
        # inclusive metadata loads
        for meta in layer.meta_inclusives.values():
            meta.zeta = np.load(directory + "zeta_{}.".format(str(meta)) + matrix_format)
            meta.q_k_tau = np.load(directory + "q_k_tau_{}.".format(str(meta)) + matrix_format)
        # exclusive metadata loads
        for meta in layer.meta_exclusives.values():
            meta.qka = np.load(directory + "qka_{}.".format(str(meta)) + matrix_format)