Skip to main content

Circos plotting the CCA results

I run CCA on a set of variables and got 4 components significant after fdr correction. Each components got top vars extracted by thresholding the cca weight at 0.2 and now I wanted to plot the components and top vars in each fc and sc as seen bellow of the code:

import os
import networkx as nx
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import CCA
from scipy.stats import percentileofscore
from statsmodels.stats.multitest import multipletests
from sklearn.utils import resample
from nxviz import CircosPlot
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


#Set seed
np.random.seed(123)

# Load data
def load_data(file_name):
    return pd.read_csv(file_name)

# Canonical Correlation Analysis
def calculate_canonical_loadings(X, Y):
    cca = CCA(n_components=69, max_iter=5000)
    cca.fit(X, Y)
    X_c, Y_c = cca.transform(X, Y)
    return np.corrcoef(X_c.T, Y_c.T)[:X_c.shape[1], X_c.shape[1]:], cca

# Bootstrap for CCA coefficients
def bootstrap_cca_coefficients(X, Y, n_iterations=1000, alpha=0.05):
    bootstrap_coefs = []
    cca = CCA(n_components=69, max_iter=5000)
    for _ in range(n_iterations):
        resampled_X, resampled_Y = resample(X, Y)
        cca.fit(resampled_X, resampled_Y)
        bootstrap_coefs.append(cca.x_weights_)
    lower, upper = np.percentile(bootstrap_coefs, [alpha/2*100, (1-alpha/2)*100], axis=0)
    return lower, upper

def permutation_test_cca(X, Y, observed_corr, n_iterations=1000):
    p_values = np.zeros(observed_corr.shape[0])
    permuted_corr = np.zeros((n_iterations, observed_corr.shape[0]))
    cca = CCA(n_components=69, max_iter=5000)
    for i in range(n_iterations):
        shuffled_X = np.random.permutation(X)
        shuffled_Y = np.random.permutation(Y)
        cca.fit(shuffled_X, shuffled_Y)
        X_c, Y_c = cca.transform(shuffled_X, shuffled_Y)
        permuted_corr[i] = np.diag(np.corrcoef(X_c.T, Y_c.T)[:X_c.shape[1], X_c.shape[1]:])
    p_values = 1 - np.array([percentileofscore(permuted_corr[:, i], observed_corr[i]) / 100 for i in range(observed_corr.shape[0])])
    return p_values
 
# Extract top variable names
def get_top_variable_names_for_significant_components(data, weights, significant_indices, threshold=0.2):
    top_variable_names = {}
    column_names = data.columns
    for index in significant_indices:
        weight_vector = weights[:, index]
        top_variable_indices = np.where(np.abs(weight_vector) >= threshold)[0]
        top_variable_names[index] = column_names[top_variable_indices].tolist()
    return top_variable_names

# Function to prepare CCA data for Circos plot
def prepare_cca_data_for_circos(top_fc_variable_names, top_sc_variable_names, observed_corr, significant_fdr):
    edge_list = []
    for comp in significant_fdr:
        for fc_var in top_fc_variable_names[comp]:
            for sc_var in top_sc_variable_names[comp]:
                edge_list.append((fc_var, sc_var, observed_corr[comp, comp]))
    return edge_list

# Function to plot Circos plot
def plot_circos(edge_list):
    G = nx.Graph()
    for fc_var, sc_var, weight in edge_list:
        G.add_edge(fc_var, sc_var, weight=weight)
    
    c = CircosPlot(G, node_color="class", node_order="class", node_labels=True)
    c.draw()
    plt.show()

# Main function
if __name__ == "__main__":
    desktop_path = os.path.expanduser("~/Desktop/struc_func/The_NMF_Run_data")
    os.chdir(desktop_path)
    
    fc_data = load_data("top_vars_nmf_struc.csv")
    sc_data = load_data("top_vars_nmf_func.csv")
    
    scaler = StandardScaler()
    fc_data_standardized = scaler.fit_transform(fc_data)
    sc_data_standardized = scaler.fit_transform(sc_data)
    
    observed_corr, cca = calculate_canonical_loadings(fc_data_standardized, sc_data_standardized)
    p_values = permutation_test_cca(fc_data_standardized, sc_data_standardized, np.diag(observed_corr))
        
    reject, pvals_corrected, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh')
    significant_fdr = np.where(pvals_corrected < 0.05)[0]
    
    if len(significant_fdr) > 0:
        top_fc_variable_names = get_top_variable_names_for_significant_components(fc_data, cca.x_weights_, significant_fdr, threshold=0.2)
        top_sc_variable_names = get_top_variable_names_for_significant_components(sc_data, cca.y_weights_, significant_fdr, threshold=0.2)

        edge_list = prepare_cca_data_for_circos(top_fc_variable_names, top_sc_variable_names, observed_corr, significant_fdr)
        
        plot_circos(edge_list)
    else:
        print("No significant components found after FDR correction.")
  
Plotting (circos)
 def generate_circos_plot_matplotlib(nodes, links, top_fc_variable_names=None, top_sc_variable_names=None):
    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'projection': 'polar'})
    N = len(nodes)
    
    # Sort the nodes by their 'color' property for better visualization
    sorted_nodes = sorted(nodes, key=lambda x: x['color'])
    
    # Generate theta values
    theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)
    
    # Create the node points on the circle
    for i, node in enumerate(sorted_nodes):
        ax.scatter(theta[i], 1, c=node['color'], s=300)
        ax.text(theta[i], 1.1, node['label'], ha='center')
        
    # Add logic to highlight top variables (if provided)
    if top_fc_variable_names and top_sc_variable_names:
        # Your logic to highlight top variables goes here.
        pass
        
    # Create links
    for link in links:
        source_idx = next((i for i, node in enumerate(sorted_nodes) if node['id'] == link['source']), None)
        target_idx = next((i for i, node in enumerate(sorted_nodes) if node['id'] == link['target']), None)
        
        if source_idx is None or target_idx is None:
            continue
        
        # Convert the weights to color
        color_intensity = np.clip(link['value'], 0, 1)
        color = (1 - color_intensity, 0.2, color_intensity)
        
        ax.plot([theta[source_idx], theta[target_idx]], [1, 1], c=color, lw=2*link['value'])
        
    plt.show()

But I was not successful in achieving my goal, I know the I ran the CCA correctly. I appreciate any help in plotting this.



source https://stackoverflow.com/questions/77137376/circos-plotting-the-cca-results

Comments

Popular posts from this blog

How to show number of registered users in Laravel based on usertype?

i'm trying to display data from the database in the admin dashboard i used this: <?php use Illuminate\Support\Facades\DB; $users = DB::table('users')->count(); echo $users; ?> and i have successfully get the correct data from the database but what if i want to display a specific data for example in this user table there is "usertype" that specify if the user is normal user or admin i want to user the same code above but to display a specific usertype i tried this: <?php use Illuminate\Support\Facades\DB; $users = DB::table('users')->count()->WHERE usertype =admin; echo $users; ?> but it didn't work, what am i doing wrong? source https://stackoverflow.com/questions/68199726/how-to-show-number-of-registered-users-in-laravel-based-on-usertype

Why is my reports service not connecting?

I am trying to pull some data from a Postgres database using Node.js and node-postures but I can't figure out why my service isn't connecting. my routes/index.js file: const express = require('express'); const router = express.Router(); const ordersCountController = require('../controllers/ordersCountController'); const ordersController = require('../controllers/ordersController'); const weeklyReportsController = require('../controllers/weeklyReportsController'); router.get('/orders_count', ordersCountController); router.get('/orders', ordersController); router.get('/weekly_reports', weeklyReportsController); module.exports = router; My controllers/weeklyReportsController.js file: const weeklyReportsService = require('../services/weeklyReportsService'); const weeklyReportsController = async (req, res) => { try { const data = await weeklyReportsService; res.json({data}) console...

How to split a rinex file if I need 24 hours data

Trying to divide rinex file using the command gfzrnx but getting this error. While doing that getting this error msg 'gfzrnx' is not recognized as an internal or external command Trying to split rinex file using the command gfzrnx. also install'gfzrnx'. my doubt is I need to run this program in 'gfzrnx' or in 'cmdprompt'. I am expecting a rinex file with 24 hrs or 1 day data.I Have 48 hrs data in RINEX format. Please help me to solve this issue. source https://stackoverflow.com/questions/75385367/how-to-split-a-rinex-file-if-i-need-24-hours-data