Skip to main content

Circos plotting the CCA results

I run CCA on a set of variables and got 4 components significant after fdr correction. Each components got top vars extracted by thresholding the cca weight at 0.2 and now I wanted to plot the components and top vars in each fc and sc as seen bellow of the code:

import os
import networkx as nx
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import CCA
from scipy.stats import percentileofscore
from statsmodels.stats.multitest import multipletests
from sklearn.utils import resample
from nxviz import CircosPlot
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


#Set seed
np.random.seed(123)

# Load data
def load_data(file_name):
    return pd.read_csv(file_name)

# Canonical Correlation Analysis
def calculate_canonical_loadings(X, Y):
    cca = CCA(n_components=69, max_iter=5000)
    cca.fit(X, Y)
    X_c, Y_c = cca.transform(X, Y)
    return np.corrcoef(X_c.T, Y_c.T)[:X_c.shape[1], X_c.shape[1]:], cca

# Bootstrap for CCA coefficients
def bootstrap_cca_coefficients(X, Y, n_iterations=1000, alpha=0.05):
    bootstrap_coefs = []
    cca = CCA(n_components=69, max_iter=5000)
    for _ in range(n_iterations):
        resampled_X, resampled_Y = resample(X, Y)
        cca.fit(resampled_X, resampled_Y)
        bootstrap_coefs.append(cca.x_weights_)
    lower, upper = np.percentile(bootstrap_coefs, [alpha/2*100, (1-alpha/2)*100], axis=0)
    return lower, upper

def permutation_test_cca(X, Y, observed_corr, n_iterations=1000):
    p_values = np.zeros(observed_corr.shape[0])
    permuted_corr = np.zeros((n_iterations, observed_corr.shape[0]))
    cca = CCA(n_components=69, max_iter=5000)
    for i in range(n_iterations):
        shuffled_X = np.random.permutation(X)
        shuffled_Y = np.random.permutation(Y)
        cca.fit(shuffled_X, shuffled_Y)
        X_c, Y_c = cca.transform(shuffled_X, shuffled_Y)
        permuted_corr[i] = np.diag(np.corrcoef(X_c.T, Y_c.T)[:X_c.shape[1], X_c.shape[1]:])
    p_values = 1 - np.array([percentileofscore(permuted_corr[:, i], observed_corr[i]) / 100 for i in range(observed_corr.shape[0])])
    return p_values
 
# Extract top variable names
def get_top_variable_names_for_significant_components(data, weights, significant_indices, threshold=0.2):
    top_variable_names = {}
    column_names = data.columns
    for index in significant_indices:
        weight_vector = weights[:, index]
        top_variable_indices = np.where(np.abs(weight_vector) >= threshold)[0]
        top_variable_names[index] = column_names[top_variable_indices].tolist()
    return top_variable_names

# Function to prepare CCA data for Circos plot
def prepare_cca_data_for_circos(top_fc_variable_names, top_sc_variable_names, observed_corr, significant_fdr):
    edge_list = []
    for comp in significant_fdr:
        for fc_var in top_fc_variable_names[comp]:
            for sc_var in top_sc_variable_names[comp]:
                edge_list.append((fc_var, sc_var, observed_corr[comp, comp]))
    return edge_list

# Function to plot Circos plot
def plot_circos(edge_list):
    G = nx.Graph()
    for fc_var, sc_var, weight in edge_list:
        G.add_edge(fc_var, sc_var, weight=weight)
    
    c = CircosPlot(G, node_color="class", node_order="class", node_labels=True)
    c.draw()
    plt.show()

# Main function
if __name__ == "__main__":
    desktop_path = os.path.expanduser("~/Desktop/struc_func/The_NMF_Run_data")
    os.chdir(desktop_path)
    
    fc_data = load_data("top_vars_nmf_struc.csv")
    sc_data = load_data("top_vars_nmf_func.csv")
    
    scaler = StandardScaler()
    fc_data_standardized = scaler.fit_transform(fc_data)
    sc_data_standardized = scaler.fit_transform(sc_data)
    
    observed_corr, cca = calculate_canonical_loadings(fc_data_standardized, sc_data_standardized)
    p_values = permutation_test_cca(fc_data_standardized, sc_data_standardized, np.diag(observed_corr))
        
    reject, pvals_corrected, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh')
    significant_fdr = np.where(pvals_corrected < 0.05)[0]
    
    if len(significant_fdr) > 0:
        top_fc_variable_names = get_top_variable_names_for_significant_components(fc_data, cca.x_weights_, significant_fdr, threshold=0.2)
        top_sc_variable_names = get_top_variable_names_for_significant_components(sc_data, cca.y_weights_, significant_fdr, threshold=0.2)

        edge_list = prepare_cca_data_for_circos(top_fc_variable_names, top_sc_variable_names, observed_corr, significant_fdr)
        
        plot_circos(edge_list)
    else:
        print("No significant components found after FDR correction.")
  
Plotting (circos)
 def generate_circos_plot_matplotlib(nodes, links, top_fc_variable_names=None, top_sc_variable_names=None):
    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'projection': 'polar'})
    N = len(nodes)
    
    # Sort the nodes by their 'color' property for better visualization
    sorted_nodes = sorted(nodes, key=lambda x: x['color'])
    
    # Generate theta values
    theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)
    
    # Create the node points on the circle
    for i, node in enumerate(sorted_nodes):
        ax.scatter(theta[i], 1, c=node['color'], s=300)
        ax.text(theta[i], 1.1, node['label'], ha='center')
        
    # Add logic to highlight top variables (if provided)
    if top_fc_variable_names and top_sc_variable_names:
        # Your logic to highlight top variables goes here.
        pass
        
    # Create links
    for link in links:
        source_idx = next((i for i, node in enumerate(sorted_nodes) if node['id'] == link['source']), None)
        target_idx = next((i for i, node in enumerate(sorted_nodes) if node['id'] == link['target']), None)
        
        if source_idx is None or target_idx is None:
            continue
        
        # Convert the weights to color
        color_intensity = np.clip(link['value'], 0, 1)
        color = (1 - color_intensity, 0.2, color_intensity)
        
        ax.plot([theta[source_idx], theta[target_idx]], [1, 1], c=color, lw=2*link['value'])
        
    plt.show()

But I was not successful in achieving my goal, I know the I ran the CCA correctly. I appreciate any help in plotting this.



source https://stackoverflow.com/questions/77137376/circos-plotting-the-cca-results

Comments

Popular posts from this blog

ValueError: X has 10 features, but LinearRegression is expecting 1 features as input

So, I am trying to predict the model but its throwing error like it has 10 features but it expacts only 1. So I am confused can anyone help me with it? more importantly its not working for me when my friend runs it. It works perfectly fine dose anyone know the reason about it? cv = KFold(n_splits = 10) all_loss = [] for i in range(9): # 1st for loop over polynomial orders poly_order = i X_train = make_polynomial(x, poly_order) loss_at_order = [] # initiate a set to collect loss for CV for train_index, test_index in cv.split(X_train): print('TRAIN:', train_index, 'TEST:', test_index) X_train_cv, X_test_cv = X_train[train_index], X_test[test_index] t_train_cv, t_test_cv = t[train_index], t[test_index] reg.fit(X_train_cv, t_train_cv) loss_at_order.append(np.mean((t_test_cv - reg.predict(X_test_cv))**2)) # collect loss at fold all_loss.append(np.mean(loss_at_order)) # collect loss at order plt.plot(np.log(al...

Sorting large arrays of big numeric stings

I was solving bigSorting() problem from hackerrank: Consider an array of numeric strings where each string is a positive number with anywhere from to digits. Sort the array's elements in non-decreasing, or ascending order of their integer values and return the sorted array. I know it works as follows: def bigSorting(unsorted): return sorted(unsorted, key=int) But I didnt guess this approach earlier. Initially I tried below: def bigSorting(unsorted): int_unsorted = [int(i) for i in unsorted] int_sorted = sorted(int_unsorted) return [str(i) for i in int_sorted] However, for some of the test cases, it was showing time limit exceeded. Why is it so? PS: I dont know exactly what those test cases were as hacker rank does not reveal all test cases. source https://stackoverflow.com/questions/73007397/sorting-large-arrays-of-big-numeric-stings

How to load Javascript with imported modules?

I am trying to import modules from tensorflowjs, and below is my code. test.html <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>Document</title </head> <body> <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js"></script> <script type="module" src="./test.js"></script> </body> </html> test.js import * as tf from "./node_modules/@tensorflow/tfjs"; import {loadGraphModel} from "./node_modules/@tensorflow/tfjs-converter"; const MODEL_URL = './model.json'; const model = await loadGraphModel(MODEL_URL); const cat = document.getElementById('cat'); model.execute(tf.browser.fromPixels(cat)); Besides, I run the server using python -m http.server in my command prompt(Windows 10), and this is the error prompt in the console log of my browser: Failed to loa...