#!/usr/bin/env python
from collections import defaultdict
import os
import os.path
from shutil import copyfile
import numpy as np
import pandas as pd
from Bio import SeqIO
SOFTWARES = {'meme','dreme','centrimo','meme_tomtom'}
##TYPES_SEARCHES = {'All','Narrow','Const','Max'}
def parse_all_logs(path_analysis, exp_design_name):
    with open(path_analysis + 'Motif_search.log', 'w') as general_log:
        header = 'Data_name\t' + '\t'.join(SOFTWARES)
        general_log.write(header + '\n')
        for data_name in os.listdir(path_analysis + 'results' + os.sep):
            if not data_name.startswith('.') and not data_name.endswith('.log') and not data_name.endswith('.sh'):
                progress_filename = path_analysis + 'results' + os.sep + data_name + os.sep + 'progress_log.txt'
                line = data_name + '\t'
                for software in SOFTWARES:
                    found = False
                    with open(progress_filename, 'r') as file:
                        for row in file:
                            status_soft = 'name: ' + software + ' status: 0'
                            if status_soft in row:
                                found = True
                    if found:
                        line += '1\t'
                    else:
                        line += '0\t'
                general_log.write(line.strip() + '\n')
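# A hypothetical excerpt of the 'progress_log.txt' that parse_all_logs scans;
# the layout is assumed from the substring matched above, with 'status: 0'
# marking a tool that exited cleanly:
#   name: meme status: 0
#   name: dreme status: 0
#   name: centrimo status: 1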
def parse_dreme(path_analysis, path_motif, exp_design_name):
    with open(path_analysis + 'dreme.sh', 'w') as dreme_file:
        for data_name in os.listdir(path_analysis + 'results' + os.sep):
            if exp_design_name in data_name:
                print(data_name)
                if not data_name.startswith('.') and not data_name.endswith('.log') and not data_name.endswith('.sh'):
                    dreme_xml = path_analysis + 'results' + os.sep + data_name + os.sep + 'dreme_out' + \
                        os.sep + 'dreme.xml'
                    if os.path.exists(dreme_xml):
                        dreme_list = list()
                        with open(dreme_xml, 'r') as file:
                            for row in file:
                                dreme_list.append(row.strip())
                                if '<command_line>' in row:
                                    dreme_file.write(row)
                        dreme_txt = path_analysis + 'results' + os.sep + data_name + os.sep + 'dreme_out' \
                            + os.sep + 'dreme.txt'
                        dreme_txt_list = list()
                        with open(dreme_txt, 'r') as file:
                            for row in file:
                                dreme_txt_list.append(row.strip())
                        # keep the header of dreme.txt (from 'MEME version' up to the first motif)
                        dreme_txt_intro = ''
                        write_intro = False
                        for row in dreme_txt_list:
                            if row.startswith('MOTIF '):
                                break
                            if 'MEME version' in row:
                                write_intro = True
                            if write_intro:
                                dreme_txt_intro += row + '\n'
                        # parse dreme
                        with open(path_motif + os.sep + 'logs' + os.sep + 'Dreme_' + data_name + '.log', 'w') as general_log:
                            header = 'ID\tMotif\tNb_sites\tPos_Occurence\tNeg_Occurence\tNb_Seq\t' \
                                     'Percent\tPos_Percent\tNeg_Percent\tEvalue'
                            general_log.write(header + '\n')
                            # search length fasta (number of positive sequences)
                            number_seq = 0.
                            for line in dreme_list:
                                if '<positives name=' in line:
                                    number_seq = float(line.split(' ')[2].replace('"', '').replace('count=', ''))
                            # search the start of the <motifs> block
                            index_motifs = len(dreme_list)
                            for i in range(1, len(dreme_list)):
                                if '<motifs>' in dreme_list[i]:
                                    index_motifs = i
                                    break
                            # get motifs info
                            for k in range(index_motifs + 1, len(dreme_list)):
                                if '<motif' in dreme_list[k]:
                                    new_line = dreme_list[k].replace('"', '').split(' ')
                                    id = new_line[1].replace('id=', '')
                                    seq = new_line[2].replace('seq=', '')
                                    occ = float(new_line[4].replace('nsites=', ''))
                                    pos_occ = float(new_line[5].replace('p=', ''))
                                    neg_occ = float(new_line[6].replace('n=', ''))
                                    evalue = new_line[8].replace('evalue=', '')
                                    # save table
                                    line_to_write = [id, seq, str(occ), str(pos_occ), str(neg_occ), str(number_seq),
                                                     str(occ / number_seq), str(pos_occ / number_seq),
                                                     str(neg_occ / number_seq), str(evalue)]
                                    general_log.write('\t'.join(line_to_write) + '\n')
                                    # copy png files: normal-complement logos to _nc.png,
                                    # everything else (reverse-complement) to _rc.png
                                    dreme_out = path_analysis + 'results' + os.sep + data_name + os.sep + 'dreme_out' + os.sep
                                    for imagefile in os.listdir(dreme_out):
                                        if id in imagefile:
                                            src = dreme_out + imagefile
                                            if 'nc_' in imagefile:
                                                dst = path_motif + os.sep + 'motif_figure' + os.sep + seq + '_nc.png'
                                            else:
                                                dst = path_motif + os.sep + 'motif_figure' + os.sep + seq + '_rc.png'
                                            copyfile(src, dst)
                                    # save motif in minimal MEME format
                                    with open(path_motif + os.sep + 'motif' + os.sep + seq + '.meme', 'w') as meme_file:
                                        meme_file.write(dreme_txt_intro + '\n')
                                        # find motif info in dreme.txt
                                        dreme_txt_motif = ''
                                        write_motif = False
                                        for row in dreme_txt_list:
                                            if write_motif and row.startswith('MOTIF '):
                                                break
                                            if ('MOTIF ' + seq + ' DREME') in row:
                                                write_motif = True
                                            if write_motif:
                                                dreme_txt_motif += row + '\n'
                                        meme_file.write(dreme_txt_motif + '\n')
def regroup_motifs(path_motif, exp_design_name):
    '''
    Groups every motif found in the Dreme logs with the datasets and e-values where it occurs
    '''
    motif_set = set()
    dict_dataset = defaultdict(list)
    dict_evalue = defaultdict(list)
    for data_name in os.listdir(path_motif + 'logs' + os.sep):
        if data_name.startswith('Dreme_') and data_name.endswith('.log') and '-' in data_name:  # exp_design_name in data_name:
            type_data = data_name.replace('Dreme_', '').replace('.log', '')
            print(type_data, data_name)
            with open(path_motif + 'logs/' + data_name, 'r') as log_file:
                log_file.readline()
                for line in log_file:
                    motif = line.split('\t')[1]
                    evalue = line.strip().split('\t')[-1]
                    motif_set.add(motif)
                    dict_dataset[motif].append(type_data)
                    dict_evalue[motif].append(evalue)
    with open(path_motif + 'Motif_' + exp_design_name + '_Diff_raw.txt', 'w') as motif_log:
        motif_log.write('Motif\tData\tP-value\n')
        for motif in motif_set:
            motif_log.write(motif + '\t' + ';'.join(dict_dataset[motif]) + '\t' + ';'.join(dict_evalue[motif]) + '\n')
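# Hypothetical rows of the resulting Motif_<exp_design_name>_Diff_raw.txt, with
# datasets and e-values ';'-joined in matching order (values invented for illustration):
#   Motif   Data                       P-value
#   ACGTW   CecAm-Raw-1;CecAm-Raw-2    1.1e-8;3.2e-4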
def select_centrally_enriched(path_analysis, path_motif, exp_design_name, evalue_cutoff):
    centrimo_motif = dict()
    for data_name in os.listdir(path_motif + 'logs/'):
        if data_name.startswith('Dreme_') and data_name.endswith('.log') and '-' in data_name:  # exp_design_name in data_name:
            type_data = data_name.replace('Dreme_', '').replace('.log', '')
            centrimo_path = path_analysis + '/results/' + type_data + '/centrimo_out/centrimo.txt'
            if os.path.exists(centrimo_path):
                with open(centrimo_path, 'r') as centrimo_file:
                    for line in centrimo_file:
                        if 'DREME' in line:
                            motif = line.split('\t')[1].split(' ')[0]
                            print(motif, type_data)
                            centrimo_motif[motif] = type_data
    print(len(centrimo_motif))
    with open(path_motif + 'Motif_' + exp_design_name + '_Diff_raw.txt', 'r') as motif_log, \
            open(path_motif + 'Motif_' + exp_design_name + '_Diff.txt', 'w') as motif_filter_log:
        motif_filter_log.write('Motif\tData\tP-value\tCentrally Enriched\n')
        motif_log.readline()
        for line in motif_log:
            motif = line.split('\t')[0]
            print(motif)
            if motif in centrimo_motif:
                # keep the best (smallest) e-value across datasets
                evalue_list = line.split('\t')[2].split(';')
                min_evalue = 1
                for evalue in evalue_list:
                    if float(evalue) < min_evalue:
                        min_evalue = float(evalue)
                if min_evalue < evalue_cutoff:
                    motif_filter_log.write(line.strip() + '\tYes\n')
                    motif_file = path_motif + '/motif/' + motif + '.meme'
                    motif_figure_file = path_motif + '/motif_figure/' + motif + '_nc.eps'
                    # motif_figure_file = path_motif + '/motif_figure/' + motif + '_nc.png'
                    # The MEME suite has to be installed to use iupac2meme
                    iupac2_command = 'iupac2meme -dna ' + motif + ' > ' + motif_file
                    print(iupac2_command)
                    os.system(iupac2_command)
                    ceqlogo_command = 'ceqlogo -i ' + motif_file + ' -m ' + motif + ' -o ' + motif_figure_file + ' -f eps'
                    # ceqlogo_command = 'ceqlogo -i ' + motif_file + ' -m ' + motif + ' -o ' + motif_figure_file + ' -f png'
                    print('Run ' + ceqlogo_command)
                    os.system(ceqlogo_command)
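# Example of the shell commands emitted for a hypothetical motif 'ACGTW':
# iupac2meme converts an IUPAC string to minimal MEME format, ceqlogo draws its logo.
#   iupac2meme -dna ACGTW > <path_motif>/motif/ACGTW.meme
#   ceqlogo -i <path_motif>/motif/ACGTW.meme -m ACGTW -o <path_motif>/motif_figure/ACGTW_nc.eps -f eps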
def regroup_figures(path_motif, motif_list):
    '''
    Create an SVG file with all motifs included, then convert it to PNG
    :param path_motif: path where the motif figures are found
    :param motif_list: basename of the motif list file
    :return:
    '''
    print('Create PNG file with all motifs')
    df_summary = pd.read_csv(path_motif + motif_list + '.txt', sep='\t')
    # df_summary = df_summary.sort_values(by=['Motif'], ascending=[True])
    svg_text = '<?xml version="1.0" encoding="utf-8"?>\n<!-- Generator: Adobe Illustrator 15.0.0, SVG Export ' \
               'Plug-In . SVG Version: 6.00 Build 0) -->\n<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "' \
               'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n<svg version="1.1" id="Calque_1" ' \
               'xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"' \
               ' width="1300px" height="' + str(len(df_summary.index) * 100) + 'px" viewBox="0 0 1300 ' \
               + str(len(df_summary.index) * 100) + '" xml:space="preserve">\n'
    i = 0
    for index, row in df_summary.iterrows():
        motif = row['Motif']
        data = row['Data']
        pvalue = str(row['P-value'])
        # one row per motif: normal- and reverse-complement logos, dataset names, e-values
        image_file = 'motif_figure/' + motif + '_nc.png'
        new_row = '<image overflow="visible" width="299" height="176" xlink:href="' + image_file + '" ' \
                  'transform="matrix(0.5184 0 0 0.5184 10.00 ' + str(float(100 * i)) + ')">\n</image>\n'
        image_file = 'motif_figure/' + motif + '_rc.png'
        new_row += '<image overflow="visible" width="299" height="176" xlink:href="' + image_file + '" ' \
                   'transform="matrix(0.5184 0 0 0.5184 170.00 ' + str(float(100 * i)) + ')">\n</image>\n'
        new_row += '<text transform="matrix(0.5184 0 0 0.5184 340 ' + str(float(50 + 100 * i)) + ')" font-family=' \
                   '"\'MyriadPro-Regular\'" font-size="40">' + data + '</text>\n'
        new_row += '<text transform="matrix(0.5184 0 0 0.5184 800 ' + str(float(50 + 100 * i)) + ')" font-family=' \
                   '"\'MyriadPro-Regular\'" font-size="40">' + pvalue + '</text>\n'
        svg_text += new_row
        i += 1
    svg_text += '</svg>\n'
    with open(path_motif + motif_list + '.svg', 'w') as figure_file:
        figure_file.write(svg_text)
    print('Convert svg to png')
    os.system('convert ' + path_motif + motif_list + '.svg ' + path_motif + motif_list + '.png')
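# 'convert' assumes ImageMagick is on the PATH; a minimal alternative sketch,
# assuming the cairosvg package is installed instead:
#   import cairosvg
#   cairosvg.svg2png(url=path_motif + motif_list + '.svg',
#                    write_to=path_motif + motif_list + '.png')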
def run_fimo(path_analysis, exp_design_name, path_motif, path_motif_list, fimo_file):
    '''
    Creates a fimo.sh file with all the commands to run
    :param path_analysis: path where the fasta files, background, and fimo results should be
    :param exp_design_name: name of the peaks : example: CecAm_Raw_1
    :param path_motif: path where the motif will be found
    :param path_motif_list: path of the motif list
    :param fimo_file: path of the .sh file to write
    :return: save file to fimo_file
    '''
    # the peaks fasta is assumed to be named after the experiment design
    data_name = exp_design_name
    motif_list = list()
    with open(path_motif_list + '.txt', 'r') as motif_file:
        for row in motif_file:
            motif_list.append(row.split('\t')[0].strip())
    with open(fimo_file, 'w') as motif_sh:
        for motif in motif_list:
            motif_file = path_motif + 'motif/' + motif + '.meme'
            fasta_file = path_analysis + 'fasta/' + data_name + '.fasta'
            background_file = path_analysis + 'results/' + data_name + '/background'
            folder_fimo = path_motif + 'fimo/' + motif + '_' + data_name
            fimo_sh = 'fimo --parse-genomic-coord --verbosity 1 --thresh 1e-2 --max-stored-scores 1000000 ' \
                      '--oc ' + folder_fimo + ' --bgfile ' + background_file + ' ' + motif_file + ' ' + fasta_file
            print(fimo_sh)
            motif_sh.write(fimo_sh + '\n')
def run_centrimo(path_analysis, exp_design_name, path_motif, motif_list_filename):
    '''
    Creates a centrimo.sh file with all the commands to run
    :param path_analysis: path where the fasta files, background, and centrimo results should be
    :param exp_design_name: name of the peaks : example: CecAm_Raw_1
    :param path_motif: path where the motif will be found
    :param motif_list_filename: path of the motif list
    :return: save file to path_motif + 'Centrimo.sh'
    '''
    # the peaks fasta is assumed to be named after the experiment design
    data_name = exp_design_name
    motif_list = list()
    with open(path_motif + motif_list_filename + '.txt', 'r') as motif_file:
        for row in motif_file:
            motif_list.append(row.split('\t')[0].strip())
    with open(path_motif + 'Centrimo.sh', 'w') as motif_sh:
        for motif in motif_list:
            motif_file = path_motif + 'motif/' + motif + '.meme'
            fasta_file = path_analysis + 'fasta/' + data_name + '.fasta'
            background_file = path_analysis + 'results/' + data_name + '/background'
            folder_centrimo = path_motif + 'centrimo/' + motif + '_' + data_name
            centrimo_sh = 'centrimo -seqlen 150 -verbosity 1 -oc ' + folder_centrimo + \
                          ' --bgfile ' + background_file + ' -score 5.0 ' \
                          '-ethresh 10.0 ' + fasta_file + ' ' + motif_file
            print(centrimo_sh)
            motif_sh.write(centrimo_sh + '\n')
def create_motif_table(exp_design_name, path_motif, path_motif_list):
    '''
    Creates two tables summarizing occurrence of motifs for each peak
    :param exp_design_name: name of the peaks : example: CecAm_Raw_1
    :param path_motif: path where the motif will be found
    :param path_motif_list: path of the motif list
    :return: save file to path_motif_list + '_score.txt'
             and path_motif_list + '_count.txt'
    Note: path_analysis (where the fasta files are) is taken from module scope.
    '''
    data_name = exp_design_name
    motif_list = list()
    with open(path_motif_list + '.txt', 'r') as motif_file:
        for row in motif_file:
            motif_list.append(row.split('\t')[0].strip())
    print(path_analysis + 'fasta/' + exp_design_name + '.fasta')
    fasta_seq = SeqIO.to_dict(SeqIO.parse(path_analysis + 'fasta/' + exp_design_name + '.fasta', 'fasta'))
    nb_peaks = len(fasta_seq.keys())
    np_motif = np.zeros((nb_peaks, len(motif_list)))
    df_motif_score = pd.DataFrame(np_motif, index=list(fasta_seq.keys()), columns=motif_list)
    np_motif = np.zeros((nb_peaks, len(motif_list)))
    df_motif_count = pd.DataFrame(np_motif, index=list(fasta_seq.keys()), columns=motif_list)
    np_motif = np.ones((nb_peaks, len(motif_list)))
    df_motif_pvalue = pd.DataFrame(np_motif, index=list(fasta_seq.keys()), columns=motif_list)
    for motif in motif_list:
        print(motif)
        # fimo output file name assumed: older MEME releases write 'fimo.txt',
        # newer ones 'fimo.tsv' (with 'sequence_name' instead of 'sequence name')
        df_motif_peaks = pd.read_csv(path_motif + 'fimo/' + motif + '_' + data_name +
                                     '/fimo.txt', sep='\t')
        grouped = df_motif_peaks[['sequence name', 'score']].groupby('sequence name')
        # marginal score and motif count per peak; peaks without a hit stay at 0
        score = grouped.sum()
        df_motif_score[motif] = score['score'].reindex(df_motif_score.index).fillna(0)
        counted = grouped.count()
        df_motif_count[motif] = counted['score'].reindex(df_motif_count.index).fillna(0)
    df_motif_score.to_csv(path_motif_list + '_score.txt', sep='\t')
    df_motif_count.to_csv(path_motif_list + '_count.txt', sep='\t')
    # df_motif_pvalue.to_csv(path_motif + exp_design_name + '_' + motif_list_filename + '_pvalue.txt', sep='\t')
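# The fimo.txt parsed above is assumed to use the classic tab-separated header of
# older MEME releases; newer releases write fimo.tsv with underscored column
# names ('sequence_name'), which would require adapting the groupby above:
#   #pattern name  sequence name  start  stop  strand  score  p-value  q-value  matched sequence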
def get_size_fasta(path_analysis, exp_design_name):
    with open(path_analysis + 'Fasta_Summary.txt', 'w') as fasta_summary:
        for data_name in os.listdir(path_analysis + 'fasta/'):
            if data_name.endswith('.fasta'):
                fasta_seq = SeqIO.to_dict(SeqIO.parse(path_analysis + 'fasta/' + data_name, 'fasta'))
                print(data_name, len(fasta_seq))
                fasta_summary.write(data_name.replace('.fasta', '') + '\t' + str(len(fasta_seq)) + '\n')
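# Hypothetical lines of the resulting Fasta_Summary.txt: fasta name without
# extension, then its number of sequences (sizes invented for illustration):
#   CecAm_Raw_1    1523
#   CecAm_Raw_2    987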
def motif_vs_fasta(path_analysis, exp_design_name, path_motif, motif_list_filename):
    '''
    Creates two tables summarizing occurrence of motifs for each fasta file
    :param path_analysis: path where the fasta files, background, and fimo results should be
    :param exp_design_name: name of the peaks : example: CecAm_Raw_1
    :param path_motif: path where the motif will be found
    :param motif_list_filename: path of the motif list
    :return: save file to path_analysis + motif_list_filename + '_fasta_score.txt'
             and path_analysis + motif_list_filename + '_fasta_count.txt'
    '''
    # read fasta summary
    fasta_to_size = dict()
    with open(path_analysis + 'Fasta_Summary.txt', 'r') as fasta_summary:
        for row in fasta_summary:
            fasta_name = row.split('\t')[0]
            fasta_size = row.split('\t')[1].strip()
            if fasta_size != '0':
                fasta_to_size[fasta_name] = fasta_size
    # read list
    motif_list = list()
    with open(path_motif + motif_list_filename + '.txt', 'r') as motif_file:
        for row in motif_file:
            motif_list.append(row.split('\t')[0].strip())
    df_motif_score = pd.read_csv(path_analysis + motif_list_filename + '_score.txt', index_col=0, sep='\t')
    df_motif_count = pd.read_csv(path_analysis + motif_list_filename + '_count.txt', index_col=0, sep='\t')
    # create motif vs fasta tables
    nb_fasta = len(fasta_to_size.keys())
    np_fasta = np.zeros((len(motif_list), nb_fasta))
    df_motif_fasta_score = pd.DataFrame(np_fasta, index=motif_list, columns=list(fasta_to_size.keys()))
    np_fasta = np.zeros((len(motif_list), nb_fasta))
    df_motif_fasta_count = pd.DataFrame(np_fasta, index=motif_list, columns=list(fasta_to_size.keys()))
    print(motif_list)
    # go through all fasta files, normalizing by the number of peaks in each
    for fasta_name, fasta_size in fasta_to_size.items():
        fasta_peaks = SeqIO.to_dict(SeqIO.parse(path_analysis + 'fasta/' + fasta_name + '.fasta', 'fasta'))
        for motif in motif_list:
            motif_score = df_motif_score.loc[list(fasta_peaks.keys()), motif].sum()
            df_motif_fasta_score.loc[motif, fasta_name] = float(motif_score) / float(fasta_size)
            motif_count = df_motif_count.loc[list(fasta_peaks.keys()), motif].sum()
            df_motif_fasta_count.loc[motif, fasta_name] = float(motif_count) / float(fasta_size)
    df_motif_fasta_score.to_csv(path_analysis + motif_list_filename + '_fasta_score.txt', sep='\t')
    df_motif_fasta_count.to_csv(path_analysis + motif_list_filename + '_fasta_count.txt', sep='\t')
#exp_design_name = 'LivOld'
exp_design_name = 'Cecum_all_withoutabxGF_MaxMaxValues_3'
#path = '/Volumes/m6aAkker/'
path = 'Z:' + os.sep
path_analysis = path + 'PeakDiffExpression' + os.sep + exp_design_name + os.sep + 'Motif' + os.sep
path_motif = path + 'PeakDiffExpression' + os.sep + 'Motif' + os.sep
#motif_list_filename = 'Motif_'+exp_design_name+'_count'
motif_list_filename = 'Motif_' + exp_design_name
path_motif_list = path_analysis + motif_list_filename
# Create all folders for motif analysis
# if not os.path.isdir(path_motif + 'logs' + os.sep):
# os.mkdir(path_motif + 'logs' + os.sep)
# if not os.path.isdir(path_motif + 'motif' + os.sep):
# os.mkdir(path_motif + 'motif' + os.sep)
# if not os.path.isdir(path_motif + 'motif_figure' + os.sep):
# os.mkdir(path_motif + 'motif_figure' + os.sep)
# if not os.path.isdir(path_motif + 'fimo' + os.sep):
# os.mkdir(path_motif + 'fimo' + os.sep)
# if not os.path.isdir(path_motif + 'centrimo' + os.sep):
# os.mkdir(path_motif + 'centrimo' + os.sep)
# first look at log to see which software crashed
#parse_all_logs(path_analysis, exp_design_name)
# Parse meme results and extract all motifs
#parse_dreme(path_analysis, path_motif, exp_design_name)
#regroup_motifs(path_motif, exp_design_name)
# Select motif centrally enriched -> Could be done automatically
#evalue_cutoff = 0.00001
#evalue_cutoff = 0.01
#select_centrally_enriched(path_analysis, path_motif, exp_design_name, evalue_cutoff)
# Manually filter motif list and put it in path_motif
#motif = 'NBCAN'
#add_motif(path_motif, motif, motif_list_filename)
# put all motifs figures together to filter the list
# regroup_figures(path_motif, motif_list_filename)
path_analysis = path + 'PeakDiffExpression/' + exp_design_name + '/Motif/'
#path_motif = path_analysis
path_motif = path + 'PeakDiffExpression/Motif/'
#motif_list_filename = 'Motif_'+exp_design_name+'_count'
motif_list_filename = 'Motif_' + exp_design_name + '_Meth'
path_motif_list = path + 'PeakDiffExpression' + os.sep + 'Motif' + os.sep + motif_list_filename
fimo_file = path + 'PeakDiffExpression' + os.sep + 'Motif' + os.sep + 'Fimo_' + exp_design_name + '_Meth' + '.sh'
# write sh file for running fimo
run_fimo(path_analysis, exp_design_name, path_motif, path_motif_list, fimo_file)
# regroup everything in one .sh file
#print('cat '+path_motif+ '/Fimo_* > ' + path_motif + '/Fimo.sh')
# write sh file for running centrimo
#run_centrimo(path_analysis, exp_design_name, path_motif, motif_list_filename)
#
# exp_design_name = 'Liver_all_MaxMaxValues_1'
# path_analysis = path + 'PeakDiffExpression/' + exp_design_name + '/Motif/'
# path_motif = path + 'PeakDiffExpression/Motif/'
# motif_list_filename = 'Motif_' + exp_design_name
# path_motif_list = path + 'PeakDiffExpression/Motif/' + motif_list_filename
#
# # create a table grouping peak marginal score for each motif
# create_motif_table(exp_design_name, path_motif, path_motif_list)
#
# exp_design_name = 'Liver_all_MaxMaxValues_3'
# path_analysis = path + 'PeakDiffExpression/' + exp_design_name + '/Motif/'
# path_motif = path + 'PeakDiffExpression/Motif/'
# motif_list_filename = 'Motif_' + exp_design_name
# path_motif_list = path + 'PeakDiffExpression/Motif/' + motif_list_filename
#
# # create a table grouping peak marginal score for each motif
# create_motif_table(exp_design_name, path_motif, path_motif_list)
#
# exp_design_name = 'Cecum_all_MaxMaxValues_1'
# path_analysis = path + 'PeakDiffExpression/' + exp_design_name + '/Motif/'
# path_motif = path + 'PeakDiffExpression/Motif/'
# motif_list_filename = 'Motif_' + exp_design_name
# path_motif_list = path + 'PeakDiffExpression/Motif/' + motif_list_filename
#
# # create a table grouping peak marginal score for each motif
# create_motif_table(exp_design_name, path_motif, path_motif_list)
# get size fasta
#get_size_fasta(path_analysis, exp_design_name)
# Count marginal score and marginal motif presence for every fasta file
#motif_vs_fasta(path_analysis, exp_design_name, path_motif, motif_list_filename)
# Using R run motif analysis for clustering of motif and biocondition
# use clustered list of motif to recreate figure
#regroup_figures(path_motif, motif_list_filename + '_' + exp_design_name)
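# A minimal, hypothetical sketch of that R clustering step done in Python instead,
# assuming scipy is installed; it reorders the motif list by hierarchical
# clustering of the per-fasta scores so regroup_figures can redraw them grouped:
#   from scipy.cluster.hierarchy import linkage, leaves_list
#   df = pd.read_csv(path_analysis + motif_list_filename + '_fasta_score.txt', index_col=0, sep='\t')
#   order = leaves_list(linkage(df.values, method='average'))
#   df.iloc[order].to_csv(path_motif + motif_list_filename + '_clustered.txt', sep='\t')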