Commit c0ea9d94 authored by samuel hanot's avatar samuel hanot

first commit

parents
# recursive_gmconvert
Implements the divide-and-conquer computation of Gaussian mixture models (GMMs) described in [1].
## dependencies
EMAN2 (blake.bcm.edu/emanwiki/EMAN2), used to compute the FSC.
gnuplot (www.gnuplot.info/)
## installation
1. clone the repository or download the zip and extract it (we refer to the root of the source tree as `SRC_DIR`).
2. compile gmconvert: `cd ${SRC_DIR}/gmconvert/src ; make ; cd -`
3. if you need to run some commands before executing the scripts, place them in modules.sh
## usage:
```
${SRC_DIR}/recursive_gmconvert.sh MAP_NAME THRESHOLD n N i0 SERIAL
```
`MAP_NAME`: the file name of the input EM map
`THRESHOLD`: the density threshold
`n`: number of gaussians per sub-process. 2 or 4 is a good guess.
`N`: number of recursion levels.
`i0`: initial recursion level (default: 1)
`SERIAL`: if set to 1, uses the serial implementation (default: 0)
The script creates a sub-directory (called `i`) for each recursion level `i`, and the output files are called `i/i.gmm` and `i_imp.gmm`.
`i/i.gmm` contains the gmm in `gmconvert` format, and `i_imp.gmm` contains the gmm in `IMP` format (the conversion is handled by `gmconvert2imp.sh`), which can be read in IMP using the `IMP.isd.gmm_tools.decorate_gmm_from_text` function.
Unless you set SERIAL to 1, all the scripts assume that you are running a cluster with the `slurm` queuing system.
## References
[1] Bayesian multi-scale modeling of macromolecular structures based on cryo-electron microscopy density maps
Samuel Hanot, Massimiliano Bonomi, Charles H Greenberg, Andrej Sali, Michael Nilges, Michele Vendruscolo, Riccardo Pellarin
doi: https://doi.org/10.1101/113951
#!/bin/bash
# Mirror EMDB entries and run recursive_gmconvert.sh on each of them.
#
#   stdin  : whitespace-separated entry identifiers (the part that follows
#            "EMD-" in the EMDB directory name)
#   $1, $2 : forwarded to recursive_gmconvert.sh right after the map name
#            and the density cutoff
#
# For every identifier <id> the entry is rsync'ed into ./<id>; the contour
# level and the author resolution are extracted into cutoff.txt and
# resolution.txt; the map is decompressed; and recursive_gmconvert.sh is run
# from inside ./<id>.

# Resolve the script directory to an ABSOLUTE path: the loop below changes
# directory, so a relative path (e.g. "." when invoked as ./script) would no
# longer point at recursive_gmconvert.sh once we are inside ./<id>.
bindir=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd)
source "${bindir}/modules.sh"
for f in $(cat)
do
    echo $f
    rsync -a -v -z --delete rsync.ebi.ac.uk::pub/databases/emdb/structures/EMD-${f}/ "$f"
    # If the mirror failed there is no directory to enter; skip the entry
    # instead of running the remaining steps (and the final `cd -`) from the
    # wrong working directory.
    cd "$f" || { echo "cannot enter directory $f, skipping" >&2; continue; }
    links http://www.ebi.ac.uk/pdbe/entry/emdb/EMD-$f/analysis > header/map_analysis.txt
    # 4th field of the line(s) mentioning "contour" in the analysis page.
    cutoff=$(awk '/contour/{print $4}' < header/map_analysis.txt)
    # Strip '<', '>', '/' and blanks, then print the lines that contained
    # the resolutionByAuthor tag with the tag name removed.
    resolution=$(sed -ne 's/[<>/ ]//g' -ne 's/resolutionByAuthor//gp' header/emd-${f}.xml)
    echo ${cutoff} > cutoff.txt
    echo ${resolution} > resolution.txt
    gunzip -c map/emd_${f}.map.gz > emd_${f}.map
    "${bindir}/recursive_gmconvert.sh" emd_${f}.map $cutoff $1 $2
    cd -
done
This diff is collapsed.
/*
<3DmapEM.h>
*/
/*** Functions (GLOBAL) ***/
extern void Set_Less_Than_Threshold_Voxels_to_Zero();
extern void Cal_Mean_and_CovarMat_from_Voxel();
extern int EM_optimize_GaussMix_for_3D_Map_SmallMemory();
extern int EM_optimize_GaussMix_for_3D_Map();
extern double Corr_Coeff_Bwn_Two_Voxels();
extern double Corr_Coeff_Bwn_Voxel_and_GMM();
extern double Log_Likelihood_Of_GaussMix_For_3Dmap();
extern void Set_GaussMix_Density_To_3Dmap();
This diff is collapsed.
/*
<3DmapKmean.h>
*/
extern void K_Means_Clustering_for_3D_Map_Multiple_Start();
extern double K_Means_Clustering_for_3D_Map();
extern void Initialize_Gauss_From_Randomly_Chosen_Voxels();
This diff is collapsed.
/*
<ATOMs_gmconvert_from_PDBmmCIF.h>
*/
/* FUNCTIONS (GLOBAL) */
extern void Read_mmCIF_File();
extern void make_ATOM_list_from_ASSEMBLY();
extern void make_ATOM_list_from_UNITMOLs();
/**
<Atom2Vox.c>
Functions from atomic data to Voxel data
**/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include <stdbool.h>
#include "pdbstruct.h"
#include "PdbIO.h"
#include "Radius.h"
#include "gauss.h"
#include "Matrix3D.h"
#include "Voxel.h"
#include "MCubeFunc.h"
#include "GaussIO.h"
/* one_over_2pi_32 = 1.0/pow(2*M_PI,3/2) = pow(1/2/M_PI,3/2) */
#define one_over_2pi_32 0.0634936359342410
/* three_over_2pi_32 = pow(3/2/M_PI,3/2) */
#define three_over_2pi_32 0.3299226101861591
/** FUNCTIONS (GLOBAL) **/
void Set_Voxel_Value_By_Atoms();
void Get_Min_Max_From_Atoms();
void Malloc_Voxel_From_Min_Max();
void Malloc_Voxel_From_Atoms();
/** FUNCTIONS (LOCAL) **/
static double Gaussian3D_Isotropic();
static double Gaussian3D_Isotropic_PeakDensityOne();
void Set_Voxel_Value_By_Atoms(vox,HeadAtom,Atom2VoxType,sigma_isotropic,init)
  struct VOXEL *vox;       /* grid that receives the density (vox->dat[x][y][z]) */
  struct ATOM  *HeadAtom;  /* list head; the atoms are HeadAtom->next, ->next->next, ... */
  char  Atom2VoxType;      /* 'R': Rvdw-based gaussian, anything else: isotropic gaussian */
  float sigma_isotropic;   /* sigma used when Atom2VoxType is not 'R' */
  bool  init;              /* true: zero every voxel before accumulating */
{
 /*
   Adds one isotropic Gaussian per atom onto the voxel grid.

   [Atom2VoxType != 'R']
     Every atom uses Sigma = sigma_isotropic and the density returned by
     Gaussian3D_Isotropic().

   [Atom2VoxType == 'R']
     Based on the idea of Laskowski (J.Mol.Graph., 13, 323-330 (1995)).
     Sigma is chosen so that gauss(Rvdw) is half of gauss(0):
        exp[-Rvdw^2/sigma^2/2] = 1/2   ==>   sigma = Rvdw/sqrt(2 log 2)
     The peak density is fixed to one, i.e.
        gauss(r) = exp[-r^2/sigma^2/2.0]
     so the isocontour threshold for this case should be 0.5.

   For each atom only the voxels inside the box
        [Pos - R - 3*Sigma , Pos + R + 3*Sigma]
   (clipped to the grid) are visited.
 */
  const int rvdw_based = (Atom2VoxType=='R');
  int   axis,ix,iy,iz;
  int   lo_idx[3],hi_idx[3];
  float grid_pos[3];
  float lo,hi,Sigma,SDcutoff;
  struct ATOM *atom;

  printf("#Set_Voxel_Value_By_Atoms(Atom2VoxType:%c sigma_isotropic:%lf)\n",Atom2VoxType,sigma_isotropic);

  SDcutoff = 3.0;

  /* Optional reset of the whole grid */
  if (init){
    for (ix=0;ix<vox->N[0];++ix){
      for (iy=0;iy<vox->N[1];++iy){
        for (iz=0;iz<vox->N[2];++iz){
          vox->dat[ix][iy][iz] = 0.0;
        }
      }
    }
  }

  /* Accumulate the contribution of every atom behind the list head */
  Sigma = sigma_isotropic;
  for (atom=HeadAtom->next; atom!=NULL; atom=atom->next){

    if (rvdw_based){ Sigma = atom->R/sqrt(2*log(2.0)); }

    /* Index range of the voxels touched by this atom, clipped to the grid */
    for (axis=0;axis<3;++axis){
      lo = atom->Pos[axis] - atom->R;
      hi = atom->Pos[axis] + atom->R;
      lo_idx[axis] = (int)floor((lo - SDcutoff*Sigma - vox->OrigPos[axis])/vox->grid_width);
      hi_idx[axis] = (int)ceil( (hi + SDcutoff*Sigma - vox->OrigPos[axis])/vox->grid_width);
      if (lo_idx[axis]<0){ lo_idx[axis] = 0; }
      if (hi_idx[axis]>=vox->N[axis]){ hi_idx[axis] = vox->N[axis]-1; }
    }

    for (ix=lo_idx[0];ix<=hi_idx[0];++ix){
      grid_pos[0] = vox->OrigPos[0] + vox->grid_width * ix;
      for (iy=lo_idx[1];iy<=hi_idx[1];++iy){
        grid_pos[1] = vox->OrigPos[1] + vox->grid_width * iy;
        for (iz=lo_idx[2];iz<=hi_idx[2];++iz){
          grid_pos[2] = vox->OrigPos[2] + vox->grid_width * iz;
          if (rvdw_based){
            vox->dat[ix][iy][iz] += (float)Gaussian3D_Isotropic_PeakDensityOne(grid_pos,atom->Pos,Sigma);
          }
          else{
            vox->dat[ix][iy][iz] += (float)Gaussian3D_Isotropic(grid_pos,atom->Pos,Sigma);
          }
        }
      }
    }
  }
} /* end of Set_Voxel_Value_By_Atoms() */
void Get_Min_Max_From_Atoms(HeadAtom,Min,Max)
  struct ATOM *HeadAtom;        /* list head; the atoms start at HeadAtom->next */
  double (*Min)[3], (*Max)[3];  /* out: per-axis bounds of the atom spheres */
{
 /*
   Computes the axis-aligned bounding box of all atoms, each atom being
   treated as a sphere of radius R around Pos:
      (*Min)[i] = min over atoms of (Pos[i] - R)
      (*Max)[i] = max over atoms of (Pos[i] + R)
   When the list holds no atom, *Min and *Max are left untouched.
 */
  struct ATOM *atom;
  float lo,hi;          /* kept in single precision, as the per-atom bounds always were */
  int   axis;
  int   have_bounds = 0;

  for (atom=HeadAtom->next; atom!=NULL; atom=atom->next){
    for (axis=0;axis<3;++axis){
      lo = atom->Pos[axis] - atom->R;
      hi = atom->Pos[axis] + atom->R;
      if (!have_bounds){
        /* first atom: seed the box */
        (*Min)[axis] = lo;
        (*Max)[axis] = hi;
      }
      else{
        if (lo<(*Min)[axis]){ (*Min)[axis] = lo; }
        if (hi>(*Max)[axis]){ (*Max)[axis] = hi; }
      }
    }
    have_bounds = 1;
  }
}
void Malloc_Voxel_From_Atoms(vox,HeadAtom,margin)
  struct VOXEL *vox;       /* in: grid_width; out: OrigPos[], N[] and the allocated grid */
  struct ATOM  *HeadAtom;  /* list head; the atoms start at HeadAtom->next */
  float margin;            /* padding added on every side of the atom bounding box */
{
 /*
   Sizes and allocates the voxel grid so that it encloses every atom sphere
   (Pos +/- R) plus 'margin':
     (1) find the per-axis bounding box Min[]/Max[] of the atom spheres,
     (2) snap the origin down to a multiple of grid_width and derive N[],
     (3) call Malloc_Voxel() with the resulting dimensions.
 */
  int i;
  struct ATOM *an;
  float min[3],max[3];
  /* Zero-initialised so that an empty atom list yields a well-defined box
     at the origin instead of reading indeterminate values below. */
  float Min[3] = {0.0f,0.0f,0.0f};
  float Max[3] = {0.0f,0.0f,0.0f};
  char init;

  printf("#Malloc_Voxel_Value_From_Atoms()\n");

  /** (1) Find min[3] and max[3] **/
  an = HeadAtom; init = 1;
  while (an->next != NULL){
    an = an->next;
    for (i=0;i<3;++i){
      min[i] = an->Pos[i] - an->R;
      max[i] = an->Pos[i] + an->R;
    }
    if (init==1){
      /* first atom seeds the bounding box */
      for (i=0;i<3;++i){ Min[i] = min[i]; Max[i] = max[i]; }
      init = 0;
    }
    else{
      for (i=0;i<3;++i){
        if (min[i]<Min[i]) Min[i] = min[i];
        if (max[i]>Max[i]) Max[i] = max[i];
      }
    }
  } /* an */

  printf("[0]Min %f Max %f [1]Min %f Max %f [2]Min %f Max %f\n",
     Min[0], Max[0], Min[1], Max[1], Min[2], Max[2]);

  /** (2) Set up N[] **/
  for (i=0;i<3;++i){
    /* origin = largest multiple of grid_width that is <= Min - margin */
    vox->OrigPos[i] = (int)floor((Min[i]-margin)/vox->grid_width)*vox->grid_width;
    vox->N[i] = (int)ceil((Max[i]- vox->OrigPos[i] + margin)/vox->grid_width);
  }

  /** (3) Malloc Voxel **/
  printf("#OrigPos %f %f %f\n",
    vox->OrigPos[0], vox->OrigPos[1], vox->OrigPos[2]);
  Malloc_Voxel(vox,vox->N[0],vox->N[1],vox->N[2]);

} /* end of Malloc_Voxel_From_Atoms() */
void Malloc_Voxel_From_Min_Max(vox,Min,Max,margin)
  struct VOXEL *vox;      /* in: grid_width; out: OrigPos[], N[] and the allocated grid */
  double Min[3], Max[3];  /* bounding box to be covered by the grid */
  float  margin;          /* padding added on every side of the box */
{
 /*
   Sizes the grid from an already known bounding box and allocates it:
   the origin is snapped down to a multiple of grid_width below
   (Min - margin), N[] is the number of grid steps needed to reach
   (Max + margin), and Malloc_Voxel() is called with those dimensions.
 */
  int axis;
  int first_cell;   /* index of the origin in units of grid_width */

  printf("#Malloc_Voxel_Value_From_Min_Max()\n");

  for (axis=0;axis<3;++axis){
    first_cell = (int)floor((Min[axis]-margin)/vox->grid_width);
    vox->OrigPos[axis] = first_cell*vox->grid_width;
    vox->N[axis] = (int)ceil((Max[axis]- vox->OrigPos[axis] + margin)/vox->grid_width);
  }

  printf("#OrigPos %f %f %f\n",
    vox->OrigPos[0], vox->OrigPos[1], vox->OrigPos[2]);

  Malloc_Voxel(vox,vox->N[0],vox->N[1],vox->N[2]);

} /* end of Malloc_Voxel_From_Min_Max() */
double Gaussian3D_Isotropic(Pos,Cen,Sigma)
float Pos[3]; /* Position */
float Cen[3]; /* Center */
float Sigma; /* Standard Deviation */
{
double D[3],xSx,val;
D[0] = Pos[0] - Cen[0];
D[1] = Pos[1] - Cen[1];
D[2] = Pos[2] - Cen[2];
xSx = 0.0;
xSx = (D[0]*D[0] + D[1]*D[1] + D[2]*D[2])/(Sigma*Sigma);
val = three_over_2pi_32/(Sigma*Sigma*Sigma)*exp(-0.5*xSx);
return(val);
}/* end of Gaussian3D_Isotropic() */
double Gaussian3D_Isotropic_PeakDensityOne(Pos,Cen,Sigma)
float Pos[3]; /* Position */
float Cen[3]; /* Center */
float Sigma; /* Standard Deviation */
{
double D[3],xSx,val;
D[0] = Pos[0] - Cen[0];
D[1] = Pos[1] - Cen[1];
D[2] = Pos[2] - Cen[2];
xSx = 0.0;
xSx = (D[0]*D[0] + D[1]*D[1] + D[2]*D[2])/(Sigma*Sigma);
val = exp(-0.5*xSx);
return(val);
}/* end of Gaussian3D_Isotropic_PeakDensityOne() */
/**
<Atom2Vox.h>
**/
/* NOTE: all declarations below are unprototyped (empty parentheses); the
   parameter lists are given in the comments and defined in Atom2Vox.c. */

/* Malloc_Voxel_From_Min_Max(vox,Min,Max,margin):
   sets vox->OrigPos[] and vox->N[] so that the grid covers the box
   [Min-margin, Max+margin] (Min, Max are double[3]) and calls Malloc_Voxel(). */
extern void Malloc_Voxel_From_Min_Max();
/* Get_Min_Max_From_Atoms(HeadAtom,Min,Max):
   writes to *Min / *Max (pointers to double[3]) the per-axis minimum of
   Pos-R and maximum of Pos+R over the atoms that follow HeadAtom. */
extern void Get_Min_Max_From_Atoms();
/* Set_Voxel_Value_By_Atoms(vox,HeadAtom,Atom2VoxType,sigma_isotropic,init):
   adds one isotropic Gaussian per atom to vox->dat; Atom2VoxType 'R' derives
   sigma from the atom radius, otherwise sigma_isotropic is used; a true
   'init' zeroes the grid first. */
extern void Set_Voxel_Value_By_Atoms();
/* Malloc_Voxel_From_Atoms(vox,HeadAtom,margin):
   computes the bounding box of the atom spheres itself, then sets
   vox->OrigPos[] / vox->N[] with the given margin and calls Malloc_Voxel(). */
extern void Malloc_Voxel_From_Atoms();
/*
<AtomKmean.c>
K-means Clustering for Atom Data (PDB atom data)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "globalvar.h"
#include "pdbstruct.h"
#include "PdbIO.h"
#include "Radius.h"
#include "gauss.h"
#include "Voxel.h"
#include "MCubeFunc.h"
#include "PointEM.h"
#include "Matrix3D.h"
#include "GaussIO.h"
/*** Functions (GLOBAL) ***/
void K_Means_Clustering_For_Atoms();
void K_Means_Clustering_For_Atoms_Multiple_Start();
/*** Functions (LOCAL) ***/
static void update_Class();
static void update_M_Weight_CovM();
/*****************/
/*** FUNCTIONS ***/
/*****************/
void K_Means_Clustering_For_Atoms_Multiple_Start(Ngauss,Garray,Ahead,ObjFunc_fin,Ntry)
  int Ngauss;               /* number of gaussians (clusters) */
  struct GAUSS3D *Garray;   /* out: [0..Ngauss-1] gaussians of the best run */
  struct ATOM *Ahead;       /* list head; the atoms start at Ahead->next */
  double *ObjFunc_fin;      /* out: Final Objective Function of the best run */
  int Ntry;                 /* number of randomly initialized K-means runs */
{
 /*
   Runs K_Means_Clustering_For_Atoms() Ntry times, each time starting from
   Initialize_Gauss_From_Randomly_Chosen_Atoms(), and copies the run with
   the smallest objective function into Garray[].

   If Ntry < 1 no run is performed: Garray[] is left untouched and
   *ObjFunc_fin is set to -1.0.
 */
  struct GAUSS3D **GarrayTry;
  int t,g,t_min;
  double ObjFunc,ObjFunc_min;

  printf("#K_Means_Clustering_For_Atoms_Multiple_Start(Ngauss:%d)\n",Ngauss);

  /* Without at least one run there is no "best" try to copy from
     (t_min would stay -1 and index out of bounds). */
  if (Ntry<1){
    *ObjFunc_fin = -1.0;
    return;
  }

  /* (1) Malloc Gauss Try */
  GarrayTry = (struct GAUSS3D **)malloc(sizeof(struct GAUSS3D*)*Ntry);
  if (GarrayTry==NULL){
    fprintf(stderr,"#ERROR(K_Means_Clustering_For_Atoms_Multiple_Start): malloc failed.\n");
    exit(1);
  }
  for (t=0;t<Ntry;++t){
    GarrayTry[t] = (struct GAUSS3D *)malloc(sizeof(struct GAUSS3D)*Ngauss);
    /* malloc(0) may legitimately return NULL, hence the Ngauss>0 test */
    if ((GarrayTry[t]==NULL)&&(Ngauss>0)){
      fprintf(stderr,"#ERROR(K_Means_Clustering_For_Atoms_Multiple_Start): malloc failed.\n");
      exit(1);
    }
  }

  /* (2) try K_Means Ntry times, remembering the run with the lowest objective */
  t_min = -1;
  ObjFunc_min = -1.0;
  for (t=0;t<Ntry;++t){
    Initialize_Gauss_From_Randomly_Chosen_Atoms(Ahead,Ngauss,GarrayTry[t]);
    K_Means_Clustering_For_Atoms(Ngauss,GarrayTry[t],Ahead,&ObjFunc);
    if ((t==0)||(ObjFunc<ObjFunc_min)){
      ObjFunc_min = ObjFunc;
      t_min = t;
    }
  }

  /* (3) Copy the best gaussians */
  for (g=0;g<Ngauss;++g){
    Copy_GAUSS3D(&(Garray[g]),&(GarrayTry[t_min][g]));
  }
  *ObjFunc_fin = ObjFunc_min;

  /* (4) Free Gauss Try */
  for (t=0;t<Ntry;++t) free(GarrayTry[t]);
  free(GarrayTry);

} /* end of K_Means_Clustering_For_Atoms_Multiple_Start() */
void K_Means_Clustering_For_Atoms(Ngauss,Garray,Ahead,ObjFunc_fin)
  int Ngauss;               /* number of gaussians (clusters) */
  struct GAUSS3D *Garray;   /* in: initial centers M[]; out: updated gaussians */
  struct ATOM *Ahead;       /* list head; the atoms start at Ahead->next */
  double *ObjFunc_fin;      /* out: Final Objective Function */
{
 /*
   K-means clustering of the atoms, starting from the centers in Garray[].
   Iterates "assign every atom to a class" (update_Class) and "update the
   gaussians" (update_M_Weight_CovM) until no atom changes class or
   Nrepeat iterations have been done, then performs a final update with
   type 'C' (covariance update, per update_M_Weight_CovM's parameter note).

   Side effect: every atom gets an->num = its 0-based position in the list,
   which is the index used for Class[] / ClassPre[].
 */
  int Nrepeat,r,a,Natom,Nclass_change;
  int *Class,*ClassPre; /* [0..Natom-1] */
  struct ATOM *an;
  double ObjFunc,sumWeight;

  Nrepeat = 1000;

  /*** [1] Count Natom and malloc array class ***/
  Natom = 0;
  sumWeight = 0.0;
  an = Ahead;
  while (an->next != NULL){
    an = an->next;
    an->num = Natom;
    ++Natom;
    sumWeight += an->Weight;
  }

  Class    = (int *)malloc(sizeof(int)*Natom);
  ClassPre = (int *)malloc(sizeof(int)*Natom);
  /* malloc(0) may legitimately return NULL, hence the Natom>0 test */
  if ((Natom>0)&&((Class==NULL)||(ClassPre==NULL))){
    fprintf(stderr,"#ERROR(K_Means_Clustering_For_Atoms): malloc failed.\n");
    exit(1);
  }

  for (a=0;a<Natom;++a){ Class[a] = ClassPre[a] = 0;}

  /*** [2] Iteration(r) for K-means ***/
  r = 0;
  do{
    /* (2-1) Calculate Class[a] */
    update_Class(Ngauss, Garray, Ahead,Class,ClassPre,&Nclass_change,&ObjFunc);

    /* (2-2) Update M[], Weight */
    if (Nclass_change>0){
      update_M_Weight_CovM(Ngauss, Garray,Ahead,Class,sumWeight,'-');
    }

    for (a=0;a<Natom;++a){ClassPre[a] = Class[a];}

    /* count the iteration so that the Nrepeat cap below can actually stop
       the loop if the assignments keep changing */
    ++r;

  }while ((r<Nrepeat)&&(Nclass_change>0));

  /* [3] Finishing. Update M[], Weight,CovM[][] */
  update_M_Weight_CovM(Ngauss, Garray,Ahead,Class,sumWeight,'C');
  *ObjFunc_fin = ObjFunc;

  free(Class);
  free(ClassPre);

} /* end of K_Means_Clustering_For_Atoms() */
void update_Class(Ngauss, Garray, Ahead,Class,ClassPre,Nclass_change,ObjFunc)
  int Ngauss;              /* number of gaussians (clusters) */
  struct GAUSS3D *Garray;  /* [0..Ngauss-1]; only the centers M[] are read */
  struct ATOM *Ahead;      /* list head; the atoms start at Ahead->next */
  int *Class;              /* [0..Natom-1] out: index of the nearest center, by an->num */
  int *ClassPre;           /* [0..Natom-1] in : assignment of the previous iteration */
  int *Nclass_change;      /* out: number of atoms whose class differs from ClassPre */
  double *ObjFunc;         /* out: sum over atoms of the squared distance to the assigned center */
{
 /*
   Assignment step of K-means: every atom is assigned to the gaussian whose
   center M[] is nearest (squared Euclidean distance; the first of equally
   near centers wins).
 */
  struct ATOM *an;
  int g;
  double DD,minDD;

  *Nclass_change = 0;
  *ObjFunc = 0.0;

  for (an=Ahead->next; an!=NULL; an=an->next){

    /* reset per atom; stays 0 (no contribution) if there is no gaussian */
    minDD = 0.0;

    for (g=0;g<Ngauss;++g){
      DD =  (an->Pos[0] - Garray[g].M[0])*(an->Pos[0] - Garray[g].M[0])
           +(an->Pos[1] - Garray[g].M[1])*(an->Pos[1] - Garray[g].M[1])
           +(an->Pos[2] - Garray[g].M[2])*(an->Pos[2] - Garray[g].M[2]);
      if ((g==0)||(DD<minDD)){ minDD = DD; Class[an->num] = g; }
    }

    if (Class[an->num] != ClassPre[an->num]){ *Nclass_change = *Nclass_change + 1;}

    /* accumulate the distance to the ASSIGNED center (minDD), not the
       distance to whichever gaussian happened to be examined last */
    *ObjFunc += minDD;
  }

} /* end of update_Class() */
void update_M_Weight_CovM(Ngauss, Garray, Ahead, Class,sumWeight,UpdateCovMtype)
int Ngauss;
struct GAUSS3D *Garray;
struct ATOM *Ahead;
int *Class; /* [0..Natom-1] */
double sumWeight;
char UpdateCovMtype; /* 'C':update covariance matrix */
{
int g,i,j;
double sumWeight_g;
struct ATOM *an;
for (g=0;g<Ngauss;++g){
sumWeight_g = 0.0;
for (i=0;i<3;++i){
Garray[g].M[i] = 0.0;
for (j=0;j<3;++j){
Garray[g].CovM[i][j] = 0.0;
}
}
/* update Garray[g].M[] and Garray[g].Weight */
an = Ahead;
while (an->next != NULL){
an = an->next;
if (Class[an->num]==g){
for (i=0;i<3;++i){Garray[g].M[i] += an->Weight*an->Pos[i];}
sumWeight_g += an->Weight;
}
}