#include "collapseSkel3d.h"
#include "edt3d.h"

#include <cmath>
#include <set>
//#include <algorithm>
#include <assert.h>

#include <parallel/algorithm>

#include "hashwrap.h"

#define ADVECT_VEL



// Area of the triangle (p1, p2, p3): half the length of the cross product of
// the two edge vectors that start at p1.
static float area(collapseSkel3d::coord3 &p1, collapseSkel3d::coord3 &p2, collapseSkel3d::coord3 &p3) 
{
  // Edge vectors p1->p2 and p1->p3.
  collapseSkel3d::coord3 e1(p2.x-p1.x, p2.y-p1.y, p2.z-p1.z);
  collapseSkel3d::coord3 e2(p3.x-p1.x, p3.y-p1.y, p3.z-p1.z);

  // Components of e1 x e2.
  const float cx = e1.y*e2.z - e2.y*e1.z;
  const float cy = e2.x*e1.z - e1.x*e2.z;
  const float cz = e1.x*e2.y - e2.x*e1.y;

  const float len2 = cx*cx+cy*cy+cz*cz;
  return 0.5f*sqrtf(len2);
}

// Splits a feature-transform entry encoded as (x*height + y)*depth + z back
// into its voxel coordinates.
static inline void decodeFeaturePoint(int fp, int height, int depth, int &x, int &y, int &z)
{
  z = fp % depth;
  const int xy = fp / depth;
  y = xy % height;
  x = xy / height;
}

// Prepares all per-volume state for a run.
//
//   data     input volume; voxels with value > thr are foreground
//   thr      foreground threshold
//   np       number of samples in points / normals
//   points   np triples stored as (z, y, x)
//   normals  np triples stored in the same (z, y, x) component order; their
//            negation seeds the velocity volumes (w, v, u)
//
// Steps: allocate the working volumes, build the diffusion lookup tables,
// compute the feature transform and derive the distance map from it, mark the
// foreground in thin_img, queue the simple foreground voxels that touch the
// background, seed the velocity field and fill the inverse feature transform.
//
// NOTE(review): diff_weights and diff_vecs are allocated with new[] on every
// call and nothing visible here releases a previous allocation -- confirm
// that init() runs once per object or that the owner frees them.
void collapseSkel3d::init(Volume<byte> &data, int thr, int np, int *points, float *normals)
{
  const int width = data.getWidth(), height = data.getHeight(), depth = data.getDepth();

  // Every OpenMP region after this call runs on a single thread.
  omp_set_num_threads(1);

  dens_prev.makeVolume(width, height, depth);
  dens.makeVolume(width, height, depth);
  thin_img.makeVolume(width, height, depth);
  max_dens.makeVolume(width, height, depth);
  edt.makeVolume(width, height, depth);
  ft.makeVolume(width, height, depth);

#ifdef ADVECT_VEL
  u.makeVolume(width, height, depth);
  v.makeVolume(width, height, depth);
  w.makeVolume(width, height, depth);
#endif

  max_dst = 0.0f;

  // Diffusion lookup tables over the (2r+1)^3 neighbourhood: a Gaussian
  // weight and the unit direction of every offset, stored z-major
  // (zz outermost, xx innermost).
  static const float sigma2 = 0.8f;
  this->r = 1; // kernel radius

  const int klen = 2*this->r + 1;
  diff_weights = new float[klen*klen*klen];
  diff_vecs = new float[klen*klen*klen][3];

  int idx = 0;
  for(int zz=-r; zz<=r; zz++)
    for(int yy=-r; yy<=r; yy++)
      for(int xx=-r; xx<=r; xx++, idx++) {
        const float dst2 = zz*zz + yy*yy + xx*xx;
        diff_weights[idx] = exp( -dst2/(2.0f*sigma2) );

        // The epsilon keeps the centre offset (0,0,0) from dividing by zero.
        const float mag = sqrtf(dst2) + 1e-7f;
        diff_vecs[idx][0] = xx/mag;
        diff_vecs[idx][1] = yy/mag;
        diff_vecs[idx][2] = zz/mag;
      }

  // Feature transform.
  // LUK: this assumes that volume is not larger than 1024^3
  ft_3d(ft, data, thr);

  // Distance map from the feature transform, plus the foreground mask.
  for(int i=0; i<depth; i++)
    for(int j=0; j<height; j++)
      for(int k=0; k<width; k++) {
        int x, y, z;
        decodeFeaturePoint(ft[i][j][k], height, depth, x, y, z);

        const float dst = sqrtf( (x-k)*(x-k) + (y-j)*(y-j) + (z-i)*(z-i) );
        edt[i][j][k] = dst;

        if(data[i][j][k]>thr) thin_img[i][j][k] = 255;

        max_dst = std::max(max_dst, dst);
      }

  tdens = 0.0f;

  // Queue every foreground voxel that has a background voxel in its 3x3x3
  // neighbourhood and is simple; such voxels are tagged 254 in thin_img.
  for(int i=0; i<depth; i++)
    for(int j=0; j<height; j++)
      for(int k=0; k<width; k++) {
        if(!thin_img[i][j][k]) continue;

        // Neighbours outside the volume are skipped instead of being read,
        // so a foreground voxel on a volume face no longer indexes data out
        // of range.
        bool touchesBackground = false;
        for(int z=i-1; z<=i+1 && !touchesBackground; z++) {
          if(z<0 || z>=depth) continue;
          for(int y=j-1; y<=j+1 && !touchesBackground; y++) {
            if(y<0 || y>=height) continue;
            for(int x=k-1; x<=k+1; x++) {
              if(x<0 || x>=width) continue;
              if(data[z][y][x]<=thr) { touchesBackground = true; break; }
            }
          }
        }

        // NOTE(review): isSimple() reads the full 26-neighbourhood of
        // thin_img without range checks of its own -- confirm the input
        // keeps a background margin of at least one voxel.
        if(touchesBackground && isSimple(k, j, i, thin_img)) {
          thin_img[i][j][k] = 254;
          queue.push_back(collapseSkel3d::coord3(k, j, i));
        }
      }

#ifdef ADVECT_VEL
  // Seed the velocity field with the negated input normals. The velocity
  // volumes only exist when ADVECT_VEL is defined, so the writes are guarded
  // the same way as their allocation above.
  for(int i=0; i<np; i++) {
    const int z = points[3*i+0];
    const int y = points[3*i+1];
    const int x = points[3*i+2];

    // Ignore samples that fall outside the volume.
    if(x<0 || x>=width || y<0 || y>=height || z<0 || z>=depth) continue;

    w[z][y][x] = -normals[3*i+0];
    v[z][y][x] = -normals[3*i+1];
    u[z][y][x] = -normals[3*i+2];
  }
#endif

  // Inverse feature transform: one bucket per voxel index.
  ift.resize(width*height*depth);

  for(int i=0; i<depth; i++)
    for(int j=0; j<height; j++)
      for(int k=0; k<width; k++) {
        // we only care about internal points
        if(thin_img[i][j][k]==0) continue;

        const int fp = ft[i][j][k];

        // Decoding fp to (x, y, z) and re-encoding it as
        // (x*height + y)*depth + z yields fp again, so the bucket index is
        // fp itself.
        // NOTE(review): the bucket of a feature point therefore only ever
        // receives that feature point's own index. If it was meant to
        // collect the interior voxels that map to it, the argument should
        // be (k*height + j)*depth + i -- confirm the intent.
        ift[static_cast<unsigned int>(fp)].addSkelPoint(fp);
      }

}

void collapseSkel3d::fillGaps(float thr)
{
  const int width = dens.getWidth(), height = dens.getHeight(), depth = dens.getDepth();
  
  Volume<float> timp(width, height, depth);

  for(int i=0;i<depth;i++)
    for(int j=0;j<height;j++)
      for(int k=0;k<width;k++) {
  
	if(max_dens[i][j][k]<thr) continue;
	//if(max_dens[i][j][k]<1e-6f) continue;

	int fp = ft[i][j][k];
	
	//decode fp to (x,y,z) 
	int z = fp % depth; 
	int x = fp / depth;
	int y = x % height;
	x = x / height;
	
	std::vector<coord3> nskel;
	for(int xx=x-1;xx<=x+1;xx++)
	  for(int yy=y-1;yy<=y+1;yy++)
	    for(int zz=z-1;zz<=z+1;zz++) {
	      
	      unsigned int idx = (xx*height + yy)*depth + zz;
	      for(InverseFT::SkelSet::iterator it = ift[idx].skel_idxs.begin(); 
		  it!=ift[idx].skel_idxs.end(); it++)
		{
		  unsigned int idxs = *it;		  
		  int sz = idxs % depth; 
		  int sx = idxs / depth;
		  int sy = sx % height;
		  sx = sx / height;

		  //assert(sz < depth && sy < height && sx < width);

		  //if(max_dens[sz][sy][sx]<thr) continue;

		  nskel.push_back(coord3(sx, sy, sz));
		}
	      
	    }
	
	for(int m=0;m<nskel.size();m++) {
	  const coord3 &c = nskel[m];

	  vector<Volume<float>::POINT3> points;
	  timp.bresenham_linie_3D(k, j, i, c.x, c.y, c.z, points);
	  
	  if(points.size()<2) {
	    printf("bla\n");
	    continue;
	  }

	  float a = max_dens[i][j][k], b = max_dens[c.z][c.y][c.x];
	  float t = 1.0f, tincr = 1.0f/(points.size()-1);
	  for(int p=0;p<points.size();p++) {
	    float val = a*t + b*(1.0f-t);
	    //float val = std::max(a, b);
	    const Volume<float>::POINT3 &pp = points[p];
	    timp[(int)pp.coord[2]][(int)pp.coord[1]][(int)pp.coord[0]] = 
	      std::max(val, timp[(int)pp.coord[2]][(int)pp.coord[1]][(int)pp.coord[0]]);
	      //timp[(int)pp.coord[2]][(int)pp.coord[1]][(int)pp.coord[0]] + val;
	    t -= tincr;
	  }

	  
	}

      }

  const float w = 0.0f;

  for(int i=0;i<depth;i++)
    for(int j=0;j<height;j++)
      for(int k=0;k<width;k++) {
	max_dens[i][j][k] = w*max_dens[i][j][k] + (1.0f-w)*timp[i][j][k];
      }

}


void collapseSkel3d::assignImpLoops()
{
  const int ssize = thin_img.getWidth() * thin_img.getHeight(), 
    depth = thin_img.getDepth();

  for(int i=0;i<depth;i++) {
    float *max_dens_ptr = max_dens[i][0];
    byte *thin_img_ptr = thin_img[i][0];

    for(int j=0;j<ssize;j++)     
      if(thin_img_ptr[j] > 128)
	max_dens_ptr[j] = 1.0f;
  }
}

// Tests whether voxel (x, y, z) of thin_img is "simple" by counting connected
// components inside its 3x3x3 neighbourhood. Zero-valued voxels are treated
// as background, everything else as foreground.
//
// Two counts are computed:
//   C_bar      components formed by the zero-valued face neighbours, joined
//              through the zero-valued neighbours that share an edge with the
//              centre voxel;
//   C_asterix  components formed by the non-zero voxels among the 26
//              neighbours, where two of them are joined when they are
//              adjacent (offsets differ by at most 1 on every axis).
//
// Returns true only when both counts are exactly 1.
//
// The neighbourhood is read through thin_img(x, y, z) with no range checks in
// this function.
// NOTE(review): confirm that this accessor tolerates coordinates on or
// outside the volume border, or that callers never pass border voxels.
bool collapseSkel3d::isSimple(int x, int y, int z, Volume<byte> &thin_img)
{
  //const int width = thin_img.getWidth(), height = thin_img.getHeight(), depth = thin_img.getDepth();

  // `count` hands out fresh component labels; it is shared by both phases,
  // so labels of the second phase simply continue after those of the first.
  int C_asterix = 0, C_bar = 0, count = 0;

  // Label table for the 3x3x3 neighbourhood, indexed [dx+1][dy+1][dz+1]:
  // 0 = not labelled yet, >0 = component label, -1 = the centre voxel.
  int visit[3][3][3]; // Visitor table 
  memset(visit,0,27*sizeof(int));
  visit[1][1][1] = -1;

  // Compute \bar{C}
 
  // Seeking for a component

  // Look at X-axis
  // Each zero-valued, unlabelled face neighbour on the x axis opens a new
  // component; its zero-valued neighbours in the same x-plane that differ in
  // y only or in z only receive the same label. No merging is needed yet
  // because the two x-planes cannot touch each other.
  for (int k = -1; k<=1; ++k) 
  {    
    if (thin_img(x+k,y,z)==0 && visit[1+k][1][1]==0) {
      ++C_bar;
      ++count;
      visit[1+k][1][1] = count;
      
      // Follow component
      for (int l = -1; l<=1; ++l) {
	if (thin_img(x+k,y+l,z)==0 && 
	    visit[1+k][1+l][1]==0)
	  visit[1+k][1+l][1] = count;
	if (thin_img(x+k,y,z+l)==0 && 
	    visit[1+k][1][1+l]==0)
	  visit[1+k][1][1+l] = count;
      }
    }
  }
  
  // Look at Y-axis
  // Same as above for the y face neighbours, except that the visited edge
  // neighbours may already carry a label from the x pass. In that case the
  // two components are one: the count is decremented and the older label is
  // rewritten to the current one everywhere in the table.
  for (int k = -1; k<=1; ++k) {
    if (thin_img(x,y+k,z)==0 && visit[1][1+k][1]==0) {
      int label = 0;
      ++C_bar;
      ++count;
      visit[1][1+k][1] = count;
      label = count;
      
      // Follow component
      for (int l = -1; l<=1; ++l) {
	if (l==0) continue;
	
	if (thin_img(x+l,y+k,z)==0) {
	  if (visit[1+l][1+k][1]!=0) {
	    if (label!=visit[1+l][1+k][1]) {
	      // Meld component
	      --C_bar;
	      
	      int C = visit[1+l][1+k][1];
	      for(int a=0;a<3;a++)
		for(int b=0;b<3;b++)
		  for(int c=0;c<3;c++)
		    if (visit[a][b][c]==C) visit[a][b][c] = label;
	    }
	  } else visit[1+l][1+k][1] = label;
	}
	
	if (thin_img(x,y+k,z+l)==0) {
	  if (visit[1][1+k][1+l]!=0) {
	    if (label!=visit[1][1+k][1+l]) {
	      // Meld component
	      --C_bar;
	      
	      int C = visit[1][1+k][1+l];
	      for(int a=0;a<3;a++)
		for(int b=0;b<3;b++)
		  for(int c=0;c<3;c++)
		    if (visit[a][b][c]==C) visit[a][b][c] = label;
	    }
	  } else visit[1][1+k][1+l] = label;
	}
      }
    }
  }

  // Look at Z-axis
  // Same as the y pass, now for the z face neighbours; labels from both
  // earlier passes may be merged here.
  for (int k = -1; k<=1; ++k) {
    if (thin_img(x,y,z+k)==0 && visit[1][1][1+k]==0) {
      int label = 0;
      ++C_bar;
      ++count;
      visit[1][1][1+k] = count;
      label = count;
      
      // Follow component
      for (int l = -1; l<=1; ++l) {
	if (l==0) continue;
	
	if (thin_img(x+l,y,z+k)==0) {
	  if (visit[1+l][1][1+k]!=0) {
	    if (label!=visit[1+l][1][1+k]) {
	      // Meld component
	      --C_bar;
	      
	      int C = visit[1+l][1][1+k];
	      for(int a=0;a<3;a++)
		for(int b=0;b<3;b++)
		  for(int c=0;c<3;c++)
		    if (visit[a][b][c]==C) visit[a][b][c] = label;
	    }
	  } else visit[1+l][1][1+k] = label;
	}
	
	if (thin_img(x,y+l,z+k)==0) {
	  if (visit[1][1+l][1+k]!=0) {
	    if (label!=visit[1][1+l][1+k]) {
	      // Meld component
	      --C_bar;
	      
	      int C = visit[1][1+l][1+k];
	      for(int a=0;a<3;a++)
		for(int b=0;b<3;b++)
		  for(int c=0;c<3;c++)
		    if (visit[a][b][c]==C) visit[a][b][c] = label;
	    }
	  } else visit[1][1+l][1+k] = label;
	}
      }
    }
  }

  // Anything other than exactly one zero-valued component means the voxel is
  // not simple; the second count is not needed then.
  if (C_bar!=1) return false;


  // Reinit visit
  memset(visit,0,27*sizeof(int));
  visit[1][1][1] = -1;


 // Compute C^*

  // Seeking for a component
  // Scan all 26 neighbours in a fixed order. Every non-zero, unlabelled
  // neighbour either joins the component of an already labelled adjacent
  // neighbour or opens a new one.
  for (int k = -1; k<=1; ++k)
    for (int l = -1; l<=1; ++l)
      for (int m = -1; m<=1; ++m) {
	int label = 0;

	// Protection
	// (skip the centre voxel itself)
	if ((k==0 && l==0 && m==0)) continue;
	
	if (visit[1+k][1+l][1+m]==0 && thin_img(x+k,y+l,z+m)!=0) {
	  // Look at the neighbours of this voxel
	  for (int k1 = -1; k1<=1; ++k1)
	    for (int l1 = -1; l1<=1; ++l1)
	      for (int m1 = -1; m1<=1; ++m1) {
		// Protection
		// (stay inside the 3x3x3 neighbourhood of the centre voxel)
		if (k+k1>1   || k+k1<-1 ||
		    l+l1>1   || l+l1<-1 ||
		    m+m1>1   || m+m1<-1 ) continue;
		
		// Search for an already known component. The centre entry is
		// -1 and therefore never matches the >0 test.
		if (visit[1+k+k1][1+l+l1][1+m+m1]>0 && thin_img(x+k+k1,y+l+l1,z+m+m1)!=0) {
		  if (label==0) label = visit[1+k+k1][1+l+l1][1+m+m1];
		  else if (label!=visit[1+k+k1][1+l+l1][1+m+m1]) {
		    // Meld component
		    // (this voxel bridges two labels: merge them into one)
		    --C_asterix;
		    
		    int C = visit[1+k+k1][1+l+l1][1+m+m1];
		    for(int a=0;a<3;a++)
		      for(int b=0;b<3;b++)
			for(int c=0;c<3;c++)
			  if (visit[a][b][c]==C) visit[a][b][c] = label;
		  }
		}
	      }
	  
	  // Label the point
	  if (label==0) {
	    // Found a new component
	    ++C_asterix;
	    ++count;
	    visit[1+k][1+l][1+m] = count;
	  } else visit[1+k][1+l][1+m] = label;
	}
      }

  // Simple only when the non-zero neighbours also form exactly one component.
  if (C_asterix==1) return true;
 
  return false;  
}

// Tells whether voxel (x, y, z) is an end point of the thinned object.
//
//   curve == true   medial-curve test: the 3x3x3 block centred on the voxel
//                   holds exactly two non-zero voxels (for a set centre
//                   voxel that is the voxel itself plus one neighbour).
//   curve == false  surface test over nine planes through the voxel (Pudney
//                   specification): end point as soon as one plane contains
//                   fewer than two non-zero neighbours.
//
// The neighbourhood is read without range checks, so the voxel must not lie
// on the volume border.
bool collapseSkel3d::isEndPoint(int x, int y, int z, bool curve, Volume<byte> &thin_img)
{
  if(curve) {
    // Count every non-zero voxel of the block, centre included.
    int occupied = 0;

    for (int dz = -1; dz<=1; ++dz)
      for (int dy = -1; dy<=1; ++dy)
        for (int dx = -1; dx<=1; ++dx)
          if (thin_img[z+dz][y+dy][x+dx]!=0) ++occupied;

    return occupied==2;
  }

  // (dx, dy, dz) offsets of the eight neighbours lying in each of the nine
  // planes.
  static const int plan9 [9][8][3] = { { {-1,0,-1}, {0,0,-1}, {1,0,-1}, {-1,0,0}, {1,0,0}, {-1,0,1}, {0,0,1}, {1,0,1} }, // Plane 1
                                { {-1,1,0}, {0,1,0}, {1,1,0}, {-1,0,0}, {1,0,0}, {-1,-1,0}, {0,-1,0}, {1,-1,0} }, // Plane 2
                                { {0,-1,-1}, {0,0,-1}, {0,1,-1}, {0,-1,0}, {0,1,0}, {0,-1,1}, {0,0,1}, {0,1,1} }, // Plane 3
                                { {1,1,1}, {0,1,0}, {-1,1,-1}, {1,0,1}, {-1,0,-1}, {-1,-1,-1}, {0,-1,0}, {1,-1,1} }, // Plane 4
                                { {-1,1,1}, {0,1,0}, {1,1,-1}, {-1,0,1}, {1,0,-1}, {-1,-1,1}, {0,-1,0}, {1,-1,-1} }, // Plane 5
                                { {-1,1,1}, {0,1,1}, {1,1,1}, {-1,0,0}, {1,0,0}, {-1,-1,-1}, {0,-1,-1}, {1,-1,-1} }, // Plane 6
                                { {-1,1,-1}, {0,1,-1}, {1,1,-1}, {-1,0,0}, {1,0,0}, {-1,-1,1}, {0,-1,1}, {1,-1,1} }, // Plane 7
                                { {-1,1,-1}, {-1,1,0}, {-1,1,1}, {0,0,-1}, {0,0,1}, {1,-1,-1}, {1,-1,0}, {1,-1,1} }, // Plane 8
                                { {1,1,-1}, {1,1,0}, {1,1,1}, {0,0,-1}, {0,0,1}, {-1,-1,-1}, {-1,-1,0}, {-1,-1,1} }  // Plane 9
  };

  for (int p = 0; p<9; ++p) {
    int inPlane = 0;

    for (int n = 0; n<8; ++n) {
      const int *off = plan9[p][n];
      if (thin_img[z+off[2]][y+off[1]][x+off[0]]!=0) ++inPlane;
    }

    if (inPlane<2) return true;
  }

  return false;
}

// File-scope flag: false until simplify_skel() has rebuilt thin_img from the
// importance field once. It is not a class member, so it is shared by every
// collapseSkel3d object, and nothing in this part of the file resets it.
bool first=false;

// One thinning sweep driven by the importance field lambda.
//
// First call: thin_img is rebuilt for all interior voxels (255 where lambda
// is not below thr, 0 elsewhere) and every kept voxel becomes a candidate.
// Later calls: only set voxels with at least one unset voxel in their 3x3x3
// neighbourhood are candidates; fully enclosed voxels get their lambda raised
// to at least thr + 1e-7.
//
// Candidates are stably sorted with index_cmp_min(lambda), filtered down to
// those that are simple, and then visited again in that order: a voxel that
// is still simple is removed (thin_img = 0, lambda = thr - 1e-7), otherwise
// its lambda is raised to at least thr + 1e-7.
//
//   lambda  importance field, updated in place
//   thr     importance threshold
//   incr    not used
void collapseSkel3d::simplify_skel(Volume<float> &lambda, float thr, float incr)
{
  const int width = lambda.getWidth(), height = lambda.getHeight(), depth = lambda.getDepth();

  std::vector<collapseSkel3d::coord3> candidates;

  if(first) {
    // Later sweeps: collect the voxels on the interface of the current set.
    for(int z=1; z<depth-1; z++)
      for(int y=1; y<height-1; y++)
        for(int x=1; x<width-1; x++) {

          if(!thin_img[z][y][x]) continue;

          bool onInterface = false;
          for(int zz=z-1; zz<=z+1 && !onInterface; zz++)
            for(int yy=y-1; yy<=y+1 && !onInterface; yy++)
              for(int xx=x-1; xx<=x+1; xx++)
                if(!thin_img[zz][yy][xx]) { onInterface = true; break; }

          if(onInterface)
            candidates.push_back(collapseSkel3d::coord3(x, y, z));
          else
            lambda[z][y][x] = std::max(lambda[z][y][x], thr + 1e-7f);
        }
  }
  else {
    // First sweep: threshold the importance field into thin_img.
    for(int z=1; z<depth-1; z++)
      for(int y=1; y<height-1; y++)
        for(int x=1; x<width-1; x++) {

          const bool keep = !(lambda[z][y][x]<thr);
          thin_img[z][y][x] = keep ? 255 : 0;

          if(keep)
            candidates.push_back(collapseSkel3d::coord3(x, y, z));
        }
    first=true;
  }

  std::stable_sort(candidates.begin(), candidates.end(), index_cmp_min(lambda));

  // Pass 1: keep the candidates that are simple in the unmodified image.
  std::vector<coord3> thin_set;
  for(size_t n=0; n<candidates.size(); n++) {
    const coord3 &c = candidates[n];
    if(isSimple(c.x, c.y, c.z, thin_img))
      thin_set.push_back(c);
  }

  // Pass 2: remove them one by one, re-testing each against the image as
  // already changed by the earlier removals.
  for(size_t n=0; n<thin_set.size(); n++) {
    const coord3 &c = thin_set[n];

    if(!isSimple(c.x, c.y, c.z, thin_img)) {
      lambda[c.z][c.y][c.x] = std::max(lambda[c.z][c.y][c.x], thr + 1e-7f);
      continue;
    }

    thin_img[c.z][c.y][c.x] = 0;
    lambda[c.z][c.y][c.x] = thr - 1e-7f;
  }

}

// Raises the importance of sub-threshold voxels that are largely surrounded
// by above-threshold ones.
//
// thin_img is first rebuilt for the interior voxels (255 where lambda is not
// below thr, 0 elsewhere). Every unset interior voxel with at least 9 set
// voxels in its 3x3x3 neighbourhood then gains 0.1 times the mean lambda of
// those set neighbours. The gains are accumulated in a copy, so all of them
// are computed from the unmodified field.
//
//   lambda  importance field, replaced by the boosted copy
//   thr     importance threshold
//   incr    not used
void collapseSkel3d::simplify_skel_filter(Volume<float> &lambda, float thr, float incr)
{
  const int width = lambda.getWidth(), height = lambda.getHeight(), depth = lambda.getDepth();

  Volume<float> boosted(lambda);

  // Threshold the importance field into thin_img.
  for(int z=1; z<depth-1; z++)
    for(int y=1; y<height-1; y++)
      for(int x=1; x<width-1; x++)
        thin_img[z][y][x] = (lambda[z][y][x]<thr) ? 0 : 255;

  const float gain = 0.1f;

  for(int z=1; z<depth-1; z++)
    for(int y=1; y<height-1; y++)
      for(int x=1; x<width-1; x++) {

        if(thin_img[z][y][x]) continue;

        // Sum and count of the set neighbours.
        float sum = 0.0f;
        int nset = 0;
        for(int zz=z-1; zz<=z+1; zz++)
          for(int yy=y-1; yy<=y+1; yy++)
            for(int xx=x-1; xx<=x+1; xx++) {
              if(!thin_img[zz][yy][xx]) continue;
              sum += lambda[zz][yy][xx];
              nset++;
            }

        if(nset>=9)
          boosted[z][y][x] += gain*(sum/nset);
      }

  lambda = boosted;

}

//reaction part (as in reaction-xxx PDE)
void collapseSkel3d::reaction(std::vector<coord3> &ipoints, Volume<float> &dens)
{
  //float tgt_dens = 1.0f;
  float tgt_dens = 0.2f*sim_iter; //0.2f, 1.0f, 2.0f
  float stiff = 0.05f; //0.05f
  
  for(int i=0, size=ipoints.size(); i<size; i++) {
    const coord3 &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    //if(dens[c.z][c.y][c.x]<2.0f) continue;

    /*
    float sum_w = 1.0f, avg = dens[c.z][c.y][c.x]; int idx = 0, ninside = 0;
    for(int zz=c.z-r;zz<=c.z+r;zz++)
      for(int yy=c.y-r;yy<=c.y+r;yy++)
	for(int xx=c.x-r;xx<=c.x+r;xx++) {
	  if(thin_img[zz][yy][xx]) {
	    sum_w += diff_weights[idx];	   	    
	    avg +=  diff_weights[idx]*dens[zz][yy][xx];
	    ninside++;
	  }
	  idx++;
	}
    
    tgt_dens = avg/sum_w;
    */

    //dens[c.z][c.y][c.x] += stiff*(tgt_dens - dens[c.z][c.y][c.x]);
    if(dens[c.z][c.y][c.x]<1.0f)
      dens[c.z][c.y][c.x]=1.0f;
  }
}

// diffusion-like transport of density
float collapseSkel3d::transport_dens_diffusion(float weight, Volume<float> &dens, 
					       Volume<float> &dens_prev,  
					       std::vector<coord3> &ipoints)
{
  float tot_dens = 0.0f;

  //spline kernel
  static const float kernel[10] = {1.0, 4.0, 1.0, 4.0, 16.0, 4.0, 1.0, 4.0, 1.0, 72.0};

  //thinned points push their density forward 
#pragma omp parallel for reduction(+: tot_dens)
  for(int i=0;i<ipoints.size();i++) {
    const coord3 &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

#if 0
    Volume<byte> &gimg = this->thin_img;
    //Volume<float>  &gimg = this->edt;
    

    int idx1 = 0; float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval; 
    for(int k=-1;k<=1;k++)
      for(int m=-1;m<=1;m++) {	
	kval = kernel[idx1++];
	gx += kval*(gimg[c.z+k][c.y+m][c.x+1]-gimg[c.z+k][c.y+m][c.x-1]);
	gy += kval*(gimg[c.z+k][c.y+1][c.x+m]-gimg[c.z+k][c.y-1][c.x+m]);
	gz += kval*(gimg[c.z+1][c.y+k][c.x+m]-gimg[c.z-1][c.y+k][c.x+m]);	
      }    
    //kval = kernel[9];
    //gx/=kval; gy/=kval; gz/=kval;

    //kval = std::max(fabsf(gx), std::max(fabsf(gy), fabsf(gz))) + 1e-7f;
    kval = sqrtf(gx*gx + gy*gy + gz*gz) + 1e-7f;
#else
    float gx = u[c.z][c.y][c.x];
    float gy = v[c.z][c.y][c.x];
    float gz = w[c.z][c.y][c.x];
    float kval = sqrtf(gx*gx + gy*gy + gz*gz) + 1e-7f;
#endif

    gx/=kval; gy/=kval; gz/=kval;
    
    const float sigma2 = 0.05f; //0.05f

    float sum_w = 0.0f; int idx = 0, ninside = 0; float *diff_vec;
    for(int zz=c.z-r;zz<=c.z+r;zz++)
      for(int yy=c.y-r;yy<=c.y+r;yy++)
	for(int xx=c.x-r;xx<=c.x+r;xx++) {
	  if(thin_img[zz][yy][xx]) {

	    diff_vec = diff_vecs[idx];
	    	   
	    float dx=diff_vec[0]-gx, dy=diff_vec[1]-gy, dz=diff_vec[2]-gz;
	    float dlen2 = dz*dz + dy*dy + dx*dx;

	    //float w_grad = exp(-dlen2/(2.0f*sigma2));	   	    
	    float w_grad = 1.0f/(1.0f + dlen2/sigma2);

	    sum_w += diff_weights[idx]*w_grad;
	    ninside++;
	  }
	  idx++;
	}
    /*
      when thinning removes locally more than 
       one voxel layer, this still happens
    */
    //assert(sum_w>0.0f);

    float dprev = (sum_w>0.0f) ? (weight*dens_prev[c.z][c.y][c.x])/sum_w : 0.0f;

    idx = 0;
    for(int zz=c.z-r;zz<=c.z+r;zz++)
      for(int yy=c.y-r;yy<=c.y+r;yy++)
	for(int xx=c.x-r;xx<=c.x+r;xx++) {
	  if(thin_img[zz][yy][xx]) { 

	    diff_vec = diff_vecs[idx];

	    float dx=diff_vec[0]-gx, dy=diff_vec[1]-gy, dz=diff_vec[2]-gz;
	    float dlen2 = dz*dz + dy*dy + dx*dx;

	    //float w_grad = exp(-dlen2/(2.0f*sigma2));
	    float w_grad = 1.0f/(1.0f + dlen2/sigma2);

	    float w = diff_weights[idx]*w_grad;
	    dens[zz][yy][xx] += w*dprev;
	  }
	  idx++;
	}

    tot_dens += sum_w*dprev;

    //if(sum_w>0.0f) dens_prev[c.z][c.y][c.x] = 0.0f;
  }

  return tot_dens;
}

// Advection transport of density.
//
// Every point in ipoints (unset in thin_img, asserted) moves weight *
// dens_prev of its own voxel one unit step along the velocity (u, v, w)
// stored at that voxel. The arrival position is split trilinearly over the
// eight surrounding voxels; only corners still set in thin_img receive
// density, and their weights are renormalised so that the full amount is
// deposited.
//
// When no corner can receive anything, "sum_w" is printed and dens at the
// source is set to dens_prev at the source. The same fallback, printing
// "kval", is taken when the velocity magnitude test fails.
//
// Returns the total amount of density deposited.
float collapseSkel3d::transport_dens_advection(float weight,Volume<float> &dens, 
					       Volume<float> &dens_prev,
					       std::vector<coord3> &ipoints)
{
  float tot_dens = 0.0f;

  for(size_t p=0; p<ipoints.size(); ++p) {
    const coord3 &c = ipoints[p];

    assert(!thin_img[c.z][c.y][c.x]);

    float gx = u[c.z][c.y][c.x];
    float gy = v[c.z][c.y][c.x];
    float gz = w[c.z][c.y][c.x];
    const float kval = sqrtf(gx*gx + gy*gy + gz*gz) + 1e-6f;

    if(!(kval > 1e-7f)) {
      printf("kval\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      continue;
    }

    // Arrival position: source voxel plus the unit velocity.
    gx/=kval; gy/=kval; gz/=kval;
    gx+=c.x; gy+=c.y; gz+=c.z;

    // Lower corner of the receiving cell and the interpolation factors per
    // axis ([0] belongs to the lower corner, [1] to the upper one).
    const int i0=(int)gx, j0=(int)gy, k0=(int)gz;
    float sx[2], sy[2], sz[2];
    sx[1] = gx-i0; sx[0] = 1-sx[1];
    sy[1] = gy-j0; sy[0] = 1-sy[1];
    sz[1] = gz-k0; sz[0] = 1-sz[1];

    // Trilinear weight and availability of the eight corners, x fastest.
    float cornerW[8];
    bool receives[8];
    float sum_w = 0.0f;
    int n = 0;
    for(int dk=0; dk<2; ++dk)
      for(int dj=0; dj<2; ++dj)
        for(int di=0; di<2; ++di, ++n) {
          cornerW[n] = sx[di]*sy[dj]*sz[dk];
          receives[n] = thin_img[k0+dk][j0+dj][i0+di] != 0;
          if(receives[n]) sum_w += cornerW[n];
        }

    if(!(sum_w > 1e-7f)) {
      printf("sum_w\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      continue;
    }

    const float rho_remaining = (weight*dens_prev[c.z][c.y][c.x])/sum_w;

    n = 0;
    for(int dk=0; dk<2; ++dk)
      for(int dj=0; dj<2; ++dj)
        for(int di=0; di<2; ++di, ++n)
          if(receives[n])
            dens[k0+dk][j0+dj][i0+di] += cornerW[n]*rho_remaining;

    tot_dens += sum_w*rho_remaining;
  }

  return tot_dens;
}

// Moves density parked in tmp_dens at the voxels of out_coords (each unset in
// thin_img, asserted) back onto a voxel that is still set in thin_img.
//
// For every entry holding at least 1e-7 of density, the line through the
// voxel along its unit velocity (u, v, w) is sampled from -3.5 up to about
// +3.6 voxels in steps of 0.01. The first sample whose nearest voxel is set
// in thin_img receives weight * tmp_dens, and the parked value is cleared.
// When no sample hits a set voxel, "prj sum_w" is printed and the parked
// density is added to dens at the voxel itself. Entries with a velocity
// magnitude of at most 1e-7 print "prj kval" and are left untouched.
//
//   weight      factor applied to the parked density on deposit
//   out_coords  voxels to process
//   tmp_dens    parked density, cleared where it could be deposited
//   dens        receives the density
//   dens_prev   not used
//   ipoints     not used
//
// Returns the total amount deposited onto set voxels.
float collapseSkel3d::transport_dens_advection_project(float weight,std::vector<coord3> &out_coords, Volume<float> &tmp_dens, Volume<float> &dens, 
							     Volume<float> &dens_prev,
							     std::vector<coord3> &ipoints)
{
  float tot_dens = 0.0f;

  for(size_t n=0; n<out_coords.size(); ++n) {
    const coord3 &c = out_coords[n];

    assert(!thin_img[c.z][c.y][c.x]);

    const int x = c.x;
    const int y = c.y;
    const int z = c.z;

    float &parked = tmp_dens[z][y][x];
    if(parked<1e-7f) continue;

    float gx = u[z][y][x];
    float gy = v[z][y][x];
    float gz = w[z][y][x];
    const float kval = sqrtf(gx*gx + gy*gy + gz*gz);

    if(!(kval > 1e-7f)) {
      printf("prj kval\n");
      continue;
    }

    gx/=kval; gy/=kval; gz/=kval;

    // Walk along the velocity line; the step is accumulated in float.
    for(float ll=-3.5f; ll<3.6f; ll+=0.01f) {
      const float px = x + ll*gx;
      const float py = y + ll*gy;
      const float pz = z + ll*gz;

      // Nearest voxel to the sample position.
      const int i0=(int)(px+0.5f), j0=(int)(py+0.5f), k0=(int)(pz+0.5f);

      if(!thin_img[k0][j0][i0]) continue;

      // The single receiver takes the whole amount.
      const float rho_remaining = weight*parked;
      dens[k0][j0][i0] += rho_remaining;
      tot_dens += rho_remaining;
      parked = 0.0f;
      break;
    }

    // Still parked: no set voxel was found on the line.
    if(parked>1e-7f) {
      printf("prj sum_w\n");
      dens[z][y][x] += parked;
    }
  }

  return tot_dens;
}

// Advects density without restricting the destination to set voxels.
//
// Every point in ipoints (unset in thin_img, asserted) moves weight *
// dens_prev of its own voxel one unit step along the velocity (u, v, w)
// stored there, rounded to the nearest voxel. When that voxel is set in
// thin_img the density is added to dens; otherwise it is parked in tmp_dens
// and the voxel is appended to out_coords. Points with a velocity magnitude
// of at most 1e-7 print "unc kval" and have dens at the source set to
// dens_prev at the source.
//
//   weight      fraction of the source density to transport
//   out_coords  receives the voxels where density was parked (entries may
//               repeat)
//   tmp_dens    receives the parked density (accumulated)
//   dens        receives the density that landed on set voxels
//   dens_prev   density read at the source voxels
//   ipoints     source voxels
//
// Returns the amount deposited onto set voxels; when more than 1e-7 was
// parked in total, that total is printed as "lost".
float collapseSkel3d::transport_dens_advection_unconstrained(float weight,std::vector<coord3> &out_coords, Volume<float> &tmp_dens, Volume<float> &dens, 
						       Volume<float> &dens_prev,
						       std::vector<coord3> &ipoints)
{
  float tot_dens = 0.0f;
  float lost_dens = 0.0f;

  for(size_t n=0; n<ipoints.size(); ++n) {
    const coord3 &c = ipoints[n];

    assert(!thin_img[c.z][c.y][c.x]);

    float gx = u[c.z][c.y][c.x];
    float gy = v[c.z][c.y][c.x];
    float gz = w[c.z][c.y][c.x];
    const float kval = sqrtf(gx*gx + gy*gy + gz*gz);

    if(!(kval > 1e-7f)) {
      printf("unc kval\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      continue;
    }

    // Arrival position: source voxel plus the unit velocity.
    gx/=kval; gy/=kval; gz/=kval;
    gx+=c.x; gy+=c.y; gz+=c.z;

    // Nearest voxel to the arrival position.
    const int i0=(int)(gx+0.5f), j0=(int)(gy+0.5f), k0=(int)(gz+0.5f);

    const float rho_remaining = weight*dens_prev[c.z][c.y][c.x];

    if(thin_img[k0][j0][i0]) {
      dens[k0][j0][i0] += rho_remaining;
      tot_dens += rho_remaining;
    }
    else {
      out_coords.push_back(coord3(i0, j0, k0));
      tmp_dens[k0][j0][i0] += rho_remaining;
      lost_dens += rho_remaining;
    }
  }

  if(lost_dens>1e-7f)
    printf("lost: %f\n", lost_dens);

  return tot_dens;
}

// advection transport of velocity/momentum
void collapseSkel3d::transport_vel_advection(float weight,Volume<float> &dens, 
					     Volume<float> &u,Volume<float> &v,Volume<float> &w, 
					     std::vector<coord3> &ipoints)
{
  for(int i=0;i<ipoints.size();i++) {
    const coord3 &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    float gx = u[c.z][c.y][c.x];
    float gy = v[c.z][c.y][c.x];
    float gz = w[c.z][c.y][c.x];
    float kval = sqrtf(gx*gx + gy*gy + gz*gz);
    //float kval = dens[c.z][c.y][c.x];

    if(kval > 1e-7f) {
      //float gx1=gx, gy1=gy, gz1=gz;

      gx/=kval; gy/=kval; gz/=kval;      

      float gx1=gx, gy1=gy, gz1=gz;

      gx+=c.x; gy+=c.y; gz+=c.z;

      int i0=(int)gx, i1=i0+1, j0=(int)gy, j1=j0+1, k0=(int)gz, k1=k0+1;
      float s1 = gx-i0, s0 = 1-s1, t1 = gy-j0, t0 = 1-t1, m1 = gz-k0, m0 = 1-m1;

      float sum_w = 0.0f;
      if(thin_img[k0][j0][i0]) 
	sum_w += s0*t0*m0;
      if(thin_img[k0][j0][i1]) 
	sum_w += s1*t0*m0;
      if(thin_img[k0][j1][i0]) 
	sum_w += s0*t1*m0;
      if(thin_img[k0][j1][i1]) 
	sum_w += s1*t1*m0;
      if(thin_img[k1][j0][i0]) 
	sum_w += s0*t0*m1;
      if(thin_img[k1][j0][i1]) 
	sum_w += s1*t0*m1;
      if(thin_img[k1][j1][i0]) 
	sum_w += s0*t1*m1;
      if(thin_img[k1][j1][i1]) 
	sum_w += s1*t1*m1;

      if(sum_w > 1e-7f) {

	//advect momentum field
	float rho_remaining = (weight*dens[c.z][c.y][c.x])/sum_w;  

	//advect velocity field
	//float rho_remaining = weight/sum_w;  


	if(thin_img[k0][j0][i0]) {
	  u[k0][j0][i0] += s0*t0*m0*rho_remaining*gx1;
	  v[k0][j0][i0] += s0*t0*m0*rho_remaining*gy1;
	  w[k0][j0][i0] += s0*t0*m0*rho_remaining*gz1;
	}
	if(thin_img[k0][j0][i1]) {
	  u[k0][j0][i1] += s1*t0*m0*rho_remaining*gx1;
	  v[k0][j0][i1] += s1*t0*m0*rho_remaining*gy1;
	  w[k0][j0][i1] += s1*t0*m0*rho_remaining*gz1;
	}
	if(thin_img[k0][j1][i0]) {
	  u[k0][j1][i0] += s0*t1*m0*rho_remaining*gx1;
	  v[k0][j1][i0] += s0*t1*m0*rho_remaining*gy1;
	  w[k0][j1][i0] += s0*t1*m0*rho_remaining*gz1;
	}
	if(thin_img[k0][j1][i1]) {
	  u[k0][j1][i1] += s1*t1*m0*rho_remaining*gx1;
	  v[k0][j1][i1] += s1*t1*m0*rho_remaining*gy1;
	  w[k0][j1][i1] += s1*t1*m0*rho_remaining*gz1;
	}
	if(thin_img[k1][j0][i0]) {
	  u[k1][j0][i0] += s0*t0*m1*rho_remaining*gx1;
	  v[k1][j0][i0] += s0*t0*m1*rho_remaining*gy1;
	  w[k1][j0][i0] += s0*t0*m1*rho_remaining*gz1;
	}
	if(thin_img[k1][j0][i1]) {
	  u[k1][j0][i1] += s1*t0*m1*rho_remaining*gx1;
	  v[k1][j0][i1] += s1*t0*m1*rho_remaining*gy1;
	  w[k1][j0][i1] += s1*t0*m1*rho_remaining*gz1;
	}
	if(thin_img[k1][j1][i0]) {
	  u[k1][j1][i0] += s0*t1*m1*rho_remaining*gx1;
	  v[k1][j1][i0] += s0*t1*m1*rho_remaining*gy1;
	  w[k1][j1][i0] += s0*t1*m1*rho_remaining*gz1;
	}
	if(thin_img[k1][j1][i1]) {
	  u[k1][j1][i1] += s1*t1*m1*rho_remaining*gx1;
	  v[k1][j1][i1] += s1*t1*m1*rho_remaining*gy1;
	  w[k1][j1][i1] += s1*t1*m1*rho_remaining*gz1;
	}
      }
    }
  }
}

// advection transport of velocity/momentum
void collapseSkel3d::transport_vel_diffusion(float weight,Volume<float> &dens, 
					     Volume<float> &u,Volume<float> &v,Volume<float> &w, 
					     std::vector<coord3> &ipoints)
{
  for(int i=0;i<ipoints.size();i++) {
    const coord3 &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    float gx = u[c.z][c.y][c.x];
    float gy = v[c.z][c.y][c.x];
    float gz = w[c.z][c.y][c.x];
    float kval = sqrtf(gx*gx + gy*gy + gz*gz);
    //float kval = std::max(fabsf(gx), std::max(fabsf(gy), fabsf(gz)));
    //float kval = dens[c.z][c.y][c.x];
    //float kval = std::max(fabsf(gx), std::max(fabsf(gy), fabsf(gz)));


    if(kval < 1e-7f) continue;

    gx/=kval; gy/=kval; gz/=kval;      

    /*    
    float *diff_vec;
    const float sigma2 = 0.05f;
    */

    float sum_w = 0.0f; int idx = 0, ninside = 0;
    for(int zz=c.z-r;zz<=c.z+r;zz++)
      for(int yy=c.y-r;yy<=c.y+r;yy++)
	for(int xx=c.x-r;xx<=c.x+r;xx++) {
	  if(thin_img[zz][yy][xx]) {
	    /*    
	    diff_vec = diff_vecs[idx];

	    float dx=diff_vec[0]-gx, dy=diff_vec[1]-gy, dz=diff_vec[2]-gz;
	    float dlen2 = dz*dz + dy*dy + dx*dx;

	    //float w_grad = exp(-dlen2/(2.0f*sigma2));	   	    
	    float w_grad = 1.0f/(1.0f + dlen2/sigma2);

	    sum_w += diff_weights[idx]*w_grad;
	    */
	    sum_w += diff_weights[idx];

	    ninside++;
	  }
	  idx++;
	}

    //assert(ninside);

    if(!ninside) continue;

    /*
      when thinning removes locally more than 
       one voxel layer, this still happens
    */
    //assert(sum_w>0.0f);

    float dprev = dens[c.z][c.y][c.x]/sum_w;

    idx = 0;
    for(int zz=c.z-r;zz<=c.z+r;zz++)
      for(int yy=c.y-r;yy<=c.y+r;yy++)
	for(int xx=c.x-r;xx<=c.x+r;xx++) {
	  if(thin_img[zz][yy][xx]) { 
	    /*
	    diff_vec = diff_vecs[idx];

	    float dx=diff_vec[0]-gx, dy=diff_vec[1]-gy, dz=diff_vec[2]-gz;
	    float dlen2 = dz*dz + dy*dy + dx*dx;

	    //float w_grad = exp(-dlen2/(2.0f*sigma2));	   	    
	    float w_grad = 1.0f/(1.0f + dlen2/sigma2);

	    float weight = diff_weights[idx]*w_grad;
	    */
	    float weight = diff_weights[idx];

	    u[zz][yy][xx] += weight*dprev*gx;
	    v[zz][yy][xx] += weight*dprev*gy;
	    w[zz][yy][xx] += weight*dprev*gz;

	  }
	  idx++;
	}

  }

}

bool collapseSkel3d::isRemovable(int x, int y, int z, Volume<float> &dens)
{
  coord3 c(x, y, z);

  float dd = dens[c.z][c.y][c.x] + 1e-7f;
  float gx = u[c.z][c.y][c.x]/dd;
  float gy = v[c.z][c.y][c.x]/dd;
  float gz = w[c.z][c.y][c.x]/dd;
  float kval = sqrtf(gx*gx + gy*gy + gz*gz);

  int nn = 0;
  
  if(kval > 1e-7f) {
    gx/=kval; gy/=kval; gz/=kval;      

    gx+=c.x; gy+=c.y; gz+=c.z;

    int i0=(int)gx, i1=i0+1, j0=(int)gy, j1=j0+1, k0=(int)gz, k1=k0+1;
    float s1 = gx-i0, s0 = 1-s1, t1 = gy-j0, t0 = 1-t1, m1 = gz-k0, m0 = 1-m1;

    if(thin_img[k0][j0][i0]) nn++;

    if(thin_img[k0][j0][i1]) nn++;

    if(thin_img[k0][j1][i0]) nn++;

    if(thin_img[k0][j1][i1]) nn++;

    if(thin_img[k1][j0][i0]) nn++;

    if(thin_img[k1][j0][i1]) nn++;

    if(thin_img[k1][j1][i0]) nn++;

    if(thin_img[k1][j1][i1]) nn++;
  }

  return nn > 0;
}
  
int collapseSkel3d::collapse_iteration(float dincr) 
{
  Volume<float> *dens = &this->dens, *dens_prev = &this->dens_prev;
  std::deque<coord3> *queue = &this->queue, *queue1 = &this->queue1; 

  if(sim_iter & 1) {
    //swap densities
    std::swap(dens, dens_prev);
    //swap queues
    std::swap(queue, queue1);
  }

  //make sure queue1 is empty
  queue1->clear();


  const float edt_thr = curr_dst + dincr;

  //fill in queue1 
  int interface_points = queue->size();

#pragma omp parallel for 
  for(int m=0; m<interface_points; m++) 
  {
    const coord3 &c = (*queue)[m];

    //assert(thin_img[c.z][c.y][c.x]);
    
    thin_img[c.z][c.y][c.x] = 253;

    for(int k=c.z-1;k<=c.z+1;++k)
      {	
	Image<byte>& thin_slice = thin_img[k];
	  
	for(int j=c.y-1;j<=c.y+1;++j)
	  {
	    byte* thin_row = thin_slice[j];
	    
	    for(int i=c.x-1;i<=c.x+1;++i) 
	      { 
		byte &thin_val = thin_row[i];	
		if(thin_val != 255) continue;
		//if(!isSimple(c.x, c.y, c.z, thin_img)) continue;
		
#pragma omp critical(queue1) 
		{
		  queue1->push_back(coord3(i,j,k));
		}
		thin_row[i] = 254;
	      }
	  }
      }
  }

  //std::stable_sort(queue->begin(), queue->end(), index_cmp_min(edt));

  std::stable_sort(queue->begin(), queue->end(), 
		   //index_cmp_min_div(u, v, w));
		   index_cmp_minb(thin_img,edt,*dens_prev));
		   //index_cmp_min(*dens_prev));

  //__gnu_parallel::sort(queue->begin(), queue->end(), index_cmp_min(*dens_prev));
  
  //std::stable_sort(queue->begin(), queue->end(),index_cmp_minb(thin_img,edt,*dens_prev));

  //candidate points for removal
  std::vector<coord3> thinned_set;

  //order-independant thinning (see Soille) 
  //phase 1: detection
  
  //no. points rejected for thinning
  int npushed_back = 0;
#pragma omp parallel for reduction(+: npushed_back)
  for(int i=0; i<interface_points; ++i)
  {
    const coord3 &c = (*queue)[i];    

    //printf("thread: %d: %d\n", omp_get_thread_num(), i);

    if(edt[c.z][c.y][c.x] < edt_thr && isSimple(c.x, c.y, c.z, thin_img)) {
    //if(sim_iter && isSimple(c.x, c.y, c.z, thin_img)) {
#pragma omp critical(thin) 
      {      
	thinned_set.push_back(c);
      }
    }
    else {
      //push far or non-simple points back to queue
#pragma omp critical(queue1) 
      {
	queue1->push_front(c);
      }
      thin_img[c.z][c.y][c.x] = 254;      
      npushed_back++;

    }
  }

  int tsize = thinned_set.size(), neroded = 0;

  //new interface points
  std::vector<coord3> ipoints(tsize);

  //phase 2: removal
  for(int i=0;i<tsize;i++) {
    const coord3 &c = thinned_set[i];

    //bool isEnd = isEndPoint(c.x, c.y, c.z, false, thin_img);

    if(isSimple(c.x, c.y, c.z, thin_img)) {
      thin_img[c.z][c.y][c.x] = 0;   //do the actual voxel removal
      ipoints[neroded++] = c;

#if 0
      max_dens[c.z][c.y][c.x] = std::max(max_dens[c.z][c.y][c.x], 
					 (*dens_prev)[c.z][c.y][c.x]/(0.5f*tdens));

#else

      if((*dens_prev)[c.z][c.y][c.x] >= 2.0f) { //SS or CS point
	//max_dens[c.z][c.y][c.x] = ( (*dens_prev)[c.z][c.y][c.x] + 2.0f*sim_iter)/tdens;
	//max_dens[c.z][c.y][c.x] = (2.0f*sim_iter)/tdens;

	//printf("%f %f\n", (4.0f*sim_iter)/tdens, (*dens_prev)[c.z][c.y][c.x]/(0.5f*tdens));
	
	max_dens[c.z][c.y][c.x] = std::max((*dens_prev)[c.z][c.y][c.x]/(0.5f*tdens),
					   (0.0f*sim_iter)/tdens); //2.0f, 4.0f
      }       
#endif      

    }
    else {
      queue1->push_front(c);
      thin_img[c.z][c.y][c.x] = 254;
      npushed_back++;
    }
  }

  ipoints.resize(neroded);

  ////////////////////////////////
  /*
    Compute 'importance' ok skel. points
  */
  ////////////////////////////////
  /*
    thinned points push their density forward to the new interface points;
    this transport is either (i) a diffusion process, or (ii) an advection.
  */

  float tot_dens = 0.0f;

  //reaction part (linear spring on density)
  reaction(ipoints, *dens_prev);

  //diffusion-like transport   
  //tot_dens += transport_dens_diffusion(1.0f, *dens, *dens_prev, ipoints);

  //advection transport
  //tot_dens += transport_dens_advection(1.0f, *dens, *dens_prev, ipoints);

  
  //LUK: tmp_dens !!!!
  const int width = thin_img.getWidth(), height = thin_img.getHeight(), 
    depth = thin_img.getDepth();

  Volume<float> tmp_dens(width, height, depth);

  std::vector<coord3> out_points;

  tot_dens += transport_dens_advection_unconstrained(1.0f, out_points, tmp_dens, *dens, *dens_prev, ipoints);

  tot_dens += transport_dens_advection_project(1.0f, out_points, tmp_dens, *dens, *dens_prev, ipoints);
  
  

#ifdef ADVECT_VEL
  //momentum(default)/velocity advection !!! 

  //transport_vel_advection(1.0f, *dens_prev, u, v, w, ipoints);
  transport_vel_diffusion(1.0f, *dens_prev, u, v, w, ipoints);
#endif

  //non-thinned ones just add their density at corresponding locations
#pragma omp parallel for reduction(+: tot_dens)   
  for(int i=0;i<npushed_back;i++) {
    const coord3 &c = (*queue1)[i];
    float dprev = (*dens_prev)[c.z][c.y][c.x];
    (*dens)[c.z][c.y][c.x] += dprev;
    (*dens_prev)[c.z][c.y][c.x] = 0.0f;
    tot_dens += dprev;
  }

  //clear previous density at interface points
#pragma omp parallel for   
  for(int i=0;i<neroded;i++) {
    const coord3 &c = ipoints[i];
    (*dens_prev)[c.z][c.y][c.x] = 0.0f;
  }

  ////////////////////////////////
  /*
    End compute 'importance' ok skel. points
  */
  ////////////////////////////////

  printf("tdens: %f curr_dst: %f max_dst: %f ipoints: %d tpoints: %d\n", 
	 tot_dens, curr_dst, max_dst, interface_points, neroded);

  ++sim_iter;

  curr_dst = edt_thr;

  //Return size of current interface; useful as a stopping criterion
  //return interface_points;     
  return (neroded>0 || sim_iter==1);
  
}

// Priority-queue entry used by the flux-ordered passes below.
struct _PointFlux {
  int pos[3];   // voxel position stored as {x, y, z}
  float flux;   // flux value used as an ordering key by _compare_point
  float dist;   // distance value used as an ordering key by _compare_point
};

// Strict ordering on _PointFlux for use with std::priority_queue (whose top()
// is the element that compares greatest).
//
//  - curve == false: ordered by flux, ties broken by dist; the queue's top is
//    the entry with the largest flux and, among equals, the smallest dist.
//  - curve == true : ordered by dist, ties broken by flux; the queue's top is
//    the entry with the smallest dist and, among equals, the largest flux.
//
// The constructor is intentionally not explicit: callers hand a bare bool to
// the priority_queue constructor.
class _compare_point {
  /**
   * Create medial curves
   */
  bool curve;

 public:
  _compare_point(const bool curve=false) : curve(curve) {}

  bool operator()(const _PointFlux & p1, const _PointFlux & p2) const {
    if (curve)
      return p1.dist>p2.dist || (p1.dist==p2.dist && p1.flux<p2.flux);

    return p1.flux<p2.flux || (p1.flux==p2.flux && p1.dist>p2.dist);
  }
};

static float trilin_interp(float x, float y, float z, Volume<float> &gx)
{
  register int intx = (int)floor(x);
  register int inty = (int)floor(y);
  register int intz = (int)floor(z);

  register float fracx = x - (float)intx;
  register float fracy = y - (float)inty;
  register float fracz = z - (float)intz;

  return (1.0f-fracx)*(1.0f-fracy)*(1.0f-fracz)*gx[intz][inty][intx]+
    fracx*(1.0f-fracy)*(1.0f-fracz)*gx[intz][inty][intx+1]+
    fracy*(1.0f-fracx)*(1.0f-fracz)*gx[intz][inty+1][intx]+
    fracz*(1.0f-fracx)*(1.0f-fracy)*gx[intz+1][inty][intx]+
    fracx*fracz*(1.0f-fracy)*gx[intz+1][inty][intx+1]+
    fracy*fracz*(1.0f-fracx)*gx[intz+1][inty+1][intx]+
    fracx*fracy*(1.0f-fracz)*gx[intz][inty+1][intx+1]+
    fracx*fracy*fracz*gx[intz+1][inty+1][intx+1];
}

// Fills `logd` with a log-density estimate for the voxels set in thin_img.
//
// Pass 1 visits every object voxel: voxels with at least one cleared voxel in
// their 3x3x3 neighbourhood get logd = 0.04, and every object voxel whose EDT
// value is at least 2 is queued, ordered so that smaller EDT values are
// processed first.
// Pass 2 pops the queue and sets logd at each voxel from the values
// interpolated one field step (u,v,w) upstream in `logd` and in the member
// `dens`, divided by 1 - (1-|Fx|)(1-|Fy|)(1-|Fz|); when that divisor does not
// exceed `delta` the value is set to 0.
//
// No bounds checks are made on neighbour or interpolation indices.
void collapseSkel3d::get_logdensity(Volume<float> &logd)
{
  const float delta = 0.001f;

  const int width = edt.getWidth(), height = edt.getHeight(), depth = edt.getDepth();

  std::priority_queue< _PointFlux, std::vector<_PointFlux>, _compare_point > pqueue(true);

  // 1 - seed border values and queue the voxels to be processed
  for(int z=0; z<depth; z++)
    for(int y=0; y<height; y++)
      for(int x=0; x<width; x++) {

	// only voxels inside the object are considered
	if(!thin_img[z][y][x]) continue;

	// number of cleared voxels in the 3x3x3 neighbourhood
	int nbackground = 0;
	for(int zz=z-1; zz<=z+1; zz++)
	  for(int yy=y-1; yy<=y+1; yy++)
	    for(int xx=x-1; xx<=x+1; xx++)
	      if(!thin_img[zz][yy][xx]) nbackground++;

	// border voxel
	if(nbackground>0)
	  logd[z][y][x] = 0.04f;

	// voxels too close to the boundary are not queued
	if(edt[z][y][x]<2.0f) continue;

	_PointFlux entry;
	entry.pos[0] = x;
	entry.pos[1] = y;
	entry.pos[2] = z;
	entry.flux = 0;
	entry.dist = edt[z][y][x];
	pqueue.push(entry);
      }

  // 2 - compute the log-density in queue order
  while (!pqueue.empty()) {
    const _PointFlux p = pqueue.top();
    pqueue.pop();

    const int px = p.pos[0], py = p.pos[1], pz = p.pos[2];

    const float
      Fx = u[pz][py][px],
      Fy = v[pz][py][px],
      Fz = w[pz][py][px];

    // sample position one field step upstream of the voxel
    const float sx = px-Fx, sy = py-Fy, sz = pz-Fz;

    float value =
      trilin_interp(sx, sy, sz, logd)
      - 0.5f*( dens[pz][py][px] +
	       trilin_interp(sx, sy, sz, dens) );

    const float tmp = 1.0f - (1.0f-fabs(Fx)) * (1.0f-fabs(Fy)) * (1.0f-fabs(Fz));
    if (tmp>delta) value /= tmp;
    else if (delta<1) value = 0;

    logd[pz][py][px] = value;
  }
}

// Computes a corrected flux `nflux` for every voxel set in thin_img.
//
// For each such voxel, `logd` and the member `dens` are sampled by trilinear
// interpolation one field step (u,v,w) upstream, and combined with the values
// at the voxel itself:
//
//   nflux = (L - Lu) * exp(L - 0.5*delta)
//           + 0.5 * ( Du * exp(Lu) + D * exp(L) )
//
// where L/D are logd/dens at the voxel and Lu/Du the upstream samples.
// Voxels outside the object are left untouched in `nflux`.
//
// No bounds checks are made on the interpolation indices.
void collapseSkel3d::get_corrected_flux(Volume<float> &logd, Volume<float> &nflux)
{
  const float delta = 1.0f;

  const int width = edt.getWidth(), height = edt.getHeight(), depth = edt.getDepth();

  for(int z=0; z<depth; z++)
    for(int y=0; y<height; y++)
      for(int x=0; x<width; x++) {

	// only voxels inside the object are considered
	if(!thin_img[z][y][x]) continue;

	const float
	  Fx = u[z][y][x],
	  Fy = v[z][y][x],
	  Fz = w[z][y][x];

	// values at the voxel and one field step upstream
	const float logd_here = logd[z][y][x];
	const float logd_up = trilin_interp(x-Fx, y-Fy, z-Fz, logd);
	const float dens_up = trilin_interp(x-Fx, y-Fy, z-Fz, dens);

	nflux[z][y][x] = (logd_here - logd_up)*
	  expf(logd_here-0.5f*delta) + //LUK: ???
	    0.5f*( dens_up *
		   expf(logd_up) +
		   dens[z][y][x]*expf(logd_here) );
      }
}
		   

void collapseSkel3d::hj3d(bool use_corr, float thres)
{
  const int width = edt.getWidth(), height = edt.getHeight(), depth = edt.getDepth();

  //spline kernel
  static const float kernel[10] = {1.0, 4.0, 1.0, 4.0, 16.0, 4.0, 1.0, 4.0, 1.0, 72.0};

  //compute gradient edt
  for(int i=0;i<depth;i++)
    for(int j=0;j<height;j++)
      for(int k=0;k<width;k++) {

	//inside object
	if(!thin_img[i][j][k]) continue;

	int idx1 = 0; float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval; 
	for(int kk=-1;kk<=1;kk++)
	  for(int mm=-1;mm<=1;mm++) {	
	    kval = kernel[idx1++];
	    gx += kval*(edt[i+kk][j+mm][k+1]-edt[i+kk][j+mm][k-1]);
	    gy += kval*(edt[i+kk][j+1][k+mm]-edt[i+kk][j-1][k+mm]);
	    gz += kval*(edt[i+1][j+kk][k+mm]-edt[i-1][j+kk][k+mm]);	
	  }
	
	kval = sqrtf(gx*gx + gy*gy + gz*gz);	
	
	if(kval<1e-7f) continue;

	u[i][j][k]=gx/kval; v[i][j][k]=gy/kval; w[i][j][k]=gz/kval;
      }

  //compute dievergence
  for(int i=0;i<depth;i++)
    for(int j=0;j<height;j++)
      for(int k=0;k<width;k++) {

	//inside object
	if(!thin_img[i][j][k]) continue;

	float divg = 0.0f;
	int idx = 0;
	float *diff_vec;

	for(int ii=i-1;ii<=i+1;ii++)
	  for(int jj=j-1;jj<=j+1;jj++)
	    for(int kk=k-1;kk<=k+1;kk++,idx++) {

	      if(idx==13) {
		assert(ii==i && jj==j && kk==k);
		continue;
	      }

	      diff_vec = diff_vecs[idx];
	      
	      divg += diff_vec[0]*u[ii][jj][kk] + diff_vec[1]*v[ii][jj][kk] + 
		diff_vec[2]*w[ii][jj][kk];	      
	    }
	
	dens[i][j][k] = divg;
      }

  if(use_corr) {
    get_logdensity(dens_prev);

    Volume<float> nflux(width, height, depth);
    get_corrected_flux(dens_prev, nflux);
    dens = nflux;    
  }


  float min_dens = 1e+7f;
  for(int i=0;i<depth;i++)
    for(int j=0;j<height;j++)
      for(int k=0;k<width;k++) {

	//inside object
	if(!thin_img[i][j][k]) continue;

	//max_dens[i][j][k] = dens_prev[i][j][k]/1000.0f;
	max_dens[i][j][k] = -dens[i][j][k]/1000.0f;
	//if(max_dens[i][j][k]>1e+5f) max_dens[i][j][k]=1e+5f;
	min_dens = std::min(min_dens, dens[i][j][k]);
      }

  //printf("min_flux %f\n", min_dens);

  const bool curve = false;

  //init with border points
  std::priority_queue< _PointFlux, std::vector<_PointFlux>, _compare_point > pqueue(curve);

  Volume<byte> count(width, height, depth);
  Volume<byte> label(width, height, depth);

  for(int i=0;i<depth;i++)
    for(int j=0;j<height;j++)
      for(int k=0;k<width;k++) {
	  
	//inside object
	if(!thin_img[i][j][k]) continue;

	int nsmaller = 0;
	for(int z=i-1;z<=i+1;z++)
	  for(int y=j-1;y<=j+1;y++)
	    for(int x=k-1;x<=k+1;x++)
	      if(!thin_img[z][y][x]) nsmaller++;
	
	//border voxel
	if(nsmaller>0 && isSimple(k, j, i, thin_img) ) {
	  
	  assert(thin_img[i][j][k]==254);
	  
	  _PointFlux p;
	  p.pos[0] = k;
	  p.pos[1] = j;
	  p.pos[2] = i;
	  p.flux = dens[i][j][k];
	  p.dist = edt[i][j][k];
	  pqueue.push(p);
	  count[i][j][k] = 1; //in queue
	}
      }


  //compute skel
  while (!pqueue.empty()) {
    _PointFlux p = pqueue.top();     // Get the point with the max flux
    pqueue.pop();                    // Remove the point from the queue    
    count[p.pos[2]][p.pos[1]][p.pos[0]] = 0; // Reinit counter

      // Test if the point is simple
    if (isSimple(p.pos[0],p.pos[1],p.pos[2],thin_img)) {
      if ( !(label[p.pos[2]][p.pos[1]][p.pos[0]]==127 ||
	     isEndPoint(p.pos[0],p.pos[1],p.pos[2],curve,thin_img)) || p.flux>thres) {
	
	thin_img[p.pos[2]][p.pos[1]][p.pos[0]] = 0; // Remove the point
	max_dens[p.pos[2]][p.pos[1]][p.pos[0]] = 0;

        for (int k = -1; k<=1; ++k)
          for (int l = -1; l<=1; ++l)
            for (int m = -1; m<=1; ++m) {
              // Protection
	      /*
              if (p.pos[0]+k < 0 || p.pos[0]+k >= width() ||
                  p.pos[1]+l < 0 || p.pos[1]+l >= height() ||
                  p.pos[2]+m < 0 || p.pos[2]+m >= depth()) continue;
	      */
              if (thin_img[p.pos[2]+m][p.pos[1]+l][p.pos[0]+k]!=0 &&
		  count[p.pos[2]+m][p.pos[1]+l][p.pos[0]+k]<1 &&
                  isSimple(p.pos[0]+k,p.pos[1]+l,p.pos[2]+m, thin_img)) {
                _PointFlux p1;
                p1.pos[0] = p.pos[0]+k;
                p1.pos[1] = p.pos[1]+l;
                p1.pos[2] = p.pos[2]+m;
                p1.flux = dens[p.pos[2]+m][p.pos[1]+l][p.pos[0]+k];
                p1.dist = edt[p.pos[2]+m][p.pos[1]+l][p.pos[0]+k];
                pqueue.push(p1);
		count[p.pos[2]+m][p.pos[1]+l][p.pos[0]+k]=1;
              }
            }
      } else {
	label[p.pos[2]][p.pos[1]][p.pos[0]]=127; // Mark the point as skeletal
      }
    }
  }

}
