#include "collapseSkel3d.h"
#include "edt3d.h"
#include "level_set.h"
#include "filters.h"

#include <assert.h>

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <memory>
#include <queue>
#include <set>
#include <unordered_map>
#include <vector>

//Max size of the template cache for speeding up isSimple().
//Once the cache holds this many entries, isSimple() stops inserting new ones.
const int           TCACHE_SIZE = 500000;
typedef std::unordered_map<unsigned int, bool>
TemplateCache;         //Template cache. Each 3x3x3 config is encoded as a 27-bit key; the value is the isSimple() answer
//File-wide cache instance used by isSimple() (not per-object).
TemplateCache         template_cache;
//Number of calls to isSimple(). Efficiency of template cache = iss_calls/template_cache.size()
static unsigned int       iss_calls = 0;




//Offsets (x, y, z) of the 26 neighbours of a voxel. isSimple_tab() sets bit i of its
//lookup index when the neighbour at (xc[i], yc[i], zc[i]) is set in thin_img.
//NOTE(review): the order presumably has to match the layout of the table file loaded
//by initSimpleTable() -- do not reorder without confirming.
const static int        xc[] = {1, 1, 0, -1, -1, -1, 0, 1, 0, 1, 1, 0, -1, -1, -1, 0, 1, 1, 1, 0, -1, -1, -1, 0, 1, 0};
const static int        yc[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1};
const static int        zc[] = {0, -1, -1, -1, 0, 1, 1, 1, 0, 0, -1, -1, -1, 0, 1, 1, 1, 0, -1, -1, -1, 0, 1, 1, 1, 0};
//Bit-packed lookup table (one bit per 26-neighbour configuration, 2^23 bytes),
//filled by initSimpleTable() and read by isSimple_tab().
static std::unique_ptr<unsigned char[]> simple_tab;

// Constructs the skeletonization engine.
//   advect_vel_      : stored in advect_vel; selects whether velocities are taken from v()
//                      (true) or re-estimated from thin_img during transport (false).
//   force_monotonic_ : stored in force_monotonic.
// The comparator icmp is built from thin_img, edt and dens1; all other state is
// brought to its initial value by reset().
// NOTE(review): forceReady is not assigned here or in reset(); confirm it has an
// in-class initializer, since IsReady() reads it.
collapseSkel3d::collapseSkel3d(bool advect_vel_, bool force_monotonic_)
  : icmp(thin_img, edt, dens1)
{
  this->advect_vel      = advect_vel_;
  this->force_monotonic = force_monotonic_;

  this->reset();
}

// Returns the object to its pre-init() state: simulation counters are zeroed,
// every working volume is cleared, the interface queue is emptied and the
// isosurface is replaced by a freshly constructed one.
void collapseSkel3d::reset()
{
  // Scalar simulation state
  sim_iter            = 0;
  tdens               = 0.0f;
  curr_dst            = 0.0f;
  max_dst             = 0.0f;
  cs_importance_boost = 0.1f;

  // Scalar / label volumes
  dens.clearVolume(0);
  dens1.clearVolume(0);
  thin_img.clearVolume(0);
  max_dens.clearVolume(0);
  edt.clearVolume(0);

  // Velocity field
  v.clearVolume(Vector());

  // Pending interface points
  queue.clear();

  // Drop the old surface, then start from a default-constructed one
  surf.DeleteSurface();
  surf = CIsoSurface<byte>();
}

// True once at least one simulation iteration has run (sim_iter != 0),
// or when readiness has been forced through set_ready().
bool collapseSkel3d::IsReady()
{
  if (sim_iter != 0)
    return true;

  return forceReady;
}

// Nothing to release by hand: the destructor body is empty and members clean up themselves.
collapseSkel3d::~collapseSkel3d() = default;


// Prepares all per-volume state for a new input volume.
//
//   data                     : input volume; voxels with data > thr are foreground.
//   thr                      : foreground threshold (also passed, as thr + 1, to the
//                              isosurface generator).
//   bordervoxels             : [out] number of border voxels put in the interface queue.
//   smooth                   : when true, the EDT is smoothed with smoothEdt().
//   enableImportanceBoosting : stored in the member of the same name.
//
// Steps: builds the isosurface, loads the simple-point table, allocates the working
// volumes, fills the diffusion lookup tables (weights + unit direction vectors),
// computes the EDT, thresholds data into thin_img (255 = foreground) and finally
// marks every simple foreground voxel that has a background neighbour as border
// (thin_img = 254, dens1 = 1, pushed on queue). With advect_vel set, an initial
// velocity is estimated for each border voxel.
//
// Returns the number of foreground voxels (also cached in lastForegroundCount).
//
// NOTE(review): the 3x3x3 neighbourhood scans below (and isSimple_tab()) read
// data/thin_img at +-1 without bounds checks; this presumably relies on the
// foreground never touching the volume faces -- confirm against Volume.
int collapseSkel3d::init(Volume<byte> &data, int thr, int &bordervoxels, bool smooth, bool enableImportanceBoosting)
{
  const int width = data.getWidth(), height = data.getHeight(),
            depth = data.getDepth();

  this->enableImportanceBoosting = enableImportanceBoosting;

  //Generate isosurface from a flat copy of the voxel data.
  //A plain vector is enough (its storage is on the heap anyway), and data()
  //stays well-defined even when the vector is empty.
  std::vector<byte> vec;
  data.toVector(vec);
  surf.GenerateSurface(vec.data(), thr + 1, width - 1, height - 1, depth - 1, 1.0,
                       1.0, 1.0);
  if (!surf.IsSurfaceValid())
    std::cerr << "CIsoSurface: Couldn't generate a valid surface." << std::endl;

  initSimpleTable("./tables/simple_tab.dat");
  //All expensive allocations below...
  dens.makeVolume(width, height, depth);
  dens1.makeVolume(width, height, depth);
  thin_img.makeVolume(width, height, depth);
  max_dens.makeVolume(width, height, depth);
  edt.makeVolume(width, height, depth);
  v.makeVolume(width, height, depth);

  max_dst = 0.0f;

  //init diffusion weights (lookup table)
  this->r = 2; //2.0f kernel radius

  int klen = 2 * this->r + 1;
  diff_weights.reset(new float[klen * klen * klen]);
  diff_vecs_holder.reset(new float[klen * klen * klen * 3]);
  diff_vecs = reinterpret_cast<float(*)[3]>(diff_vecs_holder.get());

  float min_w = 1e+6f;

  for (int idx = 0, zz = -r; zz <= r; ++zz)
    for (int yy = -r; yy <= r; ++yy)
      for (int xx = -r; xx <= r; ++xx, ++idx)
      {
        //Outer shell of the kernel gets half weight
        if ( abs(zz) == r || abs(yy) == r || abs(xx) == r)
          diff_weights[idx] = 0.5f;
        else
          diff_weights[idx] = 1.0f;
        min_w = std::min(min_w, diff_weights[idx]);

        //Unit vector from the kernel centre to this cell
        float dz = (float) zz, dy = (float) yy, dx = (float) xx;
        float mag = sqrtf( dz * dz + dy * dy + dx * dx );
        if (mag > 0.0f)
        {
          dz /= mag;
          dy /= mag;
          dx /= mag;
        }
        //else: kernel centre; keep the zero vector instead of producing 0/0 = NaN

        diff_vecs[idx][0] = dx;
        diff_vecs[idx][1] = dy;
        diff_vecs[idx][2] = dz;
      }

  printf("Minimum diffusion weight: %f.\n", min_w);

  int foreground = 0;

  std::cout << "Computing EDT..." << std::endl;

  Volume<int> sqr_edt(width, height, depth);
  edt_3d(sqr_edt, data, thr);

  for (int i = 0; i < depth; ++i)
    for (int j = 0; j < height; ++j)
      for (int k = 0; k < width; ++k)
      {
        if (sqr_edt(k, j, i))
        {
          //edt_3d delivers squared distances; store the actual distance
          float dst = sqrtf((float) sqr_edt(k, j, i));
          edt(k, j, i) = dst;
          max_dst = std::max(max_dst, dst);
        }

        if (data(k, j, i) > thr)
        {
          thin_img(k, j, i) = 255; //threshold data[] into binary thin_img[]
          ++foreground;
        }
      }

  if (smooth)
    smoothEdt();

  bordervoxels = 0;
  tdens = 0.0f;
  for (int i = 0; i < depth; i++)
    for (int j = 0; j < height; j++)
      for (int k = 0; k < width; k++)
      {
        if (!thin_img(k, j, i)) continue;

        v(k, j, i) = Vector(0, 0, 0);

        //Count background voxels in the 3x3x3 neighbourhood
        int nsmaller = 0;
        for (int z = i - 1; z <= i + 1; z++)
          for (int y = j - 1; y <= j + 1; y++)
            for (int x = k - 1; x <= k + 1; x++)
              if (data(x, y, z) <= thr) ++nsmaller;

        if (nsmaller > 0 && isSimple_tab(coord3s(k, j, i)))                //border voxel
        {
          dens1(k, j, i)    = 1.0f;
          thin_img(k, j, i) = 254;
          queue.push_back(coord3s(k, j, i));

          if (advect_vel)
          {           
#if 0
            int idx1 = 0;
            float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;
            for (int kk = -1; kk <= 1; kk++)
              for (int mm = -1; mm <= 1; mm++)
              {
                kval = kernel[idx1++];
                gx += kval * (edt(k + 1, j + mm, i + kk) - edt(k - 1, j + mm, i + kk));
                gy += kval * (edt(k + mm, j + 1, i + kk) - edt(k + mm, j - 1, i + kk));
                gz += kval * (edt(k + mm, j + kk, i + 1) - edt(k + mm, j + kk, i - 1));
              }
            kval = sqrtf(gx * gx + gy * gy + gz * gz) + 1e-7f;
            Vector &vec = v(k, j, i);
            vec.u = gx / kval;
            vec.v = gy / kval;
            vec.w = gz / kval;
#else
            const int rr = 2; //2,4,10                              //4D regression gradient

            //Sum of unit vectors towards every foreground voxel within radius rr
            float gx = 0.0f, gy = 0.0f, gz = 0.0f;
            for (int ii = -rr; ii <= rr; ii++)
              for (int jj = -rr; jj <= rr; jj++)
                for (int kk = -rr; kk <= rr; kk++)
                {
                  if (i + ii < 0 || i + ii >= depth || j + jj < 0 || j + jj >= height ||
                      k + kk < 0 || k + kk >= width) continue;
                  if (!thin_img(k + kk, j + jj, i + ii)) continue;
                  if (ii == 0 && jj == 0 && kk == 0) continue;

                  float dst = sqrtf((float) (ii * ii + jj * jj + kk * kk));
                  float fk = 1;//edt[i+ii][j+jj][k+kk];
                  gz += ii * fk / dst;
                  gy += jj * fk / dst;
                  gx += kk * fk / dst;
                }

            //Normalize; the epsilon avoids a division by zero for a null gradient
            float kval = sqrtf(gx * gx + gy * gy + gz * gz) + 1e-7f;
            auto& vec = v(k, j, i);
            vec.u = gx / kval;
            vec.v = gy / kval;
            vec.w = gz / kval;
#endif
          }
        }

        tdens += dens1(k, j, i);
      }

  bordervoxels = queue.size();

  printf("Total dens: %f\n", tdens);
  lastForegroundCount = foreground;

  return foreground;
}





// Loads the bit-packed simple-point lookup table used by isSimple_tab().
//
//   fname : path of the table file; it must contain at least 2^23 bytes
//           (one bit for each of the 2^26 neighbourhood configurations).
//
// Returns 1 on success and 0 when the file cannot be opened or is too short.
// On failure an error is printed to stderr and the previously loaded table (if
// any) is left untouched; debug builds additionally stop on an assert, as before.
int collapseSkel3d::initSimpleTable(const char *fname)
{
  const size_t tsize = static_cast<size_t>(1) << 23;

  printf("initSimpleTable: loading table...");

  //The table is raw binary data: open in "rb" so that text-mode translation
  //(newline conversion, Ctrl-Z as EOF on Windows) cannot corrupt or truncate it.
  //The unique_ptr closes the file on every exit path.
  std::unique_ptr<FILE, int (*)(FILE *)> pfile(fname ? fopen(fname, "rb") : nullptr,
                                               &fclose);
  if (!pfile)
  {
    fprintf(stderr, "\ninitSimpleTable: cannot open table file '%s'.\n",
            fname ? fname : "(null)");
    assert(false && "initSimpleTable: cannot open table file");
    return 0;
  }

  //Read into a scratch buffer first so a short read never leaves a half-filled table
  std::unique_ptr<unsigned char[]> table(new unsigned char[tsize]);
  const size_t nread = fread(table.get(), sizeof(unsigned char), tsize, pfile.get());
  if (nread != tsize)
  {
    fprintf(stderr, "\ninitSimpleTable: '%s' is too short (%lu of %lu bytes read).\n",
            fname, static_cast<unsigned long>(nread), static_cast<unsigned long>(tsize));
    assert(false && "initSimpleTable: table file too short");
    return 0;
  }

  simple_tab.swap(table);

  printf("done.\n");

  return 1;
}







//Compute the inverse-FT from the direct-FT. That is, for each boundary-point
//we add to it all skeleton-points that have it as a feature-point.
//
//  ft : feature transform; ft[z][y][x] holds the feature point of voxel (x,y,z)
//       encoded as (fx*height + fy)*depth + fz.
//
//The result is stored in ift[], indexed by the encoded feature point. Each entry
//collects the indices of the thin_img voxels mapping to it, encoded in the same
//way, which is how fillGaps() decodes them.
void collapseSkel3d::computeIFT(Volume<int> &ft)
{
  const int width = dens.getWidth(), height = dens.getHeight(),
            depth = dens.getDepth();
  const int nvoxels = width * height * depth;

  std::cout << "Computing inverse FT..." << std::endl;

  //Start from empty buckets so repeated calls do not accumulate stale entries
  ift.clear();
  ift.resize(nvoxels);
  for (int i = 0; i < depth; ++i)
  {
    const Image<byte>  &ti = thin_img[i];
    const Image<int>   &fi = ft[i];
    for (int j = 0; j < height; ++j)
    {
      const byte*  tr = ti[j];
      const int*   fr = fi[j];
      for (int k = 0; k < width; ++k)
      {
        if (!tr[k]) continue;               //we only care about internal points

        //Encoded feature point; it is directly the index of its bucket in ift[]
        const int fp = fr[k];
        if (fp < 0 || fp >= nvoxels) continue;    //no/invalid feature point: nothing to register

        //Register THIS voxel (not fp itself) with its feature point
        const int skel_idx = (k * height + j) * depth + i;
        ift[fp].addSkelPoint(skel_idx);
      }
    }
  }
}

void collapseSkel3d::set_ready(bool isReady)
{
  forceReady = isReady;
}

// Rebuilds max_dens by connecting important voxels with straight lines.
//
//   thr : only voxels with max_dens >= thr act as line start points.
//
// For every such voxel, the entries of the inverse feature transform found in the
// 3x3x3 neighbourhood of its feature point are collected; a 3D Bresenham line is
// drawn from the voxel to each of them, carrying the linear interpolation of the two
// end-point importances (the maximum wins where lines overlap). max_dens is then
// replaced by the rasterized result (blend weight w = 0).
void collapseSkel3d::fillGaps(float thr)
{
  const int width = dens.getWidth(), height = dens.getHeight(),
            depth = dens.getDepth();

  //cout<<"Computing FT..."<<endl;
  //!!ft_3d(ft, data, thr);

  computeIFT(ft);

  Volume<float> timp(width, height, depth);

  for (int i = 0; i < depth; i++)
    for (int j = 0; j < height; j++)
      for (int k = 0; k < width; k++)
      {
        if (max_dens[i][j][k] < thr) continue;
        const int fp = ft[i][j][k];
        if (fp < 0) continue;         //no valid feature point to decode

        int z = fp % depth;           //decode fp to (x,y,z)
        int x = fp / depth;
        int y = x % height;
        x = x / height;

        //Gather all points registered with the feature point or one of its neighbours.
        //Neighbours falling outside the volume are skipped: their linear index would
        //otherwise wrap around and address an unrelated (or non-existent) ift entry.
        std::vector<coord3s> nskel;
        for (int xx = x - 1; xx <= x + 1; xx++)
        {
          if (xx < 0 || xx >= width) continue;
          for (int yy = y - 1; yy <= y + 1; yy++)
          {
            if (yy < 0 || yy >= height) continue;
            for (int zz = z - 1; zz <= z + 1; zz++)
            {
              if (zz < 0 || zz >= depth) continue;

              const unsigned int idx = (xx * height + yy) * depth + zz;
              for (const unsigned int idxs : ift[idx].skel_idxs)
              {
                //decode the stored index to (sx,sy,sz)
                int sz = idxs % depth;
                int sx = idxs / depth;
                int sy = sx % height;
                sx = sx / height;
                //assert(sz < depth && sy < height && sx < width);
                //if(max_dens[sz][sy][sx]<thr) continue;
                nskel.push_back(coord3s(sx, sy, sz));
              }
            }
          }
        }

        for (size_t m = 0; m < nskel.size(); m++)
        {
          const coord3s &c = nskel[m];

          std::vector<Volume<float>::POINT3> points;
          timp.bresenham_linie_3D(k, j, i, c.x, c.y, c.z, points);

          //A line needs two distinct end points to interpolate along
          if (points.size() < 2)
          {
            printf("bla\n");
            continue;
          }

          //t runs from 1 (start voxel, value a) down to 0 (end voxel, value b)
          float a = max_dens[i][j][k], b = max_dens[c.z][c.y][c.x];
          float t = 1.0f, tincr = 1.0f / (points.size() - 1);
          for (size_t p = 0; p < points.size(); p++)
          {
            float val = a * t + b * (1.0f - t);
            //float val = std::max(a, b);
            const Volume<float>::POINT3 &pp = points[p];
            float &dst = timp[(int)pp.coord[2]][(int)pp.coord[1]][(int)pp.coord[0]];
            dst = std::max(val, dst);
            t -= tincr;
          }
        }
      }

  const float w = 0.0f;   //weight of the old importance in the blend below

  for (int i = 0; i < depth; i++)
    for (int j = 0; j < height; j++)
      for (int k = 0; k < width; k++)
        max_dens[i][j][k] = w * max_dens[i][j][k] + (1.0f - w) * timp[i][j][k];
}





void collapseSkel3d::thinSimplify(float thresh1, float thresh2, bool curve)
{
  const int width = dens.getWidth(), height = dens.getHeight(),
            depth = dens.getDepth();

  for (int i = 0; i < depth;
       ++i)                         //1. First, store in thin_img the thresholded importance
    for (int j = 0; j < height; ++j)
      for (int k = 0; k < width; ++k)
      {
        const coord3s c(k, j, i);
        thin_img(c) = (max_dens(c) < thresh1) ? 0 : 255;
      }

  std::priority_queue<Coord3, std::vector<Coord3>, index_cmp_minv> pqueue;
  Volume<byte> label(width, height, depth);

  for (int i = 0; i < depth;
       i++)                         //2. Label all SS-border points with 1 (in label[]) and add them to pqueue
    for (int j = 0; j < height; j++)
      for (int k = 0; k < width; k++)
      {
        if (!thin_img(k, j, i))
          continue;                       //Skip non-skeletal voxels

        int nsmaller = 0;
        for (int z = i - 1; z <= i + 1; z++)
          for (int y = j - 1; y <= j + 1; y++)
            for (int x = k - 1; x <= k + 1; x++)
              if (!thin_img(x, y, z)) ++nsmaller;

        if (nsmaller > 0 && isSimple_tab(coord3s(k, j, i)))             //SS border voxel
        {
          Coord3 p(k, j, i, -max_dens(k, j, i));
          pqueue.push(p);
          label(k, j, i) = 1;                              //in queue
        }
      }

  while (!pqueue.empty())                                //3. Do the thinning of the SS:
  {
    Coord3  P =
      pqueue.top();                              // Get the point with the max flux
    const coord3s &p = P.c;
    pqueue.pop();                                    // Remove the point from the queue
    label(p) = 0;                                    // Reinit counter

    if (!isSimple_tab(p))
      continue;                            // Test if the point is simple - only delete simple points

    if (P.v > thresh2 || (label(p) != 127 &&
                          !isEndPoint(p, curve)))         // Also, don't delete CS-endpoints
    {
      thin_img(p) = 0;                                 // Remove the point from the SS
      max_dens(p) =
        0;                                 //...and also erase its importance
      //WARNING: this is not OK if we call this func again for other thresholds
      for (int k = -1; k <= 1; ++k)
        for (int l = -1; l <= 1; ++l)
          for (int m = -1; m <= 1; ++m)
          {
            coord3s a(p.x + k, p.y + l, p.z + m);
            if (thin_img(a) && label(a) < 1 && isSimple_tab(a))
            {
              Coord3 p1(a, -max_dens(a));
              pqueue.push(p1);
              label(p1.c) = 1;                             //in queue
            }
          }
    }
    else
    {
      label(p) = 127;                                // Mark the point as skeletal
    }
  }
}



void collapseSkel3d::detectBoundary(float thr, bool curve)
{
  const int width = max_dens.getWidth(), height = max_dens.getHeight(),
            depth = max_dens.getDepth();

  for (int i = 0; i < depth; ++i)
    for (int j = 0; j < height; ++j)
      for (int k = 0; k < width; ++k)
      {
        coord3s c(k, j, i);
        thin_img(c) = (max_dens(c) < thr) ? 0 : 255;
      }

  for (int i = 0; i < depth; ++i)
    for (int j = 0; j < height; ++j)
      for (int k = 0; k < width; ++k)
      {
        coord3s c(k, j, i);
        if (thin_img(c) && isEndPoint(c, curve))
          max_dens(c) = 1.0f;
        else
          max_dens(c) = 0;
      }
}



void collapseSkel3d::detectSSBoundary(float thr)
{
  const int width = dens.getWidth(), height = dens.getHeight(),
            depth = dens.getDepth();
  std::vector<coord3s> boundary;

  //First pass: detect boundary points using loop criterion
  for (int i = 0; i < depth; ++i)
    for (int j = 0; j < height; ++j)
      for (int k = 0; k < width; ++k)
      {
        coord3s c(k, j, i);
        bool is_inside = (max_dens(c) >= thr);

        if (is_inside)
        {
          bool is_boundary = ::isBoundary(c, max_dens, thr);
          thin_img(c) = (is_boundary) ? 255 : 128;
          if (is_boundary) boundary.push_back(c);
        }
      }
  //Second pass: erase small boundary connected-components
  for (size_t i = 0; i < boundary.size(); ++i)
  {
    const coord3s &b = boundary[i];

    int max_size = 6;
    int min_dist = 6;

    int sz = ::sizeCC(b, thin_img, max_size);
    if (sz >= max_size) continue;//1. Never delete too large fg-components

    coord3s p;
    int mind = ::closestCC(b, thin_img, p);
    if (mind > min_dist)
    //2. Relatively small component, but very far away from anyone else: OK, delete
    {
      ::eraseCC(b, thin_img, 128);
      continue;
    }

    sz = ::sizeCC(p, thin_img,
                  max_size);   //3. Relatively small component, close to a very small other component: OK, delete
    if (sz < max_size)
      ::eraseCC(b, thin_img, 128);
  }
}



void collapseSkel3d::assignImpLoops()
{
  const int ssize = thin_img.getWidth() * thin_img.getHeight(),
            depth = thin_img.getDepth();

  for (int i = 0; i < depth; i++)
  {
    float *max_dens_ptr = max_dens[i][0];
    byte *thin_img_ptr = thin_img[i][0];

    for (int j = 0; j < ssize; j++)
      if (thin_img_ptr[j] > 128)
        max_dens_ptr[j] = 1.0f;
  }
}




// Table-driven simple-point test for voxel c.
// The 26 neighbours of c (in xc/yc/zc order) are packed into a 26-bit configuration
// number; the answer is the corresponding bit of the bit-packed simple_tab.
// Requires initSimpleTable() to have filled simple_tab.
bool collapseSkel3d::isSimple_tab(const coord3s &c) const
{
  unsigned int config = 0;
  unsigned int bit    = 1;

  for (int n = 0; n < 26; ++n, bit <<= 1)
  {
    if (thin_img(c.x + xc[n], c.y + yc[n], c.z + zc[n]))
      config |= bit;
  }

  // 8 configurations per table byte
  const unsigned int byte_pos = config >> 3;
  const unsigned int bit_pos  = config & 7;

  return ((simple_tab[byte_pos] >> bit_pos) & 1) != 0;
}




// Simple-point test for voxel c, computed from its 3x3x3 neighbourhood.
//
// Returns true when both component counts computed below equal 1:
//   C_bar     - number of background components found starting from the six
//               axis-aligned neighbours of the centre;
//   C_asterix - number of foreground components among the 26 neighbours.
//
// Results are memoized in the file-scope template_cache under a 27-bit key built
// from the neighbourhood, until the cache holds TCACHE_SIZE entries. Although the
// method is const, it updates the file-scope template_cache and iss_calls.
bool collapseSkel3d::isSimple(const coord3s &c) const
{
  // Local binarized copy of the neighbourhood, indexed [x][y][z].
  // NOTE: this deliberately shadows the member volume thin_img.
  byte thin_img[3][3][3];

  // Compute \bar{C}
  // Seeking for a component

  //Forms a 27-bit key to search the current 3x3x3 template in the cache
  unsigned int key = 0;                       
  unsigned int I   = 1;
  for (signed char k = -1; k <= 1; ++k)
  {
    const Image<byte> &it = this->thin_img[c.z + k];
    for (signed char j = -1; j <= 1; ++j)
    {
      //Cache 3x3 template around thin_img(x,y,z) locally for speed
      const byte* ir = it[c.y + j];
      for (signed char i = -1; i <= 1; ++i)         
      {
        bool value = ir[c.x + i] > 0;  
        //since we next only check if this location is (non)zero
        thin_img[i + 1][j + 1][k + 1] = value;   
        //Set I-th bit in hash-key to 1, if this voxel is set
        if (value) key |= I;                    
        I <<= 1;
      }
    }
  }


  //One more isSimple() call
  ++iss_calls;
  // Search if we have already encountered this configuration earlier.
  TemplateCache::const_iterator s = template_cache.find(key);   
  //If so, we know the answer, and we return it.
  if (s != template_cache.end()) return s->second;        

  signed char count = 0;                  // Last component label handed out
  signed char C_bar = 0;                  // Number of background components
  signed char visit[3][3][3];             // Visitor table: 0 = unvisited, >0 = component label
  signed char *V = &visit[0][0][0], *VE = V + 27;   // Flat view of visit[] for relabelling
  memset(visit, 0, 27 * sizeof(signed char));
  visit[1][1][1] = -1;                    // The centre voxel never takes part

  // Look at X-axis
  for (signed char k = 0; k <= 2; ++k)        
    if (!thin_img[k][1][1] && !visit[k][1][1])
    {
      ++C_bar;
      visit[k][1][1] = ++count;

      // Follow component
      for (signed char l = 0; l <= 2; ++l)      
      {
        if (!thin_img[k][l][1] && !visit[k][l][1]) visit[k][l][1] = count;
        if (!thin_img[k][1][l] && !visit[k][1][l]) visit[k][1][l] = count;
      }
    }

  // Look at Y-axis
  for (signed char k = 0; k <= 2; ++k)        
    if (!thin_img[1][k][1] && !visit[1][k][1])
    {
      ++C_bar;
      visit[1][k][1] = ++count;
      signed char label = count, C;

      // Follow component
      for (signed char l = 0; l <= 2; l += 2)   
      {
        if (!thin_img[l][k][1])
        {
          // Cell already carries another label: both labels are one component
          if ((C = visit[l][k][1]) && C != label)
          {
            // Meld component
            --C_bar;                  
            for (signed char* v = V; v != VE; ++v)
              if (*v == C) *v = label;
          }
          else visit[l][k][1] = label;
        }

        if (!thin_img[1][k][l])
        {
          if ((C = visit[1][k][l]) && C != label)
          {
            // Meld component
            --C_bar;                  
            for (signed char* v = V; v != VE; ++v)
              if (*v == C) *v = label;
          }
          else visit[1][k][l] = label;
        }
      }
    }

  // Look at Z-axis
  for (signed char k = 0; k <= 2; ++k)
    if (!thin_img[1][1][k] && !visit[1][1][k])
    {
      ++C_bar;
      visit[1][1][k] = ++count;
      signed char label = count, C;

      // Follow component
      for (signed char l = 0; l <= 2; l += 2)   
      {
        if (!thin_img[l][1][k])
        {
          if ((C = visit[l][1][k]) && C != label)
          {
            // Meld component
            --C_bar;
            for (signed char* v = V; v != VE; ++v)
              if (*v == C) *v = label;
          }
          else visit[l][1][k] = label;
        }

        if (!thin_img[1][l][k])
        {
          if ((C = visit[1][l][k]) && C != label)
          {
            // Meld component
            --C_bar; 
            for (signed char* v = V; v != VE; ++v)
              if (*v == C) *v = label;
          }
          else visit[1][l][k] = label;
        }
      }
    }

  // Not exactly one background component: the voxel is not simple
  if (C_bar != 1)
  {
    //Learn this configuration (if the cache's not full)
    if (template_cache.size() < TCACHE_SIZE)      
      template_cache.insert(std::make_pair(key, false));
    return false;
  }

  // Reinit visit
  memset(visit, 0, 27 * sizeof(signed char));   
  visit[1][1][1] = -1;
  signed char C_asterix = 0;              // Number of foreground components

  // Compute C^*
  // Seeking for a component
  for (signed char k = 0; k <= 2; ++k)
    for (signed char l = 0; l <= 2; ++l)
      for (signed char m = 0; m <= 2; ++m)
      {
        // Protection
        if (k == 1 && l == 1 && m == 1) continue; 
        if (!visit[k][l][m] && thin_img[k][l][m])
        {
          signed char label = 0;
          // Look at the neighbours (clamped to the 3x3x3 block)
          for (signed char k1 = std::max(k - 1, 0); k1 <= k + 1 &&
               k1 <= 2; ++k1) 
            for (signed char l1 = std::max(l - 1, 0); l1 <= l + 1 && l1 <= 2; ++l1)
              for (signed char m1 = std::max(m - 1, 0); m1 <= m + 1 && m1 <= 2; ++m1)
              {
                signed char C;              // Search for an already known component

                if ((C = visit[k1][l1][m1]) > 0 && thin_img[k1][l1][m1])
                {
                  if (!label) label = C;
                  else if (label != C)
                  {
                    --C_asterix;            // Meld component
                    for (signed char* v = V; v != VE; ++v)
                      if (*v == C) *v = label;
                  }
                }
              }

          // Find a new component
          if (!label)
          {
            ++C_asterix;
            label = ++count;
          }
          visit[k][l][m] = label;
        }
      }

  //Learn this configuration (if the cache's not full)
  if (template_cache.size() < TCACHE_SIZE)      
    template_cache.insert(std::make_pair(key, C_asterix == 1));

  return (C_asterix == 1);
}







// End-point test for voxel p on the current thin_img.
//
//   curve == true  : p is an end point when its 3x3x3 block (p included) holds
//                    exactly two set voxels.
//   curve == false : p is an end point when at least one of the nine planes listed
//                    in plan9 contains fewer than two set neighbours of p.
bool collapseSkel3d::isEndPoint(const coord3s &p, bool curve) const
{
  if (curve)
  {
    // Curve-skeleton case: count set voxels, giving up as soon as there are three
    int occupied = 0;
    for (int dz = -1; dz <= 1; ++dz)
      for (int dy = -1; dy <= 1; ++dy)
        for (int dx = -1; dx <= 1; ++dx)
        {
          if (thin_img(p.x + dx, p.y + dy, p.z + dz))
            ++occupied;
          if (occupied > 2)
            return false;
        }

    return occupied == 2;
  }

  // Surface-skeleton case: Pudney specification with the 9 planes.
  // Each row lists the (x,y,z) offsets of the 8 neighbours lying in one plane.
  static const int plan9[9][8][3] =
  {
    { { -1,  0, -1}, { 0,  0, -1}, { 1,  0, -1}, { -1,  0,  0}, { 1,  0,  0}, { -1,  0,  1}, { 0,  0,  1}, { 1,  0,  1} }, // Plane 1
    { { -1,  1,  0}, { 0,  1,  0}, { 1,  1,  0}, { -1,  0,  0}, { 1,  0,  0}, { -1, -1,  0}, { 0, -1,  0}, { 1, -1,  0} }, // Plane 2
    { {  0, -1, -1}, { 0,  0, -1}, { 0,  1, -1}, {  0, -1,  0}, { 0,  1,  0}, {  0, -1,  1}, { 0,  0,  1}, { 0,  1,  1} }, // Plane 3
    { {  1,  1,  1}, { 0,  1,  0}, {-1,  1, -1}, {  1,  0,  1}, {-1,  0, -1}, { -1, -1, -1}, { 0, -1,  0}, { 1, -1,  1} }, // Plane 4
    { { -1,  1,  1}, { 0,  1,  0}, { 1,  1, -1}, { -1,  0,  1}, { 1,  0, -1}, { -1, -1,  1}, { 0, -1,  0}, { 1, -1, -1} }, // Plane 5
    { { -1,  1,  1}, { 0,  1,  1}, { 1,  1,  1}, { -1,  0,  0}, { 1,  0,  0}, { -1, -1, -1}, { 0, -1, -1}, { 1, -1, -1} }, // Plane 6
    { { -1,  1, -1}, { 0,  1, -1}, { 1,  1, -1}, { -1,  0,  0}, { 1,  0,  0}, { -1, -1,  1}, { 0, -1,  1}, { 1, -1,  1} }, // Plane 7
    { { -1,  1, -1}, {-1,  1,  0}, {-1,  1,  1}, {  0,  0, -1}, { 0,  0,  1}, {  1, -1, -1}, { 1, -1,  0}, { 1, -1,  1} }, // Plane 8
    { {  1,  1, -1}, { 1,  1,  0}, { 1,  1,  1}, {  0,  0, -1}, { 0,  0,  1}, { -1, -1, -1}, {-1, -1,  0}, {-1, -1,  1} }  // Plane 9
  };

  for (int plane = 0; plane < 9; ++plane)
  {
    // Two hits are enough to rule this plane out, so stop counting there
    int hits = 0;
    for (int n = 0; n < 8 && hits < 2; ++n)
    {
      const int *off = plan9[plane][n];
      if (thin_img(p.x + off[0], p.y + off[1], p.z + off[2]))
        ++hits;
    }

    if (hits < 2)
      return true;
  }

  return false;
}

// Smooths edt in place with the (2r+1)^3 weighted kernel stored in diff_weights.
// Only voxels at least r away from every volume face are processed, so the kernel
// never reads outside the volume. Voxels whose smoothed distance exceeds 0.5 are
// (re)marked as foreground (255) in thin_img.
// Requires r and diff_weights to be initialized (done in init()).
void collapseSkel3d::smoothEdt()
{
  const int width = dens.getWidth(), height = dens.getHeight(),
            depth = dens.getDepth();

  std::cout << "Smoothing EDT..." << std::endl;

  //Work on a copy so already-smoothed values do not feed into later voxels
  Volume<float> temp;
  temp.makeVolume(width, height, depth);
  temp = edt;

  for (int i = r; i < depth - r; ++i)
    for (int j = r; j < height - r; ++j)
      for (int k = r; k < width - r; ++k)
      {
        float avg = 0.0f, sum_w = 0.0f;
        //idx walks diff_weights in the same z-y-x order it was filled in init().
        //It has to advance with every sample; otherwise every sample would get
        //diff_weights[0] and the kernel would degenerate to a plain box filter.
        int idx = 0;
        for (int zz = -r; zz <= r; ++zz)
          for (int yy = -r; yy <= r; ++yy)
            for (int xx = -r; xx <= r; ++xx, ++idx)
            {
              const float w = diff_weights[idx];
              avg += w * temp(k + xx, j + yy, i + zz);
              sum_w += w;
            }

        edt(k, j, i) = avg / sum_w;
        if (edt(k, j, i) > 0.5f) thin_img(k, j, i) = 255;
      }
  temp.clearVolume();
}



// One thinning sweep driven by the importance volume lambda.
//
//   lambda : importance per voxel; updated in place (removed voxels drop just
//            below thr, kept ones are lifted just above it).
//   thr    : importance threshold.
//   incr   : unused.
//
// On the first call ever, thin_img is (re)built from lambda >= thr and all those
// voxels become candidates; on later calls only thin_img voxels with at least one
// empty neighbour are candidates. Volume faces are never touched.
// NOTE(review): the first-call flag is a function-local static, so it is shared by
// all instances and is not cleared by reset().
void collapseSkel3d::simplify_skel(Volume<float> &lambda, float thr, float incr)
{
  const int width  = lambda.getWidth();
  const int height = lambda.getHeight();
  const int depth  = lambda.getDepth();
  static bool first = false;

  std::vector<Coord3> indices;

  if (first)
  {
    // Later calls: collect the current interface of thin_img
    for (int z = 1; z < depth - 1; z++)
      for (int y = 1; y < height - 1; y++)
        for (int x = 1; x < width - 1; x++)
        {
          if (!thin_img(x, y, z)) continue;

          int empty_nbs = 0;
          for (int nz = z - 1; nz <= z + 1; nz++)
            for (int ny = y - 1; ny <= y + 1; ny++)
              for (int nx = x - 1; nx <= x + 1; nx++)
                if (!thin_img(nx, ny, nz)) ++empty_nbs;

          float &imp = lambda(x, y, z);

          if (empty_nbs == 0)
          {
            // Interior voxel: make sure it stays above the threshold
            imp = std::max(imp, thr + 1e-7f);
            continue;
          }

          indices.push_back(Coord3(x, y, z, imp));
        }
  }
  else
  {
    // First call: threshold lambda into thin_img, every kept voxel is a candidate
    for (int z = 1; z < depth - 1; z++)
      for (int y = 1; y < height - 1; y++)
        for (int x = 1; x < width - 1; x++)
        {
          const float imp = lambda(x, y, z);
          if (imp < thr)
          {
            thin_img(x, y, z) = 0;
            continue;
          }

          indices.push_back(Coord3(x, y, z, imp));
          thin_img(x, y, z) = 255;
        }
    first = true;
  }

  // Process candidates in importance order (ties keep scan order)
  std::stable_sort(indices.begin(), indices.end(), index_cmp_minv());

  // Keep only the candidates that are simple on the current image
  std::vector<coord3s> thin_set;
  for (const Coord3 &cand : indices)
  {
    if (isSimple_tab(cand.c))
      thin_set.push_back(cand.c);
  }

  // Remove them one by one; earlier removals may make later ones non-simple
  for (const coord3s &c : thin_set)
  {
    if (!isSimple_tab(c))
    {
      lambda(c) = std::max(lambda(c), thr + 1e-7f);
      continue;
    }

    thin_img(c) = 0;
    lambda(c)   = thr - 1e-7f;
  }
}



// Raises the importance of below-threshold voxels that are surrounded by many
// above-threshold ones.
//
//   lambda : importance volume, updated in place.
//   thr    : importance threshold used to build the thin_img mask (0 / 255).
//   incr   : unused.
//
// A masked-out voxel with at least 9 masked-in voxels in its 3x3x3 neighbourhood
// receives 0.1 times the average importance of those neighbours on top of its own
// value. Volume faces are never touched.
void collapseSkel3d::simplify_skel_filter(Volume<float> &lambda, float thr,
    float incr)
{
  const int width  = lambda.getWidth();
  const int height = lambda.getHeight();
  const int depth  = lambda.getDepth();

  // Results go to a copy so every voxel is filtered from the unmodified input
  Volume<float> tmp(lambda);

  // Pass 1: mask of voxels at or above the threshold
  for (int z = 1; z < depth - 1; z++)
    for (int y = 1; y < height - 1; y++)
      for (int x = 1; x < width - 1; x++)
        thin_img[z][y][x] = (lambda[z][y][x] < thr) ? 0 : 255;

  // Pass 2: boost well-supported voxels outside the mask
  for (int z = 1; z < depth - 1; z++)
    for (int y = 1; y < height - 1; y++)
      for (int x = 1; x < width - 1; x++)
      {
        if (thin_img[z][y][x]) continue;

        float nb_sum   = 0.0f;
        int   nb_count = 0;
        for (int nz = z - 1; nz <= z + 1; nz++)
          for (int ny = y - 1; ny <= y + 1; ny++)
            for (int nx = x - 1; nx <= x + 1; nx++)
            {
              if (!thin_img[nz][ny][nx]) continue;
              nb_sum += lambda[nz][ny][nx];
              nb_count++;
            }

        if (nb_count < 9) continue;

        const float w = 0.1f;
        tmp[z][y][x] += w * (nb_sum / nb_count);
      }

  lambda = tmp;
}



//reaction part (as in reaction-xxx PDE)
void collapseSkel3d::reaction(const std::vector<coord3s> &ipoints,
                              Volume<float> &dens)
{
  for (int i = 0, size = ipoints.size(); i < size; i++)
  {
    const coord3s &c = ipoints[i];

    assert(!thin_img(c));

    if (dens[c.z][c.y][c.x] < 1.0f)
      dens[c.z][c.y][c.x] = 1.0f;
  }
}



// diffusion-like transport of density
float collapseSkel3d::transport_dens_diffusion(float weight,
    Volume<float> &dens, Volume<float> &dens_prev_, const Interface &ipoints)
{
  const int NP = ipoints.size();
  float tot_dens = 0.0f;
  static std::pair<float*, float>
  ball[8 * 8 * 8];       //Cache for thin-points and their weights, computed next
  //An entry stores the location of a density-value, and the weight to update it with
  for (int i = 0; i < NP;
       ++i)              //Thinned points push their density forward
  {
    const  Coord3 &C = ipoints[i];
    const  coord3s &c = C.c;                   //interface point
    float  dens_prev =
      C.v;                   //...and its cached dens_prev value (faster to access than volume indexing)
    float gx = 0.0f, gy = 0.0f, gz = 0.0f;
    float kval;

    if (!advect_vel)
    {
      const int rr = 2;           //1,2,4       //4D regression gradient
      for (int ii = -rr; ii <= rr; ++ii)
      {
        const Image<byte> &ti = thin_img[c.z + ii];
        for (int jj = -rr; jj <= rr; ++jj)
        {
          const byte* tj = ti[c.y + jj];
          for (int kk = -rr; kk <= rr; ++kk)
          {
            if (!tj[c.x + kk]) continue;
            //float fk  = 1.0f;         //dens_prev[c.z+ii][c.y+jj][c.x+kk];
            //float dst = fk/(ii*ii+jj*jj+kk*kk);
            gz += ii;
            gy += jj;
            gx += kk;
          }
        }
      }

      kval = weight * sqrtf(gx * gx + gy * gy + gz * gz) + 1e-7f;

      /*
      Volume<byte> &gimg = this->thin_img;
      //Volume<float>  &gimg = this->edt;
      int idx1 = 0; float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;
      for(int k=-1;k<=1;k++)
        for(int m=-1;m<=1;m++) {
      kval = kernel[idx1++];
      gx += kval*(gimg[c.z+k][c.y+m][c.x+1]-gimg[c.z+k][c.y+m][c.x-1]);
      gy += kval*(gimg[c.z+k][c.y+1][c.x+m]-gimg[c.z+k][c.y-1][c.x+m]);
      gz += kval*(gimg[c.z+1][c.y+k][c.x+m]-gimg[c.z-1][c.y+k][c.x+m]);
        }
      kval = sqrtf(gx*gx + gy*gy + gz*gz) + 1e-7f;
      */
      //If we estimate the velocity by the gradient, save it in v() for later use
      v(c) = Vector(gx / kval, gy / kval, gz / kval);
#if report_zero_vectors
      if (v(c).norm2() == 0.0f)
        std::cout << "zero velocity point: " << "(" << v(c).u << "," << v(c).v << "," << v(c).w << "), "
        << "edt :" << edt(c) << std::endl;
#endif
    }
    else
    {
      const Vector &vec = v(c);
      gx = vec.u;
      gy = vec.v;
      gz = vec.w;
      kval = vec.norm() + 1e-7f;
#if report_zero_vectors
      if (v(c).norm2() == 0.0f)
        std::cout << "zero velocity point: " << "(" << v(c).u << "," << v(c).v << "," << v(c).w << "), " 
        << "edt :" << edt(c) << std::endl;
#endif
    }

    gx /= kval;
    gy /= kval;
    gz /= kval;

    const float sigma2 = 0.15f; //0.15f
    /*
    int px=(int)(c.x+gx+0.5f);
    int py=(int)(c.y+gy+0.5f);
    int pz=(int)(c.z+gz+0.5f);

    bool do_dplane = false;
    float nx, ny, nz;
    if(!thin_img[pz][py][px]) {
      nx = gy*w[pz][py][px]-gz*v[pz][py][px];
      ny = gz*u[pz][py][px]-gx*w[pz][py][px];
      nz = gx*v[pz][py][px]-gy*u[pz][py][px];

      float nn = sqrtf(nx*nx+ny*ny+nz*nz);
      if(nn>1e-5f) {
    nx/=nn; ny/=nn; nz/=nn;
    do_dplane = true;
      }
    }
    */

    int B = 0;
    float sum_w = 0.0f;
    int ninside = 0;
    float *dw = diff_weights.get();
    auto dv = diff_vecs;
    int cX = c.x + r, cY = c.y + r, cZ = c.z + r;
    for (int zz = c.z - r; zz <= cZ; ++zz)
    {
      const Image<byte>  &ti = thin_img[zz];
      Image<float>       &di = dens[zz];
      for (int yy = c.y - r; yy <= cY; ++yy)
      {
        const byte*  tj = ti[yy];
        float*       dj = di[yy];
        for (int xx = c.x - r, ind = 0; xx <= cX; ++xx, ++dw, ++ind)
          if (tj[xx])
          {
            auto diff_vec = dv[ind];
            float dx = diff_vec[0] - gx, dy = diff_vec[1] - gy, dz = diff_vec[2] - gz;
            float dlen2 = dz * dz + dy * dy + dx * dx;
            /*
            if(do_dplane) {
              dlen2 = nx*(xx-c.x)+ny*(yy-c.y)+nz*(zz-c.z);
              dlen2 *= dlen2;
            }
            */
            //float w_grad = exp(-dlen2/(2.0f*sigma2));
            float w_grad = 1.0f + dlen2 / sigma2;
            float w = *dw / w_grad;
            sum_w += w;
            ++ninside;
            ball[B++] = std::make_pair(dj + xx,
                                  w);        //Add one density-location and its weight to the ball
          }
      }
    }
    //when thinning removes locally more than one voxel layer, this still happens
    //assert(sum_w>0.0f);

    float dprev = 0;                      //LUK: reaction
    if (sum_w > 0.0f)
      dprev = ((weight * dens_prev) + std::max(0.0f, ninside - 62.0f)) / sum_w;

#if 0
    //if(ninside<10)
    //dprev += dens_prev(c)*0.4f*(ninside-3.0f);
    //dprev += dens_prev(c)*0.02f*(ninside-10.0f);
    /*
    if(ninside>10 && ninside<20)
      //dprev += dens_prev(c)*0.005f*(ninside-10.0f);
       dprev = 0.0f;
    */
#endif

    for (--B; B >= 0;
         --B)                //Treat all points placed in the ball. This is much faster than
    {
      //reiterating over the 3D box and testing for thinness
      const std::pair<float*, float> &item = ball[B];
      *item.first += item.second *
                     dprev;         //Update density at stored location using stored weight
    }

    tot_dens += sum_w * dprev;

    //if(sum_w>0.0f) dens_prev(c) = 0.0f;
  }

  return tot_dens;
}





// Advection transport of density.
// For every interface voxel (asserted to be outside thin_img) the density held
// in dens_prev is moved one unit step along a direction vector and splatted
// with trilinear weights onto those of the eight surrounding grid corners that
// are still set in thin_img.
//   weight    - scale factor applied to the transported density
//   dens      - volume receiving the transported density
//   dens_prev - volume the transported density is read from
//   ipoints   - interface points; only the coordinate member .c is used
// Returns the sum of sum_w * rho over all voxels whose density could be moved.
// NOTE(review): neighbour accesses are not bounds-checked here; presumably the
// volumes are padded - confirm.
float collapseSkel3d::transport_dens_advection(float weight,
    Volume<float> &dens,
    Volume<float> &dens_prev,
    const Interface &ipoints)
{
  //spline kernel: nine 3x3 stencil weights, entry 9 is used as normalisation
  static const float kernel[10] = {1.0f, 4.0f, 1.0f, 4.0f, 16.0f, 4.0f, 1.0f, 4.0f, 1.0f, 72.0f};

  float moved_total = 0.0f;

  for (size_t n = 0; n < ipoints.size(); ++n)
  {
    const coord3s &c = ipoints[n].c;              //interface p

    assert(!thin_img[c.z][c.y][c.x]);

    float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;

    //Compile-time toggle: true takes the direction from the stored field v,
    //false estimates it from the thin_img gradient.
    const bool use_stored_velocity = false;

    if (use_stored_velocity)
    {
      Vector &vec = v(c);
      gx = vec.u;
      gy = vec.v;
      gz = vec.w;
      kval = vec.norm() + 1e-6f;
    }
    else
    {
      Volume<byte> &gimg = this->thin_img;

      //Central differences of thin_img, smoothed by the 3x3 kernel
      int idx = 0;
      for (int a = -1; a <= 1; ++a)
        for (int b = -1; b <= 1; ++b)
        {
          const float kw = kernel[idx++];
          gx += kw * (gimg[c.z + a][c.y + b][c.x + 1] -
                      gimg[c.z + a][c.y + b][c.x - 1]);
          gy += kw * (gimg[c.z + a][c.y + 1][c.x + b] -
                      gimg[c.z + a][c.y - 1][c.x + b]);
          gz += kw * (gimg[c.z + 1][c.y + a][c.x + b] -
                      gimg[c.z - 1][c.y + a][c.x + b]);
        }
      const float norm_factor = kernel[9];
      gx /= norm_factor;
      gy /= norm_factor;
      gz /= norm_factor;
      kval = sqrtf(gx * gx + gy * gy + gz * gz) + 1e-6f;
    }

    if (!(kval > 1e-7f))
    {
      //No usable direction: keep the density where it is
      printf("kval\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      continue;
    }

    //Target position: one normalised step away from c
    gx /= kval;
    gy /= kval;
    gz /= kval;

    gx += c.x;
    gy += c.y;
    gz += c.z;

    //Enclosing grid cell and the trilinear weights of its corners
    const int i0 = (int)gx, j0 = (int)gy, k0 = (int)gz;
    const float s1 = gx - i0, t1 = gy - j0, m1 = gz - k0;

    const int   xi[2] = {i0, i0 + 1};
    const int   yi[2] = {j0, j0 + 1};
    const int   zi[2] = {k0, k0 + 1};
    const float xw[2] = {1 - s1, s1};
    const float yw[2] = {1 - t1, t1};
    const float zw[2] = {1 - m1, m1};

    //Total weight of the corners that can receive density
    float sum_w = 0.0f;
    for (int a = 0; a < 2; ++a)
      for (int b = 0; b < 2; ++b)
        for (int d = 0; d < 2; ++d)
          if (thin_img[zi[a]][yi[b]][xi[d]])
            sum_w += xw[d] * yw[b] * zw[a];

    if (!(sum_w > 1e-7f))
    {
      //None of the corners is thin: keep the density where it is
      printf("sum_w\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      continue;
    }

    //Dividing by sum_w renormalises the weights of the receiving corners
    const float rho_remaining = (weight * dens_prev[c.z][c.y][c.x]) / sum_w;

    for (int a = 0; a < 2; ++a)
      for (int b = 0; b < 2; ++b)
        for (int d = 0; d < 2; ++d)
          if (thin_img[zi[a]][yi[b]][xi[d]])
            dens[zi[a]][yi[b]][xi[d]] += xw[d] * yw[b] * zw[a] * rho_remaining;

    moved_total += sum_w * rho_remaining;
  }

  return moved_total;
}


// advection transport of density
float collapseSkel3d::transport_dens_advection_fwd(float weight,
    Volume<float> &dens,
    Volume<float> &dens_prev, Volume<float> &tmp_dens,
    std::vector<coord3s> &ipoints)
{
  float tot_dens = 0.0f;

  //spline kernel
  static const float kernel[10] = {1.0f, 4.0f, 1.0f, 4.0f, 16.0f, 4.0f, 1.0f, 4.0f, 1.0f, 72.0f};

  for (size_t i = 0; i < ipoints.size(); i++)
  {
    const coord3s &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    if (tmp_dens[c.z][c.y][c.x] >= 1.0f) continue;

    float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;

    //const bool flag = isRemovable(c.x, c.y, c.z, dens_prev);
    const bool flag = false;
    //const bool flag = true;

    if (flag)
    {
      const Vector &vec = v(c);
      gx = vec.u;
      gy = vec.v;
      gz = vec.w;
      kval = vec.norm() + 1e-6f;
    }
    else
    {

      Volume<byte> &gimg = this->thin_img;
      //Volume<float>  &gimg = this->edt;

      int idx = 0;
      for (int k = -1; k <= 1; k++)
        for (int m = -1; m <= 1; m++)
        {
          kval = kernel[idx++];
          gx += kval * (gimg[c.z + k][c.y + m][c.x + 1] -
                        gimg[c.z + k][c.y + m][c.x - 1]);
          gy += kval * (gimg[c.z + k][c.y + 1][c.x + m] -
                        gimg[c.z + k][c.y - 1][c.x + m]);
          gz += kval * (gimg[c.z + 1][c.y + k][c.x + m] -
                        gimg[c.z - 1][c.y + k][c.x + m]);
        }
      kval = sqrtf(gx * gx + gy * gy + gz * gz);
    }

    if (kval > 1e-7f)
    {
      gx /= kval;
      gy /= kval;
      gz /= kval;

      gx += c.x;
      gy += c.y;
      gz += c.z;

      int i0 = (int)gx, i1 = i0 + 1, j0 = (int)gy, j1 = j0 + 1, k0 = (int)gz,
          k1 = k0 + 1;
      float s1 = gx - i0, s0 = 1 - s1, t1 = gy - j0, t0 = 1 - t1, m1 = gz - k0,
            m0 = 1 - m1;

      float sum_w = 0.0f;
      if (thin_img[k0][j0][i0])
        sum_w += s0 * t0 * m0;
      if (thin_img[k0][j0][i1])
        sum_w += s1 * t0 * m0;
      if (thin_img[k0][j1][i0])
        sum_w += s0 * t1 * m0;
      if (thin_img[k0][j1][i1])
        sum_w += s1 * t1 * m0;
      if (thin_img[k1][j0][i0])
        sum_w += s0 * t0 * m1;
      if (thin_img[k1][j0][i1])
        sum_w += s1 * t0 * m1;
      if (thin_img[k1][j1][i0])
        sum_w += s0 * t1 * m1;
      if (thin_img[k1][j1][i1])
        sum_w += s1 * t1 * m1;

      if (sum_w > 1e-7f)
      {
        //float rho_remaining = (weight*dens_prev[c.z][c.y][c.x]);
        float rho_remaining = (weight * (1.0f - tmp_dens[c.z][c.y][c.x]) *
                               dens_prev[c.z][c.y][c.x]);
        /*
        float diff = weight*std::max(1.0f-dens_prev[c.z][c.y][c.x], 0.0f);
        rho_remaining += (sum_w>0.0f) ? 1.0f*diff/sum_w : 0.0f;
        */

        if (thin_img[k0][j0][i0])
          dens[k0][j0][i0] += s0 * t0 * m0 * rho_remaining;
        if (thin_img[k0][j0][i1])
          dens[k0][j0][i1] += s1 * t0 * m0 * rho_remaining;
        if (thin_img[k0][j1][i0])
          dens[k0][j1][i0] += s0 * t1 * m0 * rho_remaining;
        if (thin_img[k0][j1][i1])
          dens[k0][j1][i1] += s1 * t1 * m0 * rho_remaining;
        if (thin_img[k1][j0][i0])
          dens[k1][j0][i0] += s0 * t0 * m1 * rho_remaining;
        if (thin_img[k1][j0][i1])
          dens[k1][j0][i1] += s1 * t0 * m1 * rho_remaining;
        if (thin_img[k1][j1][i0])
          dens[k1][j1][i0] += s0 * t1 * m1 * rho_remaining;
        if (thin_img[k1][j1][i1])
          dens[k1][j1][i1] += s1 * t1 * m1 * rho_remaining;

        tot_dens += rho_remaining;
        //tot_dens += sum_w*rho_remaining;
        //dens_prev[c.z][c.y][c.x] = 0.0f;

      }
      else
      {
        printf("sum_w\n");
        dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      }
    }
    else
    {
      printf("kval\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
    }
  }

  return tot_dens;
}

void collapseSkel3d::potential_new_interface(std::vector<coord3s> &ipoints,
    std::vector<coord3s> &new_ipoints)
{
  const int rad = 1;

  for (size_t i = 0; i < ipoints.size(); ++i)
  {
    const coord3s &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    for (int zz = c.z - rad; zz <= c.z + rad; zz++)
      for (int yy = c.y - rad; yy <= c.y + rad; yy++)
        for (int xx = c.x - rad; xx <= c.x + rad; xx++)
        {

          if (!thin_img[zz][yy][xx]) continue;

          new_ipoints.push_back(coord3s(xx, yy, zz));
        }
  }

  std::sort(new_ipoints.begin(), new_ipoints.end());
  new_ipoints.erase( unique( new_ipoints.begin(), new_ipoints.end() ),
                     new_ipoints.end() );
}

float collapseSkel3d::transport_dens_advection_back(float weight,
    Volume<float> &dens,
    Volume<float> &dens_prev, Volume<float> &tmp_dens,
    std::vector<coord3s> &new_ipoints,
    std::vector<coord3s> &ipoints)
{
  //spline kernel
  static const float kernel[10] = {1.0f, 4.0f, 1.0f, 4.0f, 16.0f, 4.0f, 1.0f, 4.0f, 1.0f, 72.0f};

  float tdens = 0.0f;

  for (size_t i = 0; i < new_ipoints.size(); ++i)
  {
    const coord3s &c = new_ipoints[i];

    assert(thin_img[c.z][c.y][c.x]);

    Volume<byte> &gimg = this->thin_img;

    float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;

    int idx = 0;
    for (int k = -1; k <= 1; k++)
      for (int m = -1; m <= 1; m++)
      {
        kval = kernel[idx++];
        gx += kval * (gimg[c.z + k][c.y + m][c.x + 1] -
                      gimg[c.z + k][c.y + m][c.x - 1]);
        gy += kval * (gimg[c.z + k][c.y + 1][c.x + m] -
                      gimg[c.z + k][c.y - 1][c.x + m]);
        gz += kval * (gimg[c.z + 1][c.y + k][c.x + m] -
                      gimg[c.z - 1][c.y + k][c.x + m]);
      }
    kval = sqrtf(gx * gx + gy * gy + gz * gz);

    if (kval < 1e-7f)
    {
      printf("advection_back kval\n");
      continue;
    }

    gx /= kval;
    gy /= kval;
    gz /= kval;

    float z = c.z - gz, y = c.y - gy, x = c.x - gx;

    /*
    int i0=(int)(x+0.5f), j0=(int)(y+0.5f), k0=(int)(z+0.5f);

    if(thin_img[k0][j0][i0]) continue;

    float rho_remaining = (weight*dens_prev[k0][j0][i0]);

    dens[c.z][c.y][c.x] += rho_remaining;
    */


    int i0 = (int)x, i1 = i0 + 1, j0 = (int)y, j1 = j0 + 1, k0 = (int)z,
        k1 = k0 + 1;
    float s1 = x - i0, s0 = 1 - s1, t1 = y - j0, t0 = 1 - t1, m1 = z - k0,
          m0 = 1 - m1;

    if (!thin_img[k0][j0][i0])
    {
      dens[c.z][c.y][c.x] += s0 * t0 * m0 * dens_prev[k0][j0][i0];
      tdens += s0 * t0 * m0 * dens_prev[k0][j0][i0];
      tmp_dens[k0][j0][i0] += s0 * t0 * m0;
    }
    if (!thin_img[k0][j0][i1])
    {
      dens[c.z][c.y][c.x] += s1 * t0 * m0 * dens_prev[k0][j0][i1];
      tdens += s1 * t0 * m0 * dens_prev[k0][j0][i1];
      tmp_dens[k0][j0][i1] += s1 * t0 * m0;
    }
    if (!thin_img[k0][j1][i0])
    {
      dens[c.z][c.y][c.x] += s0 * t1 * m0 * dens_prev[k0][j1][i0];
      tdens += s0 * t1 * m0 * dens_prev[k0][j1][i0];
      tmp_dens[k0][j1][i0] += s0 * t1 * m0;
    }
    if (!thin_img[k0][j1][i1])
    {
      dens[c.z][c.y][c.x] += s1 * t1 * m0 * dens_prev[k0][j1][i1];
      tdens += s1 * t1 * m0 * dens_prev[k0][j1][i1];
      tmp_dens[k0][j1][i1] += s1 * t1 * m0;
    }
    if (!thin_img[k1][j0][i0])
    {
      dens[c.z][c.y][c.x] += s0 * t0 * m1 * dens_prev[k1][j0][i0];
      tdens += s0 * t0 * m1 * dens_prev[k1][j0][i0];
      tmp_dens[k1][j0][i0] += s0 * t0 * m1;
    }
    if (!thin_img[k1][j0][i1])
    {
      dens[c.z][c.y][c.x] += s1 * t0 * m1 * dens_prev[k1][j0][i1];
      tdens += s1 * t0 * m1 * dens_prev[k1][j0][i1];
      tmp_dens[k1][j0][i1] += s1 * t0 * m1;
    }
    if (!thin_img[k1][j1][i0])
    {
      dens[c.z][c.y][c.x] += s0 * t1 * m1 * dens_prev[k1][j1][i0];
      tdens += s0 * t1 * m1 * dens_prev[k1][j1][i0];
      tmp_dens[k1][j1][i0] += s0 * t1 * m1;
    }
    if (!thin_img[k1][j1][i1])
    {
      dens[c.z][c.y][c.x] += s1 * t1 * m1 * dens_prev[k1][j1][i1];
      tdens += s1 * t1 * m1 * dens_prev[k1][j1][i1];
      tmp_dens[k1][j1][i1] += s1 * t1 * m1;
    }
  }

  return tdens;

}


float collapseSkel3d::transport_dens_advection_project(float weight,
    std::vector<coord3s> &out_coords, Volume<float> &tmp_dens, Volume<float> &dens,
    Volume<float> &dens_prev,
    std::vector<coord3s> &ipoints)
{
  float tot_dens = 0.0f;


#if 0
  const int rad = 1;

  for (int i = 0; i < out_coords.size(); i++)
  {
    const coord3s &c = out_coords[i];

    assert(!thin_img[c.z][c.y][c.x]);

    int search = true;

    for (int zz = c.z - rad; zz <= c.z + rad; zz++)
      for (int yy = c.y - rad; yy <= c.y + rad; yy++)
        for (int xx = c.x - rad; xx <= c.x + rad; xx++)
        {

          //if(!search) continue;

          if (!thin_img[zz][yy][xx]) continue;

          //LUK: problem see box.fld
          //printf("%f %f %f\n", dens[zz][yy][xx], dens_prev[zz][yy][xx], tmp_dens[c.z][c.y][c.x]);

          //if( dens[zz][yy][xx]>1.99f+dens_prev[zz][yy][xx] ) continue;
          if ( dens[zz][yy][xx] > dens_prev[zz][yy][xx] ) continue;

          if (isSimple_tab(coord3s(xx, yy, zz)))
          {
            dens[zz][yy][xx] += tmp_dens[c.z][c.y][c.x];
            tot_dens += tmp_dens[c.z][c.y][c.x];
            //tmp_dens[c.z][c.y][c.x] = 0.0f;
            search = false;
            break;
          }
        }

    if (search)
    {
      dens[c.z][c.y][c.x] += tmp_dens[c.z][c.y][c.x];
      //tmp_dens[c.z][c.y][c.x] = 0.0f;
      printf("bla\n");
    }

  }


  return tot_dens;
  ////////////////////////////////
#endif




  for (size_t i = 0; i < out_coords.size(); ++i)
  {
    const coord3s &c = out_coords[i];

    assert(!thin_img(c));

    const int x = c.x;
    const int y = c.y;
    const int z = c.z;

    if (tmp_dens[z][y][x] < 1e-7f) continue;


    float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;

    const Vector &vec = v(x, y, z);
    gx = vec.u;
    gy = vec.v;
    gz = vec.w;

    kval = vec.norm();

#if 1
    if (kval > 1e-7f)
    {
      gx /= kval;
      gy /= kval;
      gz /= kval;

      for (float ll = -3.5f; ll < 3.6f; ll += 0.01f)
      {

        //gx+=x; gy+=y; gz+=z;
        float gx1 = x + ll * gx;
        float gy1 = y + ll * gy;
        float gz1 = z + ll * gz;

        /*
            int i0=(int)gx1, i1=i0+1, j0=(int)gy1, j1=j0+1, k0=(int)gz1, k1=k0+1;
            float s1 = gx1-i0, s0 = 1-s1, t1 = gy1-j0, t0 = 1-t1, m1 = gz1-k0, m0 = 1-m1;

            float sum_w = 0.0f;
            if(thin_img[k0][j0][i0])
        sum_w += s0*t0*m0;
            if(thin_img[k0][j0][i1])
        sum_w += s1*t0*m0;
            if(thin_img[k0][j1][i0])
        sum_w += s0*t1*m0;
            if(thin_img[k0][j1][i1])
        sum_w += s1*t1*m0;
            if(thin_img[k1][j0][i0])
        sum_w += s0*t0*m1;
            if(thin_img[k1][j0][i1])
        sum_w += s1*t0*m1;
            if(thin_img[k1][j1][i0])
        sum_w += s0*t1*m1;
            if(thin_img[k1][j1][i1])
        sum_w += s1*t1*m1;
        */

        int i0 = (int)(gx1 + 0.5f), j0 = (int)(gy1 + 0.5f), k0 = (int)(gz1 + 0.5f);
        float s0 = 1, t0 = 1, m0 = 1;

        float sum_w = 0.0f;
        if (thin_img[k0][j0][i0])
          sum_w += s0 * t0 * m0;


        if (sum_w > 1e-7f)
        {
          float rho_remaining = (weight * tmp_dens[z][y][x]) / sum_w;
          /*
          float diff = weight*std::max(1.0f-dens_prev[c.z][c.y][c.x], 0.0f);
          rho_remaining += (sum_w>0.0f) ? 1.0f*diff/sum_w : 0.0f;
          */

          if (thin_img[k0][j0][i0])
          {
            dens[k0][j0][i0] += s0 * t0 * m0 * rho_remaining;

            tot_dens += sum_w * rho_remaining;

            tmp_dens[z][y][x] = 0.0f;
          }

          /*
          if(thin_img[k0][j0][i0])
            dens[k0][j0][i0] += s0*t0*m0*rho_remaining;
          if(thin_img[k0][j0][i1])
            dens[k0][j0][i1] += s1*t0*m0*rho_remaining;
          if(thin_img[k0][j1][i0])
            dens[k0][j1][i0] += s0*t1*m0*rho_remaining;
          if(thin_img[k0][j1][i1])
            dens[k0][j1][i1] += s1*t1*m0*rho_remaining;
          if(thin_img[k1][j0][i0])
            dens[k1][j0][i0] += s0*t0*m1*rho_remaining;
          if(thin_img[k1][j0][i1])
            dens[k1][j0][i1] += s1*t0*m1*rho_remaining;
          if(thin_img[k1][j1][i0])
            dens[k1][j1][i0] += s0*t1*m1*rho_remaining;
          if(thin_img[k1][j1][i1])
            dens[k1][j1][i1] += s1*t1*m1*rho_remaining;

          tot_dens += sum_w*rho_remaining;

          tmp_dens[z][y][x]=0.0f;
          */

          break;

        }
        else
        {
          //printf("prj sum_w %f\n", ll);
          //dens[z][y][x] = dens_prev[z][y][x];

          //printf("%f %f %f\n", gx-x, gy-y, gz-z);
        }
      }//end ll

      if (tmp_dens[z][y][x] > 1e-7f)
      {
        printf("prj sum_w\n");
        //assert(edt[z][y][x]<3.1f);
        //tot_dens += tmp_dens[z][y][x];
        dens[z][y][x] += tmp_dens[z][y][x];
      }

    }
    else
    {
      printf("prj kval\n");
      //dens[z][y][x] = dens_prev[z][y][x];
    }
#endif

  }

  /*
  for(int i=0;i<ipoints.size();i++) {
   const coord3s &c = ipoints[i];

   thin_img[c.z][c.y][c.x] = 0;
  }
  */
  return tot_dens;
}

float collapseSkel3d::transport_dens_advection_unconstrained(float weight,
    std::vector<coord3s> &out_coords, Volume<float> &tmp_dens, Volume<float> &dens,
    Volume<float> &dens_prev,
    std::vector<coord3s> &ipoints)
{
  float tot_dens = 0.0f;

  float lost_dens = 0.0f;

  //spline kernel
  static const float kernel[10] = {1.0f, 4.0f, 1.0f, 4.0f, 16.0f, 4.0f, 1.0f, 4.0f, 1.0f, 72.0f};

  for (size_t i = 0; i < ipoints.size(); ++i)
  {
    const coord3s &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    float gx = 0.0f, gy = 0.0f, gz = 0.0f, kval;

    //const bool flag = isRemovable(c.x, c.y, c.z, dens_prev);
    //const bool flag = false;
    const bool flag = true;

    if (flag)
    {
      const Vector &vec = v(c);
      gx = vec.u;
      gy = vec.v;
      gz = vec.w;
      kval = vec.norm();
      //kval = std::max(fabsf(gx), std::max(fabsf(gy), fabsf(gz)));
    }
    else
    {

      Volume<byte> &gimg = this->thin_img;
      //Volume<float>  &gimg = this->edt;

      int idx = 0;
      for (int k = -1; k <= 1; k++)
        for (int m = -1; m <= 1; m++)
        {
          kval = kernel[idx++];
          gx += kval * (gimg[c.z + k][c.y + m][c.x + 1] -
                        gimg[c.z + k][c.y + m][c.x - 1]);
          gy += kval * (gimg[c.z + k][c.y + 1][c.x + m] -
                        gimg[c.z + k][c.y - 1][c.x + m]);
          gz += kval * (gimg[c.z + 1][c.y + k][c.x + m] -
                        gimg[c.z - 1][c.y + k][c.x + m]);
        }
      kval = kernel[9];
      gx /= kval;
      gy /= kval;
      gz /= kval;
      kval = sqrtf(gx * gx + gy * gy + gz * gz) + 1e-6f;
    }

    if (kval > 1e-7f)
    {
      gx /= kval;
      gy /= kval;
      gz /= kval;

      gx += c.x;
      gy += c.y;
      gz += c.z;

      /*
      int i0=(int)gx, i1=i0+1, j0=(int)gy, j1=j0+1, k0=(int)gz, k1=k0+1;
      float s1 = gx-i0, s0 = 1-s1, t1 = gy-j0, t0 = 1-t1, m1 = gz-k0, m0 = 1-m1;

      float sum_w = 0.0f;
      if(thin_img[k0][j0][i0])
      sum_w += s0*t0*m0;
      if(thin_img[k0][j0][i1])
      sum_w += s1*t0*m0;
      if(thin_img[k0][j1][i0])
      sum_w += s0*t1*m0;
      if(thin_img[k0][j1][i1])
      sum_w += s1*t1*m0;
      if(thin_img[k1][j0][i0])
      sum_w += s0*t0*m1;
      if(thin_img[k1][j0][i1])
      sum_w += s1*t0*m1;
      if(thin_img[k1][j1][i0])
      sum_w += s0*t1*m1;
      if(thin_img[k1][j1][i1])
      sum_w += s1*t1*m1;

      //LUK:
      sum_w = 1.0f;
      */

      int i0 = (int)(gx + 0.5f), j0 = (int)(gy + 0.5f), k0 = (int)(gz + 0.5f);
      float s0 = 1, t0 = 1, m0 = 1;

      float sum_w = 0.0f;
      sum_w += s0 * t0 * m0;


      if (sum_w > 1e-7f)
      {
        float rho_remaining = (weight * dens_prev[c.z][c.y][c.x]) / sum_w;
        /*
        float diff = weight*std::max(1.0f-dens_prev[c.z][c.y][c.x], 0.0f);
        rho_remaining += diff/sum_w;
        */

        if (thin_img[k0][j0][i0])
        {
          dens[k0][j0][i0] += s0 * t0 * m0 * rho_remaining;
          tot_dens += sum_w * rho_remaining;
        }
        else
        {

          out_coords.push_back(coord3s(i0, j0, k0));
          tmp_dens[k0][j0][i0] += s0 * t0 * m0 * rho_remaining;

          /*
          out_coords.push_back(coord3s(c.x, c.y, c.z));
          tmp_dens[c.z][c.y][c.x] = s0*t0*m0*rho_remaining;
          */
          lost_dens += sum_w * rho_remaining;

        }

        /*
          if(thin_img[k0][j0][i0])
            dens[k0][j0][i0] += s0*t0*m0*rho_remaining;
          else {
            out_coords.push_back(coord3s(i0, j0, k0));
            tmp_dens[k0][j0][i0] += s0*t0*m0*rho_remaining;
          }
          if(thin_img[k0][j0][i1])
            dens[k0][j0][i1] += s1*t0*m0*rho_remaining;
          else {
            out_coords.push_back(coord3s(i1, j0, k0));
            tmp_dens[k0][j0][i1] += s1*t0*m0*rho_remaining;
          }
          if(thin_img[k0][j1][i0])
            dens[k0][j1][i0] += s0*t1*m0*rho_remaining;
          else {
            out_coords.push_back(coord3s(i0, j1, k0));
            tmp_dens[k0][j1][i0] += s0*t1*m0*rho_remaining;
          }
          if(thin_img[k0][j1][i1])
            dens[k0][j1][i1] += s1*t1*m0*rho_remaining;
          else {
            out_coords.push_back(coord3s(i1, j1, k0));
            tmp_dens[k0][j1][i1] += s1*t1*m0*rho_remaining;
          }
          if(thin_img[k1][j0][i0])
            dens[k1][j0][i0] += s0*t0*m1*rho_remaining;
          else {
            out_coords.push_back(coord3s(i0, j0, k1));
            tmp_dens[k1][j0][i0] += s0*t0*m1*rho_remaining;
          }
          if(thin_img[k1][j0][i1])
            dens[k1][j0][i1] += s1*t0*m1*rho_remaining;
          else {
            out_coords.push_back(coord3s(i1, j0, k1));
            tmp_dens[k1][j0][i1] += s1*t0*m1*rho_remaining;
          }
          if(thin_img[k1][j1][i0])
            dens[k1][j1][i0] += s0*t1*m1*rho_remaining;
          else {
            out_coords.push_back(coord3s(i0, j1, k1));
            tmp_dens[k1][j1][i0] += s0*t1*m1*rho_remaining;
          }
          if(thin_img[k1][j1][i1])
            dens[k1][j1][i1] += s1*t1*m1*rho_remaining;
          else {
            out_coords.push_back(coord3s(i1, j1, k1));
            tmp_dens[k1][j1][i1] += s1*t1*m1*rho_remaining;
          }
        */

        //tot_dens += sum_w*rho_remaining;
      }
      else
      {
        printf("unc sum_w\n");
        dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
      }
    }
    else
    {
      printf("unc kval\n");
      dens[c.z][c.y][c.x] = dens_prev[c.z][c.y][c.x];
    }
  }

  if (lost_dens > 1e-7f)
    printf("lost: %f\n", lost_dens);



  return tot_dens;
}

// advection transport of velocity/momentum
void collapseSkel3d::transport_vel_advection(float weight, Volume<float> &dens,
    Volume<Vector> &v,
    std::vector<coord3s> &ipoints)
{
  for (size_t i = 0; i < ipoints.size(); ++i)
  {
    const coord3s &c = ipoints[i];

    assert(!thin_img[c.z][c.y][c.x]);

    const Vector &vec = v(c);
    float gx = vec.u;
    float gy = vec.v;
    float gz = vec.w;
    float kval = sqrtf(gx * gx + gy * gy + gz * gz);
    //float kval = dens[c.z][c.y][c.x];

    if (kval > 1e-7f)
    {
      //float gx1=gx, gy1=gy, gz1=gz;

      gx /= kval;
      gy /= kval;
      gz /= kval;

      float gx1 = gx, gy1 = gy, gz1 = gz;

      gx += c.x;
      gy += c.y;
      gz += c.z;

      int i0 = (int)gx, i1 = i0 + 1, j0 = (int)gy, j1 = j0 + 1, k0 = (int)gz,
          k1 = k0 + 1;
      float s1 = gx - i0, s0 = 1 - s1, t1 = gy - j0, t0 = 1 - t1, m1 = gz - k0,
            m0 = 1 - m1;

      float sum_w = 0.0f;
      if (thin_img[k0][j0][i0])
        sum_w += s0 * t0 * m0;
      if (thin_img[k0][j0][i1])
        sum_w += s1 * t0 * m0;
      if (thin_img[k0][j1][i0])
        sum_w += s0 * t1 * m0;
      if (thin_img[k0][j1][i1])
        sum_w += s1 * t1 * m0;
      if (thin_img[k1][j0][i0])
        sum_w += s0 * t0 * m1;
      if (thin_img[k1][j0][i1])
        sum_w += s1 * t0 * m1;
      if (thin_img[k1][j1][i0])
        sum_w += s0 * t1 * m1;
      if (thin_img[k1][j1][i1])
        sum_w += s1 * t1 * m1;

      if (sum_w > 1e-7f)
      {

        //advect momentum field
        float rho_remaining = (weight * dens[c.z][c.y][c.x]) / sum_w;

        //advect velocity field
        //float rho_remaining = weight/sum_w;


        if (thin_img[k0][j0][i0])
        {
          Vector &vec = v(i0, j0, k0);
          vec.u += s0 * t0 * m0 * rho_remaining * gx1;
          vec.v += s0 * t0 * m0 * rho_remaining * gy1;
          vec.w += s0 * t0 * m0 * rho_remaining * gz1;
        }
        if (thin_img[k0][j0][i1])
        {
          Vector &vec = v(i1, j0, k0);
          vec.u += s1 * t0 * m0 * rho_remaining * gx1;
          vec.v += s1 * t0 * m0 * rho_remaining * gy1;
          vec.w += s1 * t0 * m0 * rho_remaining * gz1;
        }
        if (thin_img[k0][j1][i0])
        {
          Vector &vec = v(i0, j1, k0);
          vec.u += s0 * t1 * m0 * rho_remaining * gx1;
          vec.v += s0 * t1 * m0 * rho_remaining * gy1;
          vec.w += s0 * t1 * m0 * rho_remaining * gz1;
        }
        if (thin_img[k0][j1][i1])
        {
          Vector &vec = v(i1, j1, k0);
          vec.u += s1 * t1 * m0 * rho_remaining * gx1;
          vec.v += s1 * t1 * m0 * rho_remaining * gy1;
          vec.w += s1 * t1 * m0 * rho_remaining * gz1;
        }
        if (thin_img[k1][j0][i0])
        {
          Vector &vec = v(i0, j0, k1);
          vec.u += s0 * t0 * m1 * rho_remaining * gx1;
          vec.v += s0 * t0 * m1 * rho_remaining * gy1;
          vec.w += s0 * t0 * m1 * rho_remaining * gz1;
        }
        if (thin_img[k1][j0][i1])
        {
          Vector &vec = v(i1, j0, k1);
          vec.u += s1 * t0 * m1 * rho_remaining * gx1;
          vec.v += s1 * t0 * m1 * rho_remaining * gy1;
          vec.w += s1 * t0 * m1 * rho_remaining * gz1;
        }
        if (thin_img[k1][j1][i0])
        {
          Vector &vec = v(i0, j1, k1);
          vec.u += s0 * t1 * m1 * rho_remaining * gx1;
          vec.v += s0 * t1 * m1 * rho_remaining * gy1;
          vec.w += s0 * t1 * m1 * rho_remaining * gz1;
        }
        if (thin_img[k1][j1][i1])
        {
          Vector &vec = v(i1, j1, k1);
          vec.u += s1 * t1 * m1 * rho_remaining * gx1;
          vec.v += s1 * t1 * m1 * rho_remaining * gy1;
          vec.w += s1 * t1 * m1 * rho_remaining * gz1;
        }
      }
    }
  }
}




// advection transport of velocity/momentum
void collapseSkel3d::transport_vel_diffusion(float weight,
    const Interface &ipoints)
{
  static std::pair<Vector*, float>
  ball[8 * 8 * 8];      //Cache for points to be updated. Same idea as in transport_dens_diffusion.

  int NP = ipoints.size();

  for (int i = 0; i < NP; ++i)
  {
    const Coord3 &C = ipoints[i];
    const coord3s &c = C.c;

    const Vector &vec = v(c);
    float gx = vec.u;
    float gy = vec.v;
    float gz = vec.w;
    float kval = vec.norm2();
    if (kval < 1e-10f) continue;

    kval = sqrtf(kval);
    gx /= kval;
    gy /= kval;
    gz /= kval;

    /*
    float *diff_vec;
    const float sigma2 = 0.05f;
    */

    int B = 0;
    float sum_w = 0.0f;
    float* dw = diff_weights.get();
    int ninside = 0;
    int cZ = c.z + r, cY = c.y + r, cX = c.x + r;
    for (int zz = c.z - r; zz <= cZ; ++zz)
    {
      Image<Vector> &iv = v[zz];
      const Image<byte>  &it = thin_img[zz];
      for (int yy = c.y - r; yy <= cY; ++yy)
      {
        Vector*     vv = iv[yy];
        const byte* vt = it[yy];
        for (int xx = c.x - r; xx <= cX; ++xx, ++dw)
        {
          if (!vt[xx]) continue;
          /*
          diff_vec = diff_vecs[idx];
          float dx=diff_vec[0]-gx, dy=diff_vec[1]-gy, dz=diff_vec[2]-gz;
          float dlen2 = dz*dz + dy*dy + dx*dx;
          //float w_grad = exp(-dlen2/(2.0f*sigma2));
          float w_grad = 1.0f/(1.0f + dlen2/sigma2);
          sum_w += (*dw)*w_grad;
          */
          float w = *dw;
          sum_w += w;
          ball[B++] = std::make_pair(&vv[xx], w);
          ++ninside;
        }
      }
    }

    if (!ninside) continue;

    //when thinning removes locally more than one voxel layer, this still happens
    //assert(sum_w>0.0f);

    float dprev = ipoints[i].v /
                  sum_w;               //Update velocity vector for cached points.
    gx *= dprev;
    gy *= dprev;
    gz *= dprev;
    for (--B; B >= 0; --B)
    {
      const std::pair<Vector*, float> &item = ball[B];
      float val = item.second;
      Vector &vec = *item.first;
      vec.u += val * gx;
      vec.v += val * gy;
      vec.w += val * gz;
    }
  }
}



// Tests whether a step from voxel (x, y, z) along its vector in v reaches the
// thin image: returns true when at least one of the eight corners of the grid
// cell containing the target position is set in thin_img, and false when the
// vector at the voxel is (near) zero.
//   x, y, z - voxel coordinate
//   dens    - density volume; its value at the voxel divides the vector
// NOTE(review): the vector is divided by the density and then by the norm of
// the undivided vector, so the step has length 1/dd rather than 1 - confirm
// that this is intended.
bool collapseSkel3d::isRemovable(int x, int y, int z, Volume<float> &dens)
{
  coord3s c(x, y, z);

  const float dd = dens(c) + 1e-7f;       //epsilon avoids a division by zero
  const Vector &vec = v(c);
  const float kval = vec.norm();

  if (!(kval > 1e-7f)) return false;

  //Target position of the step
  float gx = vec.u / dd;
  float gy = vec.v / dd;
  float gz = vec.w / dd;

  gx /= kval;
  gy /= kval;
  gz /= kval;

  gx += c.x;
  gy += c.y;
  gz += c.z;

  //Count the thin corners of the grid cell that contains the target
  const int i0 = (int)gx, j0 = (int)gy, k0 = (int)gz;

  int thin_corners = 0;
  for (int dz = 0; dz < 2; ++dz)
    for (int dy = 0; dy < 2; ++dy)
      for (int dx = 0; dx < 2; ++dx)
        if (thin_img[k0 + dz][j0 + dy][i0 + dx]) ++thin_corners;

  return thin_corners > 0;
}



/**
 * Transports density from dens_prev into dens along the normalised velocity
 * field v, in three passes over the interior voxels (a one-voxel border is
 * left untouched), followed by an update of max_dens.
 *
 *  1. Every voxel is traced backward by dt along its unit velocity and the
 *     trilinear weights of the traced point are accumulated per donor cell.
 *  2. The same backward trace gathers density from dens_prev; each donor's
 *     contribution is divided by max(1, accumulated weight) so that a donor
 *     cannot hand out more than it holds.
 *  3. Donors whose accumulated weight is below 1 still hold density; that
 *     remainder is traced forward and splatted into dens.
 *
 * Finally max_dens keeps the running maximum of dens / (0.5 * tdens).
 *
 * NOTE(review): 'weights' is accumulated with += right after construction;
 * this presumably relies on Volume's constructor zero-filling - confirm.
 * NOTE(review): the last pass divides by 0.5f*tdens, so tdens is assumed to
 * be non-zero when this is called - confirm against the caller.
 *
 * @param dens       Destination density (overwritten in the interior).
 * @param dens_prev  Source density (read only here).
 */
void  collapseSkel3d::advect_conservative(Volume<float> &dens,
    Volume<float> &dens_prev)
{
  const int width = dens.getWidth(), height = dens.getHeight(),
            depth = dens.getDepth();

  const float dt = 0.99990f;

  //Grid cell enclosing a traced point, plus the 1D interpolation weights on
  //each axis. Corner n (0..7) uses bit 0 for x, bit 1 for y and bit 2 for z,
  //which enumerates the corners as (k0,j0,i0), (k0,j0,i1), (k0,j1,i0), ...
  struct Stencil
  {
    int   i[2], j[2], k[2];
    float s[2], t[2], m[2];

    float weight(int n) const
    {
      return s[n & 1] * t[(n >> 1) & 1] * m[n >> 2];
    }
  };

  //Moves voxel (ci,cj,ck) by dt along its unit velocity (forward or backward),
  //clamps the result so the enclosing cell stays inside the volume, and
  //returns that cell with its trilinear weights.
  const auto trace = [&](int ci, int cj, int ck, bool forward) -> Stencil
  {
    float px = (float) ci;
    float py = (float) cj;
    float pz = (float) ck;

    const Vector &vel = v(ci, cj, ck);
    const float speed = vel.norm();

    if (!(speed < 1e-5f))                 //(near-)zero velocity: stay in place
    {
      const float dx = vel.u / speed;
      const float dy = vel.v / speed;
      const float dz = vel.w / speed;

      if (forward)
      {
        px = ci + dt * dx;
        py = cj + dt * dy;
        pz = ck + dt * dz;
      }
      else
      {
        px = ci - dt * dx;
        py = cj - dt * dy;
        pz = ck - dt * dz;
      }
    }

    Stencil st;

    if (px < 0.5f) px = 0.5f;
    if (px > width - 1.5f) px = width - 1.5f;
    st.i[0] = (int) px;
    st.i[1] = st.i[0] + 1;

    if (py < 0.5f) py = 0.5f;
    if (py > height - 1.5f) py = height - 1.5f;
    st.j[0] = (int) py;
    st.j[1] = st.j[0] + 1;

    if (pz < 0.5f) pz = 0.5f;
    if (pz > depth - 1.5f) pz = depth - 1.5f;
    st.k[0] = (int) pz;
    st.k[1] = st.k[0] + 1;

    st.s[1] = px - st.i[0];
    st.s[0] = 1 - st.s[1];
    st.t[1] = py - st.j[0];
    st.t[0] = 1 - st.t[1];
    st.m[1] = pz - st.k[0];
    st.m[0] = 1 - st.m[1];

    return st;
  };

  Volume<float> weights(width, height, depth);

  //pass 1: how much is each donor cell asked for in total?
  for (int k = 1; k < depth - 1; k++)
    for (int j = 1; j < height - 1; j++)
      for (int i = 1; i < width - 1; i++)
      {
        const Stencil st = trace(i, j, k, false);

        for (int n = 0; n < 8; ++n)
          weights[st.k[n >> 2]][st.j[(n >> 1) & 1]][st.i[n & 1]] += st.weight(n);
      }

  //pass 2: backward gather, with over-subscribed donors scaled down
  for (int k = 1; k < depth - 1; k++)
    for (int j = 1; j < height - 1; j++)
      for (int i = 1; i < width - 1; i++)
      {
        const Stencil st = trace(i, j, k, false);
        float &out = dens[k][j][i];

        for (int n = 0; n < 8; ++n)
        {
          const int kk = st.k[n >> 2];
          const int jj = st.j[(n >> 1) & 1];
          const int ii = st.i[n & 1];

          const float term = st.weight(n) * dens_prev[kk][jj][ii] /
                             std::max(1.0f, weights[kk][jj][ii]);

          //first corner overwrites the old value, the others accumulate
          if (n == 0)
            out = term;
          else
            out += term;
        }
      }

  //pass 3: under-subscribed donors push what they have left forward
  for (int k = 1; k < depth - 1; k++)
    for (int j = 1; j < height - 1; j++)
      for (int i = 1; i < width - 1; i++)
      {
        const float requested = weights[k][j][i];
        if (requested >= 1.0f) continue;

        const float rho_left = (1.0f - requested) * dens_prev[k][j][i];
        if (!(rho_left > 0.0f)) continue;

        const Stencil st = trace(i, j, k, true);

        for (int n = 0; n < 8; ++n)
          dens[st.k[n >> 2]][st.j[(n >> 1) & 1]][st.i[n & 1]] +=
            st.weight(n) * rho_left;
      }

  //keep the running maximum of the normalised density
  for (int k = 1; k < depth - 1; k++)
    for (int j = 1; j < height - 1; j++)
      for (int i = 1; i < width - 1; i++)
      {
        float &peak = max_dens[k][j][i];
        peak = std::max(peak, dens[k][j][i] / (0.5f * tdens));
      }
}




/**
 * Main entry point: performs one thinning step.
 *
 * The two density volumes (dens/dens1) and the two narrow bands
 * (queue/queue1) are used in a ping-pong fashion, selected by the parity of
 * sim_iter. One call does the following:
 *
 *  1. Advances the current erosion distance curr_dst by dincr.
 *  2. For every point in the current queue: marks it 253 in thin_img, appends
 *     every voxel of its 3x3x3 neighbourhood that still has value 255 to the
 *     back of the next queue (marking it 254), and then either records the
 *     point as a removal candidate (distance test passed and isSimple_tab()
 *     holds) or pushes it to the front of the next queue.
 *  3. Sorts the candidates with index_cmp_minv (stable sort) and re-tests each
 *     with isSimple_tab(), since earlier removals in this same pass change
 *     thin_img. Points that are still simple get their max_dens importance
 *     updated and are removed (thin_img = 0); the others go to the front of
 *     the next queue.
 *  4. Transports the density of the removed points (diffusion variant) and,
 *     if advect_vel is set, their velocity.
 *  5. For points pushed back to the front of the next queue, moves their
 *     previous density into the current density volume; clears the previous
 *     density at removed points.
 *  6. Prints statistics, appends a log entry and increments sim_iter.
 *
 * @param dincr  Amount added to curr_dst for this step.
 * @return Non-zero if at least one voxel was removed or if this was the very
 *         first iteration; zero otherwise.
 */
int collapseSkel3d::collapse_iteration(float dincr)               //Main entry point
{
  Volume<float> *dens = &this->dens, *dens_prev = &this->dens1;
  NarrowBand *queue = &this->queue, *queue1 = &this->queue1;

  if (sim_iter & 1)                     //ping-pong between dens,queue and dens_prev,queue1
  {
    std::swap(dens, dens_prev);               //swap densities
    std::swap(queue, queue1);               //swap queues
  }

  int npushed_back = 0;
  curr_dst += dincr;                    //dig a step 'dincr' deeper from the voxels in 'queue'
  int interface_points = queue->size();
  Interface thinned_set;                  //candidate points for removal
  thinned_set.reserve(interface_points);          //faster insertion next

  for (NarrowBand::iterator it = queue->begin(); it != queue->end(); ++it)
  {
    coord3s &c = *it;
    thin_img(c) = 253;

    for (int k = c.z - 1; k <= c.z + 1; ++k)
    {
      Image<byte> &thin_slice = thin_img[k];
      for (int j = c.y - 1; j <= c.y + 1;
           ++j) //Find new, undiscovered points of the ones in 'queue'
      {
        byte* thin_row = thin_slice[j];
        for (int i = c.x - 1; i <= c.x + 1; ++i)
        {
          byte &thin_val = thin_row[i];
          if (thin_val != 255) continue;
          queue1->push_back(coord3s(i, j, k));   //one new discovered candidate for removal
          thin_val = 254;
        }
      }
    }

    if ((curr_dst > max_dst || curr_dst > getEDT()(c)) && isSimple_tab(c))
    {
      //Store dens_prev values in thinned_set. Sort is then much faster,
      //since we don't need to index that large volume so many times
      thinned_set.push_back(Coord3(c, (*dens_prev)(c))); //this close point is surely OK to be removed
    }
    else                          //push far or non-simple points back to queue1 to process at further iterations
    {
      queue1->push_front(c);
      ++npushed_back;
    }
  }

  queue->clear();                     //done with queue for current iteration. All stuff is now in queue1

  //ALEX: I moved this here, since sorting thinned_set is much faster than sorting queue.
  //      I think it's safe, meaning, we don't care the order we push stuff into queue1. Is this true??
  std::stable_sort(thinned_set.begin(), thinned_set.end(), index_cmp_minv());


  int q1size = queue1->size();                //no. points rejected for thinning
  int tsize = thinned_set.size(), neroded = 0;

  for (int i = 0; i < tsize; ++i)           //phase 2: removal
  {
    const Coord3 &C = thinned_set[i];
    const coord3s &c = C.c;
    float     dprev = C.v;                  //reuse the cached dens_prev value, avoids one volume access
    float    &mdens = max_dens(c);

    //Simplicity must be re-checked: removals made earlier in this loop have
    //already changed thin_img around c.
    if (isSimple_tab(c))
    {
      if (dprev >= 2.0f)                  //SS or CS point:
      {
        float dens_imp = dprev / (0.5f * tdens);
        if (fabsf(dprev - (*dens)(c)) < 4.0f)     //
          mdens = dens_imp;
        else                        //
        {
          mdens = (4.0f * sim_iter) / tdens;      //compute TOA
          if (!force_monotonic)             //if we force importance monotonicity, then mdens is the TOA.
            mdens = std::max(dens_imp, mdens);        //else, don't allow density to drop below some minimal value.

          if (((8.0f * sim_iter) / tdens < dens_imp) && isEndPoint(c, true) && enableImportanceBoosting)
            mdens += cs_importance_boost;         //important CS point
        }
      }

      thin_img(c) = 0;                    //do the actual voxel removal
      thinned_set[neroded++] = C;         //compact: keep only the removed points, in order
    }
    else
      queue1->push_front(c);
  }

  //points rejected in phase 2 were pushed to the front as well
  npushed_back += queue1->size() - q1size;

  thinned_set.resize(neroded);

  //Compute 'importance' of skel. points
  //Thinned points push their density forward to the new interface points;
  //this transport is either (i) a diffusion process, or (ii) an advection.

  //reaction part (linear spring on density)
  //reaction(ipoints, *dens_prev);

  //diffusion-like transport
  float tot_dens = transport_dens_diffusion(1.0f, *dens, *dens_prev, thinned_set);

  //advection transport
  //float tot_dens = transport_dens_advection(1.0f, *dens, *dens_prev, thinned_set);

  /*
  //LUK: tmp_dens !!!!
  const int width = thin_img.getWidth(), height = thin_img.getHeight(), depth = thin_img.getDepth();
  Volume<float> tmp_dens(width, height, depth);
  std::vector<coord3s> out_points;
  tot_dens += transport_dens_advection_unconstrained(1.0f, out_points, tmp_dens, *dens, *dens_prev, ipoints);
  tot_dens += transport_dens_advection_project(1.0f, out_points, tmp_dens, *dens, *dens_prev, ipoints);
  */

  /*
  vector<coord3s> new_ipoints;
  potential_new_interface(ipoints, new_ipoints);
  const int width = thin_img.getWidth(), height = thin_img.getHeight(), depth = thin_img.getDepth();
  Volume<float> tmp_dens(width, height, depth);
  tot_dens += transport_dens_advection_back(1.0f, *dens, *dens_prev, tmp_dens, new_ipoints, ipoints);
  tot_dens += transport_dens_advection_fwd(1.0f, *dens, *dens_prev, tmp_dens, ipoints);
  */

  /*
  vector<coord3s> new_ipoints;
  potential_new_interface(ipoints, new_ipoints);

  for(int i=0;i<new_ipoints.size();i++) {
      const coord3s &c = new_ipoints[i];

      float minv = 1e+7f;
      for(int zz=c.z-1;zz<=c.z+1;zz++)
  for(int yy=c.y-1;yy<=c.y+1;yy++)
    for(int xx=c.x-1;xx<=c.x+1;xx++) {
      if(!thin_img[zz][yy][xx])
        minv = std::min(minv, (*dens_prev)[zz][yy][xx]);
    }

      if(minv<1e+4f && (*dens)[c.z][c.y][c.x]<minv ) {
  (*dens)[c.z][c.y][c.x] = minv;
  //printf("newv: %f %f\n", (*dens)[c.z][c.y][c.x], minv);
      }

    }
  */


  if (advect_vel)
  {
    //momentum(default)/velocity advection !!!
    //transport_vel_advection(1.0f, *dens_prev, u, v, w, ipoints);
    transport_vel_diffusion(1.0f, thinned_set);
  }

  //non-thinned ones just add their density at corresponding locations.
  //They were all inserted with push_front, so they are exactly the first
  //'npushed_back' entries of queue1.
  for (NarrowBand::const_iterator it = queue1->begin(); npushed_back &&
       it != queue1->end(); ++it, --npushed_back)
  {
    const coord3s &c = *it;
    float        &d = (*dens)(c);
    float    &dprev = (*dens_prev)(c);
    d        += dprev;
    dprev     = 0.0f;
  }

  //clear previous density at interface points
  for (int i = 0; i < neroded; ++i)
    (*dens_prev)(thinned_set[i].c) = 0.0f;

  printf("tdens: %f curr_dst: %f max_dst: %f ipoints: %d tpoints: %d ", tot_dens,
         curr_dst, max_dst, interface_points, neroded);
  //size() returns size_t, which is not 'unsigned long' on every platform
  //(e.g. 64-bit Windows); cast so the argument really matches %lu.
  if (template_cache.size())
    printf("tcache: %lu, cache efficiency: %f\n",
           static_cast<unsigned long>(template_cache.size()),
           float(iss_calls) / template_cache.size());
  printf("\n");

  log.add(LogEntry(sim_iter, tot_dens, max_dst, curr_dst, interface_points,
                   neroded));

  ++sim_iter;

  /*
  if(!neroded && sim_iter>3) {
    printf("ls\n");
    levelSet<float, less<float> > ls(max_dens);
    ls.Init( sim_iter -5.0f);//(2.0f*(sim_iter-5))/tdens );
    float mtime = ls.fastMarching();
    ls.output(max_dens, mtime);
    max_dens.grad_order3(u, v, w);
  }
  */

  //sim_iter was just incremented, so '== 1' identifies the very first call
  return (neroded > 0 || sim_iter == 1);
}

