0%

堆溢出(2)-堆内存分配操作流程

结合源码对内存分配的流程进行分析。

谈到linux内存分配,关键的就是malloc和free两个函数,下面主要分析这两个函数源码总结堆内存分配的流程。当然其中还涉及到malloc_consolidate等函数,也会在后面分析其作用过程。部分源码比较长,意思写在注释里面了,不想看可以直接看源码后的流程总结部分。

malloc

__libc_malloc

malloc是c语言中常用的内存申请函数,__libc_malloc为其在glibc中的实现。下面是glibc中__libc_malloc代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*malloc 在glibc的实现  其更底层是_int_malloc*/
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;/*malloc_state指针*/
void *victim;

_Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
"PTRDIFF_MAX is not more than half of SIZE_MAX");
/*检查是否有内存分配钩子*/
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */

size_t tbytes;
/*根据传入的参数计算chunk大小,得到tcache对应的下标*/
if (!checked_request2size (bytes, &tbytes))
{
__set_errno (ENOMEM);
return NULL;
}
size_t tc_idx = csize2tidx (tbytes);/*转为tcache 的index*/
/*初始化 tcache*/
MAYBE_INIT_TCACHE ();

DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins//根据size得到的idx在合法的范围内
&& tcache
&& tcache->counts[tc_idx] > 0)//对应的tcache 链不为空
{
return tcache_get (tc_idx);//转入tcache获取
}
DIAG_POP_NEEDS_COMMENT;
#endif

if (SINGLE_THREAD_P)
{
victim = _int_malloc (&main_arena, bytes);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
&main_arena == arena_for_chunk (mem2chunk (victim)));
return victim;
}
/*寻找一个 arena 来试图分配内存,并上锁,宏位于arena.c */
arena_get (ar_ptr, bytes);

/*调用 _int_malloc 函数申请对应大小的内存*/
victim = _int_malloc (ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena
before. */
if (!victim && ar_ptr != NULL)
/*如果分配失败的话,ptmalloc 会再次尝试*/
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}
/*如果申请到了 arena,退出前解锁*/
if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);
/*断言目前状态满足以下条件之一*/
/* 没有申请到内存
* mmap的内存
* 申请到的内存必须在其所分配的arena中
*/
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
/*返回内存*/
return victim;
}
libc_hidden_def (__libc_malloc)

总结逻辑

  • 检查是否有内存分配钩子函数,有的话调用之
  • 如果启用了tcache会先计算tcache的索引看对应的tcache链是否为空,不为空就从中获取一个chunk使用
  • 寻找一个 arena 来试图分配内存,并上锁
  • 调用_int_malloc函数申请对应大小的内存
  • 失败后再次尝试
  • 断言执行结束时必须满足以下三种情况之一,否则报错退出
    • 没有申请到内存
    • mmap得到的内存
    • 申请到的内存在其所分配的arena中
  • 返回申请的内存区域指针

其中有一点可以看到,tcache作为亲儿子,检查是优先于其他所有的bin的,这是做启用tcache的libc相关堆题时需要注意的。

_int_malloc

除此之外,malloc对内存申请的主要逻辑基本都在_int_malloc,下面看其源码,源码比较长,给了关键部分的注释。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
/*
------------------------------ malloc ------------------------------
*/
/*真实的malloc存在*/
static void *
_int_malloc (mstate av, size_t bytes)
{
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */

mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
int victim_index; /* its bin index */

mchunkptr remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */

unsigned int block; /* bit map traverser */
unsigned int bit; /* bit map traverser */
unsigned int map; /* current word of binmap */

mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */

#if USE_TCACHE
size_t tcache_unsorted_count; /* count of unsorted chunks processed */
#endif

/*
Convert request size to internal form by adding SIZE_SZ bytes
overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable
size. Also, checked_request2size returns false for request sizes
that are so large that they wrap around zero when padded and
aligned.
*/
/*检查请求 将请求大小转为chunk对齐后的大小*/
if (!checked_request2size (bytes, &nb))
{
__set_errno (ENOMEM);
return NULL;
}

/* There are no usable arenas. Fall back to sysmalloc to get a chunk from
mmap. */
/*无可用arena*/
if (__glibc_unlikely (av == NULL))
{
/*调用sysmalloc申请*/
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}

/*
If the size qualifies as a fastbin, first check corresponding bin.
This code is safe to execute even if av is not yet initialized, so we
can try it without checking, which saves some time on this fast path.
*/

#define REMOVE_FB(fb, victim, pp) \
do \
{ \
victim = pp; \
if (victim == NULL) \
break; \
} \
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
!= victim); \
/*如果大小小于fastbin,从fastbin中获取合适大小*/
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
/*计算大小对应的fastbin 索引*/
idx = fastbin_index (nb);
/*对应的fastbin链的头指针*/
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp;
victim = *fb;
/*判断此fastbin链是不是空的*/
if (victim != NULL)
{
if (SINGLE_THREAD_P)
*fb = victim->fd;
else
/*利用fd遍历对应的bin内是否有空闲的chunk块*/
REMOVE_FB (fb, pp, victim);
/*存在一个可用的chunk*/
if (__glibc_likely (victim != NULL))
{
/*判断这个chunk的size与其fastbin链所在的索引index是否是相对应的*/
/*就是这里导致了在fastbin伪造chunk时由write-anything-anywhere转为了受限地址的任意写入*/
size_t victim_idx = fastbin_index (chunksize (victim));
if (__builtin_expect (victim_idx != idx, 0))
malloc_printerr ("malloc(): memory corruption (fast)");
/*调试状态下进行的细致的检查*/
check_remalloced_chunk (av, victim, nb);
/*这部分是使用tcache的逻辑*/
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
/*找对应的tcache索引*/
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
/*tcache没满 放进去填满*/
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
}
#endif
/*找到可用chunk 转为mem指针输出*/
void *p = chunk2mem (victim);
/* 如果设置了perturb_type, 则将获取到的chunk初始化为 perturb_type ^ 0xff*/
alloc_perturb (p, bytes);
return p;
}
}
}

/*
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
*/
/*如果大小位于small bin中的话 得到位于的smallbin索引 */
if (in_smallbin_range (nb))
{
/*获取small bin对应的索引*/
idx = smallbin_index (nb);
/*找到对应的bin*/
bin = bin_at (av, idx);
/*获取其最后一个chunk 如果为bin 则说明bin是空的 否则有下面两个情况*/
if ((victim = last (bin)) != bin)
{
bck = victim->bk;
/*bin不为空 检查victim的bk所指chunk的fd是否指回victim 防止链表被伪造*/
if (__glibc_unlikely (bck->fd != victim))
malloc_printerr ("malloc(): smallbin double linked list corrupted");
/*设置inuse位*/
set_inuse_bit_at_offset (victim, nb);
/* 修改对应的bin链表将 small bin 的最后一个 chunk 取出来*/
bin->bk = bck;
bck->fd = bin;

if (av != &main_arena)
set_non_main_arena (victim);
/*调试状态下进行的细致的检查*/
check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
/*当从这个small bin中找到一个可用chunk后,会把bin上其他的所有chunk都放入tcache*/
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = last (bin)) != bin)
{
if (tc_victim != 0)
{
/*这里是先从链上取下chunk,类似于unlink操作,不过相比之下更加的简单,没有安全检查 tcache smallbin unlink操作的漏洞原因*/
bck = tc_victim->bk;
set_inuse_bit_at_offset (tc_victim, nb);
if (av != &main_arena)
set_non_main_arena (tc_victim);
bin->bk = bck;
bck->fd = bin;
/*将取下的chunk放入tcache中*/
tcache_put (tc_victim, tc_idx);
}
}
}
#endif
/*找到可用chunk 转为mem指针输出*/
void *p = chunk2mem (victim);
/* 如果设置了perturb_type, 则将获取到的chunk初始化为 perturb_type ^ 0xff*/
alloc_perturb (p, bytes);
return p;
}
}

/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
*/

else
{
/*获取size对应的large bin中的索引*/
idx = largebin_index (nb);
/*如果存在fastbin的话,会处理(合并所有的)fastbin */
if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate (av);
}

/*
Process recently freed or remaindered chunks, taking one only if
it is exact fit, or, if this a small request, the chunk is remainder from
the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.
The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
*/

#if USE_TCACHE
INTERNAL_SIZE_T tcache_nb = 0;
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
tcache_nb = nb;
int return_cached = 0;

tcache_unsorted_count = 0;
#endif
/*大循环遍历unsorted bin 大致逻辑如下*/
/*
* 按照 FIFO 的方式逐个将 unsorted bin 中的 chunk 取出来
* 如果是 small request,则考虑是不是恰好满足,是的话,直接返回
* 如果不是的话,放到对应的 bin 中
* 尝试从 large bin 中分配用户所需的内存
* -- from ctf-wiki 不一定正确
*/
for (;; )
{
int iters = 0;
/*unsorted bin不为空时*/
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
/*victim为unsorted bin的最后一个chunk*/
/*bck设置为unsorted bin的倒数第二个chunk*/
bck = victim->bk;
/*获取chunk的size*/
size = chunksize (victim);
/*获取下一个chunk*/
mchunkptr next = chunk_at_offset (victim, size);
/*判断当前chunk以及下一个chunk各项信息是否合法*/
if (__glibc_unlikely (size <= 2 * SIZE_SZ)
|| __glibc_unlikely (size > av->system_mem))
malloc_printerr ("malloc(): invalid size (unsorted)");
if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
|| __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
malloc_printerr ("malloc(): invalid next size (unsorted)");
if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
if (__glibc_unlikely (bck->fd != victim)
|| __glibc_unlikely (victim->fd != unsorted_chunks (av)))
malloc_printerr ("malloc(): unsorted double linked list corrupted");
if (__glibc_unlikely (prev_inuse (next)))
malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/
/*用户的请求为 small bin chunk 先考虑是否unsorted bin中只有last remainder*/
if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
{
/* split and reattach remainder */
/*新remainder的大小*/
remainder_size = size - nb;
/*获取新的 remainder 的位置*/
remainder = chunk_at_offset (victim, nb);
/*更新unsorted bin*/
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
/*更新malloc_state中记录的last remainder*/
av->last_remainder = remainder;
/*更新remainder的指针*/
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
/*设置victim chunk的头部*/
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
/*设置 remainder 的头部*/
set_head (remainder, remainder_size | PREV_INUSE);
/*设置记录remainder大小的prev_size字段,因为处于空闲状态*/
set_foot (remainder, remainder_size);

check_malloced_chunk (av, victim, nb);
/*chunk转为mem指针*/
void *p = chunk2mem (victim);
/*chunk初始化*/
alloc_perturb (p, bytes);
return p;
}

/* remove from unsorted list */
if (__glibc_unlikely (bck->fd != victim))
malloc_printerr ("malloc(): corrupted unsorted chunks 3");
/*取出chunk*/
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

/* Take now instead of binning if exact fit */
/*如果大小合适 直接使用*/
if (size == nb)
{
/*设置*/
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
/*向tcache中放*/
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
/*标志位,被置位说明满足要求的chunk被放入tcache中了,后面以此判断是否需要从tcache中取*/
return_cached = 1;
continue;
}
else
{
#endif
/* 调试时细致检查*/
check_malloced_chunk (av, victim, nb);
/*chunk转为mem指针返回*/
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
#if USE_TCACHE
}
#endif
}
/*当前chunk不符合要求时 整理chunk*/
/* place chunk in bin */
/*如果chunk在small bin大小范围 整理后放回small bin*/
if (in_smallbin_range (size))
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;
}
else/*否则 取出来放入large bin 下面是对应的放入large bin逻辑*/
{
/*对应的large bin的索引*/
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);// 当前的large bin 的头部
fwd = bck->fd;

/* maintain large bins in sorted order */
/*下面部分暗示了每个 large bin是从大到小排列的*/
/*large bin不是空的*/
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
/*bck->bk 存储着相应 large bin 中最小的chunk 如果比这个还小,只需要插入到链表尾部*/
assert (chunk_main_arena (bck->bk));
if ((unsigned long) (size)
< (unsigned long) chunksize_nomask (bck->bk))
{
fwd = bck;//fwd 指向 large bin 头部
bck = bck->bk;//bck 指向 large bin 尾部 chunk

victim->fd_nextsize = fwd->fd;//victim 的 fd_nextsize 指向 large bin 的第一个 chunk
victim->bk_nextsize = fwd->fd->bk_nextsize;//victim 的 bk_nextsize 指向原来链表的第一个 chunk 指向的 bk_nextsize
/*原来链表的第一个 chunk 的 bk_nextsize 指向 victim*/
/*原来指向链表第一个 chunk 的 fd_nextsize 指向 victim*/
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else//当前要插入的 victim 的大小大于最小的 chunk
{
assert (chunk_main_arena (fwd));
//从链表头部开始,沿fd_nextsize找到第一个大小小于或等于victim的chunk
while ((unsigned long) size < chunksize_nomask (fwd))
{
fwd = fwd->fd_nextsize;
assert (chunk_main_arena (fwd));
}
/*如果找到个相等的,就直接将 chunk 插入到该chunk的后面,不改变nextsize指针*/
if ((unsigned long) size
== (unsigned long) chunksize_nomask (fwd))
/* Always insert in the second position. */
fwd = fwd->fd;
else//如果找到的chunk和当前victim大小不一样,需要构造双向链表将当前的chunk链进去
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
if (bck->fd != fwd)
malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
}
}
else//对应的large bin是空的话,直接让fd_nextsize与bk_nextsize构成双向链表
victim->fd_nextsize = victim->bk_nextsize = victim;
}
/*对应的 bin的binmap进行标记,并修改这个bin的链表*/
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */

/*设置一个清理上限,清理到一定次数就不再清理了*/
++tcache_unsorted_count;
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get (tc_idx);
}
#endif
/*这个清理过程最多迭代10000次 超过后退出*/
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}

#if USE_TCACHE
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
{
/*根据前面的标志说明tcache中已经有了满足要求的small chunk,从tcache中取出一个给用户使用*/
return tcache_get (tc_idx);
}
#endif

/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/
/*明确一下 进行到这里 前面从unsorted bin中对chunk进行了回收 仍然没发现可用的chunk 这意味着small bin中的chunk都不符合*/
/*在large chunk中开始搜索*/
if (!in_smallbin_range (nb))
{
/*获得对应的bin*/
bin = bin_at (av, idx);

/* skip scan if empty or largest chunk is too small */
//如果对应的 bin为空或者其中的chunk最大的也很小,那就跳过
if ((victim = first (bin)) != bin
&& (unsigned long) chunksize_nomask (victim)
>= (unsigned long) (nb))
{
/*反向遍历链表 直到找到一个不小于需要申请大小的chunk*/
victim = victim->bk_nextsize;
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;

/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
/* 这边是一个简化操作的技巧 如果找到的chunk并不是最后一个 而且其前面还有大小与它相同的,那就
* 取它前面的那个chunk 这样就可以避免调整bk_nextsize
* 因为一个bin中同样大小的chunk 只有一个会被链在bk_nextsize上
*/
if (victim != last (bin)
&& chunksize_nomask (victim)
== chunksize_nomask (victim->fd))
victim = victim->fd;
/*计算分配和剩余的大小*/
remainder_size = size - nb;
/*将chunk unlink出来*/
unlink_chunk (av, victim);

/* Exhaust */
/*剩下的大小不足以当做一个chunk*/
if (remainder_size < MINSIZE)
{
/*剩余部分不足以构成一个chunk,整块分配给用户,无需分裂,只需设置下一个chunk的prev_inuse位*/
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
}
/* Split */
else
{
/*如果剩下的还能构成一个chunk 就分裂它 获取剩下的部分的指针 */
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
/*获取unsorted bin的头部与第一个chunk,准备链入unsorted bin*/
bck = unsorted_chunks (av);
fwd = bck->fd;
/*先判断unsorted bin是不是被破坏了*/
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks");
/*链入unsorted bin*/
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
/*这个剩余的chunk如果不在small bin范围的话 设置对应的字段*/
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
/*设置分裂出来的两个chunk的头部*/
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
/*设置remainder的大小*/
set_foot (remainder, remainder_size);
}
/*老三样*/
/*调试时会进行的细致检查*/
check_malloced_chunk (av, victim, nb);
/*chunk指针转为mem指针*/
void *p = chunk2mem (victim);
/*chunk初始填充*/
alloc_perturb (p, bytes);
return p;
}
}

/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.
The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/
/*到这里 如果还没找到可用的chunk 意味着large bin也无能为力*/
/*尝试增加idx来寻找*/
++idx;
/*获取对应的bin*/
bin = bin_at (av, idx);
/*这一段有点没懂 binmap是标记bin是否为空的 这里是可以通过这样对所有的bin做统一处理吗?*/
/*当前索引在binmap中的block索引 从而找到对应的map #define idx2block(i) ((i) >> BINMAPSHIFT) ,BINMAPSHIFT=5
*前面说过一个map是32位 所以需要右移5
*/
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);//idx对应的位设置为1,其它位为0
/*上述过程就是计算了idx应该对应的map 以及自己的bit 以进行比较*/

for (;; )
{
/* Skip rest of block if there are no more set bits in this block. */
/*如果bit>map,则表示该 map 中没有比当前所需要chunk大的空闲块*/
/*而如果bit等于0 那表示上面idx2bit的参数为0*/
if (bit > map || bit == 0)
{
do/*寻找下一个block,直到其对应的map不为0*/
{
/*如果block超出了binmap的大小 说明还是找不到 那就需要去top chunk找了*/
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
}
while ((map = av->binmap[block]) == 0);
/* 找到了map,获取对应的bin,因为该map中的chunk大小都比所需的chunk大(idx先自增了),而且
* map本身不为0,所以这个map所在的所有bin中必然存在合适的一个chunk。*/
bin = bin_at (av, (block << BINMAPSHIFT));/*这个bin是map的最小bin 因为index最小*/
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
/*从最小bin找,直到找到合适的bin 前面说过 这个过程必然能找到*/
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}

/* Inspect the bin. It is likely to be non-empty */
/*获取对应的bin*/
victim = last (bin);

/* If a false alarm (empty bin), clear the bit. */
/*如果victim=bin,那么我们就将map对应的位清0,然后获取下一个bin*/
/*victim等于bin说明这个bin是空的 这是可能的:binmap的位是滞后清除的,bin被取空时并不会立即更新binmap,所以这里清位后继续找下一个bin*/
if (victim == bin)
{
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
}

else
{
/*获取chunk的size*/
size = chunksize (victim);

/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));
/*计算分割后剩余的大小*/
remainder_size = size - nb;

/* unlink */
unlink_chunk (av, victim);

/* Exhaust */
/*跟上面逻辑一样 分割后剩余不足一个chunk 整块分配给用户,仅设置下一个chunk的prev_inuse位*/
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
}

/* Split */
else/*分裂后还能够一个chunk 那就分裂它*/
{/*计算剩余的chunk的偏移*/
remainder = chunk_at_offset (victim, nb);

/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
/*将剩余的chunk插入到unsorted bin*/
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("malloc(): corrupted unsorted chunks 2");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;

/* advertise as last remainder */
/* 如果在small bin范围内,就将其标记为remainder*/
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
/*设置老三样*/
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
/*继续老三样*/
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

use_top:/*这部分开始使用top chunk*/
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).
We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/
/*获取top chunk*/
victim = av->top;
/*top chunk大小*/
size = chunksize (victim);

if (__glibc_unlikely (size > av->system_mem))
malloc_printerr ("malloc(): corrupted top size");
/*top chunk很大 分裂完还是满足 需要大小+最小chunk大小 那就分裂它*/
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
/*否则就判断是否有 fast chunk*/
else if (atomic_load_relaxed (&av->have_fastchunks))
{
/*先进行fastbin合并*/
malloc_consolidate (av);
/* restore original bin index */
/*
* 判断需要的chunk是在small bin范围还是large bin范围 然后计算对应的索引
*
**/
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}

/*
Otherwise, relay to handle system-dependent cases
*/
else/*进行到这 就说明堆内存不够了 需要调用sysmalloc来申请内存*/
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
}
}

总结下大致的流程:

  • 检查是否有可用的arena,没有的话就向调用sysmalloc申请,有的话就继续下一步
  • 如果小于fastbin最大size,尝试从fastbin获取合适大小的chunk,转换为mem指针输出
    • 根据size计算index得到对应的fastbin chunk链,不为空则取头部
    • 如果启用了tcache则看此链是否存在其他chunk,全放入tcache直到到达上限(7个)
    • 注意这部分对找到的chunk做了size与链的所在index是否对应的判断,所以在fastbin中塞入伪造chunk时需要注意size字段
  • 如果fastbin中没找到,大小在small bin范围,则前往small bin查找
    • 根据size计算index得到所对应的smallbin的 chunk链,判断是否为空(并检查尾部chunk和其上一个chunk的前后项指针防止伪造),最后取出尾部的chunk
    • 同样的如果启用tcache则用剩下的chunk填满对应的tcache链
  • 到这一步,如果请求大小不在small bin范围(即属于large bin请求),先使用malloc_consolidate函数合并当前arena下所有的fastbin
  • 开始处理unsorted bin,按照 FIFO 的方式逐个将 unsorted bin 中的 chunk 取出来做判断
    • 判断chunk本身信息是否合法,主要是下面几个方面
      • size在合法范围
      • 以size作为偏移得到的下一个chunk的size是否合法
      • 下一个chunk的prev_size是不是与当前size相等
      • fd指针是不是正确
      • 下一个chunk的prev_inuse位是否正确
    • 如果用户请求的small bin chunk,而unsorted bin中只有last_remainder,并且其size足够大,就分裂它返回,并更新malloc_state中的last_remainder记录
    • 检查当前chunk的bk指向chunk的fd指针是否正确(防止伪造)
    • 将chunk取出来
      • 如果大小合适,放入tcache后继续,设置return_cached表明后面可以从tcache中获取chunk
        • 如果tcache填满则直接返回这个chunk
      • chunk大小不对,按照大小范围放回smallbin和largebin
        • 放回largebin时有两点需要注意
          • 每个largebin链内部按从大到小排列
          • 如果链中不存在大小相同的,需要将chunk插入fd_nextsize和bk_nextsize组成的链表(有相同大小不需要此步,意味着nextsize链中只存放不同大小的chunk)
      • 修改对应的binmap
      • 根据return_cached去判断是否前往tcache取chunk
      • 至此还未找到chunk,前往largebin中找
        • 找到对应的largebin链:如果chunk链为空或者其中的chunk都很小就跳过;否则就反向遍历找一个满足要求大小的chunk,然后分裂这个chunk返回,并将剩余的放入unsorted bin
        • 大小对应的chunk链没找到就循环增加index去更大的chunk链找
        • 还是找不到满足的就去top chunk找,topchunk满足就分裂后返回
        • 堆内存不够了,调用sysmalloc来申请内存

做题速览精简版,简化了下逻辑

  • 根据size找到tcache的索引,对应的链表不为空就直接拆下来,并将tcache链计数器减一
  • tcache没找到,就前往fastbin中
  • size 小于fastbin 找fastbin 找到,填满tcache 然后结束
  • size在small bin范围中,找small chunk,找到,填满tcache 然后结束
  • 都不在,合并所有fastbin
  • 循环
    • 检查unsorted bin的last_remainder,满足一定条件,分裂之,剩余的标记为last_remainder
    • 在unsorted bin搜索
      • 遇到精确大小填充先tcache ,否则就放回small bin/large bin(整理垃圾)
      • 在tcache中找 找到就返回 不行就进行下一步
      • 在small/large bin找合适大小的chunk(large bin中的不一定是精确大小)
        • 分裂找到的chunk,将合适大小的chunk填入返回,剩余的放入unsorted bin
    • 使用top chunk
    • 向系统申请

free

__libc_free

free用来释放一片内存区域,与之对应的libc实现是__libc_free,源码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
void
__libc_free (void *mem)
{
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */
/*是否有钩子函数 __free_hook*/
void (*hook) (void *, const void *)
= atomic_forced_read (__free_hook);
if (__builtin_expect (hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS (0));
return;
}
/*free一个NULL不起作用*/
if (mem == 0) /* free(0) has no effect */
return;
/*mem指针转换为chunk指针*/
p = mem2chunk (mem);
/*内存是mmap得到的*/
if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
/* See if the dynamic brk/mmap threshold needs adjusting.
Dumped fake mmapped chunks do not affect the threshold. */
if (!mp_.no_dyn_threshold
&& chunksize_nomask (p) > mp_.mmap_threshold
&& chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
&& !DUMPED_MAIN_ARENA_CHUNK (p))
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk (p);
return;
}
/*初始化tcache MAYBE_INIT_TCACHE()在tcache不为空时无作用*/
MAYBE_INIT_TCACHE ();
/*根据chunk获得分配区的指针*/
ar_ptr = arena_for_chunk (p);
/*调用_int_free进行释放*/
_int_free (ar_ptr, p, 0);
}
libc_hidden_def (__libc_free)

__libc_free主要干了这么些事

  • 检查是否有内存释放钩子函数(__free_hook),有的话调用之,这与malloc类似
  • 内存是mmap得到的,尝试释放
  • 调用MAYBE_INIT_TCACHE初始化tcache
  • 获得chunk所在arena指针
  • 调用_int_free进行内存释放

_int_free

由上面流程可以看到,其主要实现逻辑也和malloc相似,在_int_free函数中。下面是_int_free源码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
/*
------------------------------ free ------------------------------
*/
/*这部分是真正的free*/
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size; /* its size */
mfastbinptr *fb; /* associated fastbin */
mchunkptr nextchunk; /* next contiguous chunk */
INTERNAL_SIZE_T nextsize; /* its size */
int nextinuse; /* true if nextchunk is used */
INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
mchunkptr bck; /* misc temp for linking */
mchunkptr fwd; /* misc temp for linking */
/*获取size大小*/
size = chunksize (p);

/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
/*对指针进行检查 第一个判断 p 是否大于 -size:若成立则 p + size 会在地址空间末端回绕,分配器不可能返回这样的指针 第二个是判断对齐*/
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
malloc_printerr ("free(): invalid pointer");
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
/*大小没有最小的chunk大或大小不是MALLOC_ALIGNMENT的整数倍*/
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
malloc_printerr ("free(): invalid size");
/*检查chunk是否处于使用状态*/
check_inuse_chunk(av, p);

#if USE_TCACHE
{
/*找对应到tcache的index*/
size_t tc_idx = csize2tidx (size);
if (tcache != NULL && tc_idx < mp_.tcache_bins)
{
/* Check to see if it's already in the tcache. */
tcache_entry *e = (tcache_entry *) chunk2mem (p);

/* This test succeeds on double free. However, we don't 100%
trust it (it also matches random payload data at a 1 in
2^<size_t> chance), so verify it's not an unlikely
coincidence before aborting. */
/*遍历链表看是不是有相同的tcache chunk 防止double free*/
if (__glibc_unlikely (e->key == tcache))
{
tcache_entry *tmp;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = tmp->next)
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}
/*如果对应的tcache链没满 就是小于7个 那就调用tcache_put放进去*/
if (tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);/*直接放回对应tcache链的头部,也没有对放入的chunk的inuse置位*/
return;
}
}
}
#endif

/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
*/
/*判断是不是在 fast bin 范围*/
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())

#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
/*此chunk是fast chunk,并且下一个chunk是top chunk,则不能插入*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {
/*下一个chunk的大小不能小于两倍的SIZE_SZ,并且下一个chunk的大小不能大于system_mem*/
if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
<= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
bool fail = true;
/* We might not have a lock at this point and concurrent modifications
of system_mem might result in a false positive. Redo the test after
getting the lock. */
if (!have_lock)
{
__libc_lock_lock (av->mutex);
fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
|| chunksize (chunk_at_offset (p, size)) >= av->system_mem);
__libc_lock_unlock (av->mutex);
}

if (fail)
malloc_printerr ("free(): invalid next size (fast)");
}
/*将chunk的mem设置为perturb_byte*/
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

atomic_store_relaxed (&av->have_fastchunks, true);
/*由size获取fastbin的索引*/
unsigned int idx = fastbin_index(size);
/*获取对应fastbin的头指针*/
fb = &fastbin (av, idx);

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
/*使用原子操作将P插入到fastbin链表中*/
mchunkptr old = *fb, old2;

if (SINGLE_THREAD_P)
{
/* Check that the top of the bin is not the record we are going to
add (i.e., double free). */
if (__builtin_expect (old == p, 0))
malloc_printerr ("double free or corruption (fasttop)");
p->fd = old;
*fb = p;
}
else
do
{
/* Check that the top of the bin is not the record we are going to
add (i.e., double free). */
/*可以看到,对fastbin 的chunk进行free时,只检查了是不是其对应的chunk链的第一个chunk相同,并没有检查是不是与chunk链上的每一个chunk相同*/
if (__builtin_expect (old == p, 0))
malloc_printerr ("double free or corruption (fasttop)");
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
!= old2);

/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
allocated again. */
if (have_lock && old != NULL
&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
malloc_printerr ("invalid fastbin entry (free)");
}

/*
Consolidate other non-mmapped chunks as they arrive.
*/
/*内存合并部分*/
else if (!chunk_is_mmapped(p)) {

/* If we're single-threaded, don't lock the arena. */
if (SINGLE_THREAD_P)
have_lock = true;
/*先请求锁*/
if (!have_lock)
__libc_lock_lock (av->mutex);

nextchunk = chunk_at_offset(p, size);

/* Lightweight tests: check whether the block is already the
top block. */
/*轻量级检测*/
/*free的chunk不能是top chunk*/
if (__glibc_unlikely (p == av->top))
malloc_printerr ("double free or corruption (top)");
/* Or whether the next chunk is beyond the boundaries of the arena. */
/*下一个chunk不能超过arena的边界*/
if (__builtin_expect (contiguous (av)
&& (char *) nextchunk
>= ((char *) av->top + chunksize(av->top)), 0))
malloc_printerr ("double free or corruption (out)");
/* Or whether the block is actually not marked used. */
/*chunk的使用标记没有被标记 防止double free?*/
if (__glibc_unlikely (!prev_inuse(nextchunk)))
malloc_printerr ("double free or corruption (!prev)");
/*下一个chunk的大小*/
nextsize = chunksize(nextchunk);
/*下一个chunk的大小是否不大于2*SIZE_SZ,或者nextsize是否大于系统可提供内存*/
if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
|| __builtin_expect (nextsize >= av->system_mem, 0))
malloc_printerr ("free(): invalid next size (normal)");
/*释放 并填充chunk的mem部分*/
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

/* consolidate backward */
/*后向合并 合并的是比他地址低的chunk*/
if (!prev_inuse(p)) {
prevsize = prev_size (p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
if (__glibc_unlikely (chunksize(p) != prevsize))
malloc_printerr ("corrupted size vs. prev_size while consolidating");
unlink_chunk (av, p);
}
/*下一个chunk不是top chunk*/
if (nextchunk != av->top) {
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
/*而且下一个并不inuse 合并掉下一个chunk*/
/* consolidate forward */
if (!nextinuse) {
unlink_chunk (av, nextchunk);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);

/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/
/*合并完放入unsorted bin*/
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("free(): corrupted unsorted chunks");
p->fd = fwd;
p->bk = bck;
/* 如果是large chunk,就设置nextsize指针字段为NULL*/
if (!in_smallbin_range(size))
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;

set_head(p, size | PREV_INUSE);
set_foot(p, size);

check_free_chunk(av, p);
}

/*
If the chunk borders the current high end of memory,
consolidate into top
*/
/*释放的chunk的下一个chunk如果是top chunk,合并到 top chunk*/
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}

/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.
Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/
/*如果合并后的 chunk 的大小大于FASTBIN_CONSOLIDATION_THRESHOLD,向系统返还内存*/
if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate(av);

if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long)(chunksize(av->top)) >=
(unsigned long)(mp_.trim_threshold))
systrim(mp_.top_pad, av);
#endif
} else {
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));

assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
}

if (!have_lock)
__libc_lock_unlock (av->mutex);
}
/*
If the chunk was allocated via mmap, release via munmap().
*/
/*释放 mmap 的 chunk*/
else {
munmap_chunk (p);
}
}

逻辑总结如下

  • chunk检查
    • 指针 p 不能大于 -size(即 p + size 不能在地址空间末尾回绕,防止伪造的 size 造成地址环绕)
    • 判断对齐
    • 大小没有最小的chunk大或大小不是MALLOC_ALIGNMENT的整数倍
    • 检查chunk是否处于使用状态
  • 启用tcache时找到对应tcache的index
    • 遍历链表看是不是有相同的tcache chunk 防止double free
    • 对应的tcache链没满就放进去
  • 在 fastbin 范围
    • 如果下一个chunk是top chunk,不能直接插入
    • 下一个chunk大小合法,就直接插入对应的chunk链结束
  • 前一个chunk是空闲的(这里前一个指的是内存地址上的前一个),则unlink 前面的chunk,合并两个chunk
  • 后一个chunk是空闲的,unlink 后面的chunk,合并两个chunk
  • 放入unsorted bin
  • 释放 mmap 的 chunk

malloc_consolidate

malloc_consolidate函数用在fastbin中的chunk合并过程,下面在源码中简要梳理下。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/* Drain every fastbin of AV: each fast chunk is coalesced with its free
   neighbors (like in free()) and placed in the unsorted bin, or merged
   into the top chunk when it borders top. */
static void malloc_consolidate(mstate av)
{
mfastbinptr* fb; /* current fastbin being consolidated */
mfastbinptr* maxfb; /* last fastbin (for loop control) */
mchunkptr p; /* current chunk being consolidated */
mchunkptr nextp; /* next chunk to consolidate */
mchunkptr unsorted_bin; /* bin header */
mchunkptr first_unsorted; /* chunk to link to */

/* These have same use as in free() */
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;

atomic_store_relaxed (&av->have_fastchunks, false);

unsorted_bin = unsorted_chunks(av);

/*
Remove each chunk from fast bin and consolidate it, placing it
then in unsorted bin. Among other reasons for doing this,
placing in unsorted bin avoids needing to calculate actual bins
until malloc is sure that chunks aren't immediately going to be
reused anyway.
*/
/* Walk every fastbin; within each bin follow the fd links and
   consolidate every chunk. */
maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do {
p = atomic_exchange_acq (fb, NULL);
if (p != 0) {
do {
{
unsigned int idx = fastbin_index (chunksize (p));
if ((&fastbin (av, idx)) != fb)
malloc_printerr ("malloc_consolidate(): invalid chunk size");
}

check_inuse_chunk(av, p);
nextp = p->fd;

/* Slightly streamlined version of consolidation code in free() */
size = chunksize (p);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
/* Backward merge: absorb a free lower-address neighbor. */
if (!prev_inuse(p)) {
prevsize = prev_size (p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
if (__glibc_unlikely (chunksize(p) != prevsize))
malloc_printerr ("corrupted size vs. prev_size in fastbins");
unlink_chunk (av, p);
}
/* Forward merge: absorb a free higher-address neighbor (except top). */
if (nextchunk != av->top) {
// Test nextchunk's inuse bit (stored in the chunk after it).
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
if (!nextinuse) {
size += nextsize;
unlink_chunk (av, nextchunk);
} else
// Clear nextchunk's PREV_INUSE bit: this chunk is now free.
clear_inuse_bit_at_offset(nextchunk, 0);

first_unsorted = unsorted_bin->fd;
unsorted_bin->fd = p;
first_unsorted->bk = p;

if (!in_smallbin_range (size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}

set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin;
p->fd = first_unsorted;
set_foot(p, size);
}

else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
}

} while ( (p = nextp) != 0);

}
} while (fb++ != maxfb);
}

unlink_chunk函数用于将某个chunk从所在的chunk双向链表上取下来,较早版本的unlink因为安全检查不够充分,导致会出现任意地址读写的情况发生,不过如今已经修复了。下面通过在源码中添加注释的方式说下大致过程。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/* Take a chunk off a bin list.  */
/* Detach chunk P from its doubly-linked bin list. */
/* Notes:
 *
 * (1) P's own fd/bk fields are NOT cleared, so a freed chunk can leak
 *     heap or libc addresses.
 * (2) Call sites (per ctf-wiki):
 *     malloc: taking a chunk from an exactly-fitting large bin, or from a
 *       bin larger than the requested size
 *     free: backward merge with the lower-address free neighbor / forward
 *       merge with the higher-address free neighbor (except top)
 *     malloc_consolidate: same backward/forward merges as free
 *     realloc: forward extension, merging the higher-address free
 *       neighbor (except top)
 */
static void
unlink_chunk (mstate av, mchunkptr p)
{
/* Check that P's size matches the prev_size recorded in the next chunk. */
if (chunksize (p) != prev_size (next_chunk (p)))
/* On any detected corruption glibc aborts via malloc_printerr. */
malloc_printerr ("corrupted size vs. prev_size");

mchunkptr fd = p->fd;
mchunkptr bk = p->bk;
/* Verify the doubly-linked list is intact.  This defeats the classic
   unlink attack where forged fd/bk gave an arbitrary write. */
if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
malloc_printerr ("corrupted double-linked list");

fd->bk = bk;
bk->fd = fd;
/* Large-bin chunks additionally live in the nextsize skip list, via the
   fd_nextsize / bk_nextsize fields. */
/* If p->fd_nextsize is NULL, P is not on the nextsize list at all. */

if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)
{
/* Same kind of integrity check as above, for the nextsize links. */
if (p->fd_nextsize->bk_nextsize != p
|| p->bk_nextsize->fd_nextsize != p)
malloc_printerr ("corrupted double-linked list (not small)");
/* FD is not yet on the nextsize list... */
if (fd->fd_nextsize == NULL)
{
/* ...and the nextsize list contains only P itself: */
if (p->fd_nextsize == p)
/* list collapses to FD alone */
fd->fd_nextsize = fd->bk_nextsize = fd;
else
{
/* otherwise splice FD into the nextsize list in P's place */
fd->fd_nextsize = p->fd_nextsize;
fd->bk_nextsize = p->bk_nextsize;
p->fd_nextsize->bk_nextsize = fd;
p->bk_nextsize->fd_nextsize = fd;
}
}
else/* FD is already on the nextsize list: just unlink P from it */
{
p->fd_nextsize->bk_nextsize = p->bk_nextsize;
p->bk_nextsize->fd_nextsize = p->fd_nextsize;
}
}
}

以上就是关于堆内存分配的一些东西,后面就要开始看堆溢出的利用姿势了。