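The GCC that comes with my Fedora installation doesn’t appear to have a __sync_val_compare_and_swap that works with __uint128_t, so here is a replacement built on the x86-64 `lock cmpxchg16b` instruction via inline assembly, together with a small self-test: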

#undef NDEBUG
#include <assert.h>

/*
 * Atomic 128-bit compare-and-swap built on the x86-64 `lock cmpxchg16b`
 * instruction.
 *
 * If *src equals cmp, *src is replaced by `with`; otherwise *src is left
 * unchanged. In both cases the value *src held before the operation is
 * returned, so the exchange took place iff the return value equals cmp.
 *
 * src  - location to update; cmpxchg16b requires it to be 16-byte aligned
 *        (the natural alignment of __uint128_t objects).
 * cmp  - value *src is expected to hold.
 * with - value stored into *src when the comparison succeeds.
 *
 * Declared `static inline` so that a definition is always available to the
 * linker: under C99/C11 rules a plain `inline` function provides no external
 * definition and a non-inlined call would fail to link.
 */
static inline __uint128_t InterlockedCompareExchange128( volatile __uint128_t * src, __uint128_t cmp, __uint128_t with )
{
  /* cmpxchg16b takes the expected value in RDX:RAX and, when the comparison
     fails, writes the current memory contents back into that pair. */
  unsigned long long expected_lo = (unsigned long long)cmp;
  unsigned long long expected_hi = (unsigned long long)(cmp >> 64);

  __asm__ __volatile__
  (
      "lock cmpxchg16b %2"
      : "+a" ( expected_lo )
      , "+d" ( expected_hi )
      , "+m" ( *src )
      : "b" ( (unsigned long long)with )          /* RBX: low half of the new value  */
      , "c" ( (unsigned long long)(with >> 64) )  /* RCX: high half of the new value */
      : "cc", "memory"  /* ZF is written; "memory" keeps the compiler from moving
                           other loads/stores across the atomic operation */
  );

  /* On success RDX:RAX still holds cmp, which is also the previous value. */
  return ((__uint128_t)expected_hi << 64) | expected_lo;
}

/*
 * Self-test for InterlockedCompareExchange128.
 *
 * Runs four exchanges against a single 128-bit target, alternating between
 * a matching and a non-matching comparand, and checks both the returned
 * previous value and the resulting contents of the target after each one.
 * Returns 0 when every assertion holds.
 */
int main(int argc, char* argv[])
{
  const __uint128_t zero = 0;
  /* Distinct bit patterns in the two 64-bit halves, so a swapped or dropped
     half would be detected. */
  const __uint128_t pattern =
      ((__uint128_t)0x0123456789ABCDEFULL << 64) | 0xFEDCBA9876543210ULL;
  __uint128_t target = 0;
  __uint128_t previous;

  /* Comparand matches (target is zero): pattern is stored. */
  previous = InterlockedCompareExchange128(&target, zero, pattern);
  assert(zero == previous);
  assert(target == pattern);

  /* Comparand does not match (target is pattern): target is untouched. */
  previous = InterlockedCompareExchange128(&target, zero, zero);
  assert(pattern == previous);
  assert(target == pattern);

  /* Comparand matches (target is pattern): zero is stored. */
  previous = InterlockedCompareExchange128(&target, pattern, zero);
  assert(pattern == previous);
  assert(target == zero);

  /* Comparand does not match (target is zero): target is untouched. */
  previous = InterlockedCompareExchange128(&target, pattern, pattern);
  assert(zero == previous);
  assert(target == zero);

  return 0;
}