--- /dev/null
+Bottom: 6c3a4d4cc0146d11acbd41ec30ac983f004e11c7
+Top: 1ba59f6f7d9886c4ce5b8aa5fa06f770d53b8bf2
+Author: Sean Hefty <sean.hefty@intel.com>
+Date: 2012-08-06 13:48:46 -0700
+
+Refresh of dup2
+
+---
+
+diff --git a/src/cma.h b/src/cma.h
+index cedc0c3..6c3df27 100644
+--- a/src/cma.h
++++ b/src/cma.h
+@@ -79,6 +79,31 @@ static inline uint64_t ntohll(uint64_t x) { return x; }
+ #define fastlock_destroy(lock) pthread_mutex_destroy(lock)
+ #define fastlock_acquire(lock) pthread_mutex_lock(lock)
+ #define fastlock_release(lock) pthread_mutex_unlock(lock)
++
++typedef struct { pthread_mutex_t mut; int val; } atomic_t;
++static inline int atomic_inc(atomic_t *atomic)
++{
++ int v;
++
++ pthread_mutex_lock(&atomic->mut);
++ v = ++(atomic->val);
++ pthread_mutex_unlock(&atomic->mut);
++ return v;
++}
++static inline int atomic_dec(atomic_t *atomic)
++{
++ int v;
++
++ pthread_mutex_lock(&atomic->mut);
++ v = --(atomic->val);
++ pthread_mutex_unlock(&atomic->mut);
++ return v;
++}
++static inline void atomic_init(atomic_t *atomic)
++{
++ pthread_mutex_init(&atomic->mut, NULL);
++ atomic->val = 0;
++}
+ #else
+ typedef struct {
+ sem_t sem;
+@@ -103,7 +128,14 @@ static inline void fastlock_release(fastlock_t *lock)
+ if (__sync_sub_and_fetch(&lock->cnt, 1) > 0)
+ sem_post(&lock->sem);
+ }
++
++typedef struct { volatile int val; } atomic_t;
++#define atomic_inc(v) (__sync_add_and_fetch(&(v)->val, 1))
++#define atomic_dec(v) (__sync_sub_and_fetch(&(v)->val, 1))
++#define atomic_init(v) ((v)->val = 0)
+ #endif /* DEFINE_ATOMICS */
++#define atomic_get(v) ((v)->val)
++#define atomic_set(v, s) ((v)->val = s)
+
+ int ucma_max_qpsize(struct rdma_cm_id *id);
+ int ucma_complete(struct rdma_cm_id *id);
+diff --git a/src/preload.c b/src/preload.c
+index 4b891a1..b716e66 100644
+--- a/src/preload.c
++++ b/src/preload.c
+@@ -83,6 +83,7 @@ struct socket_calls {
+ int (*getsockopt)(int socket, int level, int optname,
+ void *optval, socklen_t *optlen);
+ int (*fcntl)(int socket, int cmd, ... /* arg */);
++ int (*dup2)(int oldfd, int newfd);
+ };
+
+ static struct socket_calls real;
+@@ -105,6 +106,8 @@ enum fd_type {
+ struct fd_info {
+ enum fd_type type;
+ int fd;
++ struct fd_info *dupfdi;
++ atomic_t refcnt;
+ };
+
+ static int fd_open(void)
+@@ -122,6 +125,8 @@ static int fd_open(void)
+ goto err1;
+ }
+
++ atomic_init(&fdi->refcnt);
++ atomic_set(&fdi->refcnt, 1);
+ pthread_mutex_lock(&mut);
+ ret = idm_set(&idm, index, fdi);
+ pthread_mutex_unlock(&mut);
+@@ -252,6 +257,7 @@ static void init_preload(void)
+ real.setsockopt = dlsym(RTLD_NEXT, "setsockopt");
+ real.getsockopt = dlsym(RTLD_NEXT, "getsockopt");
+ real.fcntl = dlsym(RTLD_NEXT, "fcntl");
++ real.dup2 = dlsym(RTLD_NEXT, "dup2");
+
+ rs.socket = dlsym(RTLD_DEFAULT, "rsocket");
+ rs.bind = dlsym(RTLD_DEFAULT, "rbind");
+@@ -887,9 +893,44 @@ int fcntl(int socket, int cmd, ... /* arg */)
+ return ret;
+ }
+
++/*
++ * dup2 is not thread safe
++ */
+ int dup2(int oldfd, int newfd)
+ {
+- int fd;
+- return (fd_get(oldfd, &fd) == fd_rsocket) ?
+- : dup2(oldfd, newfd);
++ struct fd_info *oldfdi, *newfdi;
++ int ret;
++
++ oldfdi = idm_lookup(&idm, oldfd);
++ newfdi = idm_lookup(&idm, newfd);
++ if (newfdi) {
++ /* newfd cannot have been dup'ed directly */
++ if (atomic_get(&newfdi->refcnt) > 1)
++ return ERR(EBUSY);
++ close(newfd);
++ }
++
++ ret = real.dup2(oldfd, newfd);
++ if (!oldfdi || ret != newfd)
++ return ret;
++
++ newfdi = calloc(1, sizeof *newfdi);
++ if (!newfdi) {
++ close(newfd);
++ return ERR(ENOMEM);
++ }
++
++ pthread_mutex_lock(&mut);
++ idm_set(&idm, newfd, newfdi);
++ pthread_mutex_unlock(&mut);
++
++ if (oldfdi->dupfdi)
++ oldfdi = oldfdi->dupfdi;
++ newfdi->fd = oldfdi->fd;
++ newfdi->type = oldfdi->type;
++ newfdi->dupfdi = oldfdi;
++ atomic_init(&newfdi->refcnt);
++ atomic_set(&newfdi->refcnt, 1);
++ atomic_inc(&oldfdi->refcnt);
++ return newfd;
+ }