nsenter.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
// +build cgo
//
// formatted with indent -linux nsenter.c
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <linux/limits.h>
#include <linux/sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
  16. #define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)
  17. static const kBufSize = 256;
  18. static const char *kNsEnter = "nsenter";
  19. void get_args(int *argc, char ***argv)
  20. {
  21. // Read argv
  22. int fd = open("/proc/self/cmdline", O_RDONLY);
  23. if (fd < 0) {
  24. pr_perror("Unable to open /proc/self/cmdline");
  25. exit(1);
  26. }
  27. // Read the whole commandline.
  28. ssize_t contents_size = 0;
  29. ssize_t contents_offset = 0;
  30. char *contents = NULL;
  31. ssize_t bytes_read = 0;
  32. do {
  33. contents_size += kBufSize;
  34. contents = (char *)realloc(contents, contents_size);
  35. bytes_read =
  36. read(fd, contents + contents_offset,
  37. contents_size - contents_offset);
  38. if (bytes_read < 0) {
  39. pr_perror("Unable to read from /proc/self/cmdline");
  40. exit(1);
  41. }
  42. contents_offset += bytes_read;
  43. }
  44. while (bytes_read > 0);
  45. close(fd);
  46. // Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args.
  47. ssize_t i;
  48. *argc = 0;
  49. for (i = 0; i < contents_offset; i++) {
  50. if (contents[i] == '\0') {
  51. (*argc)++;
  52. }
  53. }
  54. *argv = (char **)malloc(sizeof(char *) * ((*argc) + 1));
  55. int idx;
  56. for (idx = 0; idx < (*argc); idx++) {
  57. (*argv)[idx] = contents;
  58. contents += strlen(contents) + 1;
  59. }
  60. (*argv)[*argc] = NULL;
  61. }
  62. // Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
  63. #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
  64. #define _GNU_SOURCE
  65. #include <sched.h>
  66. #include "syscall.h"
  67. #ifdef SYS_setns
  68. int setns(int fd, int nstype)
  69. {
  70. return syscall(SYS_setns, fd, nstype);
  71. }
  72. #endif
  73. #endif
  74. void print_usage()
  75. {
  76. fprintf(stderr,
  77. "nsenter --nspid <pid> --console <console> -- cmd1 arg1 arg2...\n");
  78. }
  79. void nsenter()
  80. {
  81. int argc, c;
  82. char **argv;
  83. get_args(&argc, &argv);
  84. // check argv 0 to ensure that we are supposed to setns
  85. // we use strncmp to test for a value of "nsenter" but also allows alternate implmentations
  86. // after the setns code path to continue to use the argv 0 to determine actions to be run
  87. // resulting in the ability to specify "nsenter-mknod", "nsenter-exec", etc...
  88. if (strncmp(argv[0], kNsEnter, strlen(kNsEnter)) != 0) {
  89. return;
  90. }
  91. #ifdef PR_SET_CHILD_SUBREAPER
  92. if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
  93. pr_perror("Failed to set child subreaper");
  94. exit(1);
  95. }
  96. #endif
  97. static const struct option longopts[] = {
  98. {"nspid", required_argument, NULL, 'n'},
  99. {"console", required_argument, NULL, 't'},
  100. {NULL, 0, NULL, 0}
  101. };
  102. pid_t init_pid = -1;
  103. char *init_pid_str = NULL;
  104. char *console = NULL;
  105. while ((c = getopt_long_only(argc, argv, "n:c:", longopts, NULL)) != -1) {
  106. switch (c) {
  107. case 'n':
  108. init_pid_str = optarg;
  109. break;
  110. case 't':
  111. console = optarg;
  112. break;
  113. }
  114. }
  115. if (init_pid_str == NULL) {
  116. print_usage();
  117. exit(1);
  118. }
  119. init_pid = strtol(init_pid_str, NULL, 10);
  120. if ((init_pid == 0 && errno == EINVAL) || errno == ERANGE) {
  121. pr_perror("Failed to parse PID from \"%s\" with output \"%d\"",
  122. init_pid_str, init_pid);
  123. print_usage();
  124. exit(1);
  125. }
  126. argc -= 3;
  127. argv += 3;
  128. if (setsid() == -1) {
  129. pr_perror("setsid failed");
  130. exit(1);
  131. }
  132. // before we setns we need to dup the console
  133. int consolefd = -1;
  134. if (console != NULL) {
  135. consolefd = open(console, O_RDWR);
  136. if (consolefd < 0) {
  137. pr_perror("Failed to open console %s", console);
  138. exit(1);
  139. }
  140. }
  141. // blocking until the parent placed the process inside correct cgroups.
  142. unsigned char s;
  143. if (read(3, &s, 1) != 1 || s != '1') {
  144. pr_perror("failed to receive synchronization data from parent");
  145. exit(1);
  146. }
  147. // Setns on all supported namespaces.
  148. char ns_dir[PATH_MAX];
  149. memset(ns_dir, 0, PATH_MAX);
  150. snprintf(ns_dir, PATH_MAX - 1, "/proc/%d/ns/", init_pid);
  151. int ns_dir_fd;
  152. ns_dir_fd = open(ns_dir, O_RDONLY | O_DIRECTORY);
  153. if (ns_dir_fd < 0) {
  154. pr_perror("Unable to open %s", ns_dir);
  155. exit(1);
  156. }
  157. char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
  158. const int num = sizeof(namespaces) / sizeof(char *);
  159. int i;
  160. for (i = 0; i < num; i++) {
  161. // A zombie process has links on namespaces, but they can't be opened
  162. struct stat st;
  163. if (fstatat(ns_dir_fd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW)
  164. == -1) {
  165. if (errno == ENOENT)
  166. continue;
  167. pr_perror("Failed to stat ns file %s for ns %s",
  168. ns_dir, namespaces[i]);
  169. exit(1);
  170. }
  171. int fd = openat(ns_dir_fd, namespaces[i], O_RDONLY);
  172. if (fd == -1) {
  173. pr_perror("Failed to open ns file %s for ns %s",
  174. ns_dir, namespaces[i]);
  175. exit(1);
  176. }
  177. // Set the namespace.
  178. if (setns(fd, 0) == -1) {
  179. pr_perror("Failed to setns for %s", namespaces[i]);
  180. exit(1);
  181. }
  182. close(fd);
  183. }
  184. close(ns_dir_fd);
  185. // We must fork to actually enter the PID namespace.
  186. int child = fork();
  187. if (child == -1) {
  188. pr_perror("Unable to fork a process");
  189. exit(1);
  190. }
  191. if (child == 0) {
  192. if (consolefd != -1) {
  193. if (dup2(consolefd, STDIN_FILENO) != 0) {
  194. pr_perror("Failed to dup 0");
  195. exit(1);
  196. }
  197. if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) {
  198. pr_perror("Failed to dup 1");
  199. exit(1);
  200. }
  201. if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) {
  202. pr_perror("Failed to dup 2\n");
  203. exit(1);
  204. }
  205. }
  206. // Finish executing, let the Go runtime take over.
  207. return;
  208. } else {
  209. // Parent, wait for the child.
  210. int status = 0;
  211. if (waitpid(child, &status, 0) == -1) {
  212. pr_perror("nsenter: Failed to waitpid with error");
  213. exit(1);
  214. }
  215. // Forward the child's exit code or re-send its death signal.
  216. if (WIFEXITED(status)) {
  217. exit(WEXITSTATUS(status));
  218. } else if (WIFSIGNALED(status)) {
  219. kill(getpid(), WTERMSIG(status));
  220. }
  221. exit(1);
  222. }
  223. return;
  224. }