[klibc] kinit: replacement for in-kernel do_mount, ipconfig, nfsroot kinit provides the default root-mounting code. It should be compatible with the in-kernel root-mounting code (modulo bugs); it also provides a few minor enhancements. Signed-off-by: H. Peter Anvin --- commit 30ba12d98c16e30f6d4d0e4b7c7971b15f4ac442 tree a93459e4cfee55139e966b1107fd0d1a8bf72232 parent a48648edf3e88b8b2c1de3cdb04ecc600f933041 author H. Peter Anvin Sun, 02 Jul 2006 12:26:08 -0700 committer H. Peter Anvin Sun, 02 Jul 2006 12:26:08 -0700 usr/kinit/Kbuild | 28 + usr/kinit/README | 9 usr/kinit/devname.c | 114 +++++ usr/kinit/do_mounts.c | 221 ++++++++++ usr/kinit/do_mounts.h | 48 ++ usr/kinit/do_mounts_md.c | 398 ++++++++++++++++++ usr/kinit/do_mounts_mtd.c | 44 ++ usr/kinit/fstype/Kbuild | 25 + usr/kinit/fstype/cramfs_fs.h | 85 ++++ usr/kinit/fstype/ext2_fs.h | 79 ++++ usr/kinit/fstype/ext3_fs.h | 92 ++++ usr/kinit/fstype/fstype.c | 296 ++++++++++++++ usr/kinit/fstype/fstype.h | 23 + usr/kinit/fstype/jfs_superblock.h | 114 +++++ usr/kinit/fstype/luks_fs.h | 44 ++ usr/kinit/fstype/lvm2_sb.h | 18 + usr/kinit/fstype/main.c | 58 +++ usr/kinit/fstype/minix_fs.h | 85 ++++ usr/kinit/fstype/reiserfs_fs.h | 69 +++ usr/kinit/fstype/romfs_fs.h | 56 +++ usr/kinit/fstype/swap_fs.h | 18 + usr/kinit/fstype/xfs_sb.h | 16 + usr/kinit/getarg.c | 57 +++ usr/kinit/getintfile.c | 30 + usr/kinit/initrd.c | 199 +++++++++ usr/kinit/ipconfig/Kbuild | 31 + usr/kinit/ipconfig/README | 103 +++++ usr/kinit/ipconfig/bootp_packet.h | 34 ++ usr/kinit/ipconfig/bootp_proto.c | 213 ++++++++++ usr/kinit/ipconfig/bootp_proto.h | 8 usr/kinit/ipconfig/dhcp_proto.c | 212 ++++++++++ usr/kinit/ipconfig/dhcp_proto.h | 18 + usr/kinit/ipconfig/ipconfig.h | 35 ++ usr/kinit/ipconfig/main.c | 758 +++++++++++++++++++++++++++++++++++ usr/kinit/ipconfig/netdev.c | 251 ++++++++++++ usr/kinit/ipconfig/netdev.h | 83 ++++ usr/kinit/ipconfig/packet.c | 286 +++++++++++++ usr/kinit/ipconfig/packet.h | 9 usr/kinit/kinit.c | 330 +++++++++++++++ usr/kinit/kinit.h | 73 +++ usr/kinit/name_to_dev.c | 202 +++++++++ usr/kinit/nfsmount/Kbuild | 27 + usr/kinit/nfsmount/README.locking | 26 + usr/kinit/nfsmount/dummypmap.c | 188 +++++++++ usr/kinit/nfsmount/dummypmap.h | 13 + usr/kinit/nfsmount/dummypmap_test.c | 2 usr/kinit/nfsmount/main.c | 263 ++++++++++++ usr/kinit/nfsmount/mount.c | 357 ++++++++++++++++ usr/kinit/nfsmount/nfsmount.h | 39 ++ usr/kinit/nfsmount/portmap.c | 73 +++ usr/kinit/nfsmount/sunrpc.c | 253 ++++++++++++ usr/kinit/nfsmount/sunrpc.h | 99 +++++ usr/kinit/nfsroot.c | 113 +++++ usr/kinit/open.c | 18 + usr/kinit/ramdisk_load.c | 271 +++++++++++++ usr/kinit/readfile.c | 86 ++++ usr/kinit/resume.c | 75 +++ usr/kinit/run-init/Kbuild | 25 + usr/kinit/run-init/run-init.c | 95 ++++ usr/kinit/run-init/run-init.h | 38 ++ usr/kinit/run-init/runinitlib.c | 216 ++++++++++ usr/kinit/xpio.c | 51 ++ usr/kinit/xpio.h | 11 + 63 files changed, 7211 insertions(+), 0 deletions(-) diff --git a/usr/kinit/Kbuild b/usr/kinit/Kbuild new file mode 100644 index 0000000..db79dfd --- /dev/null +++ b/usr/kinit/Kbuild @@ -0,0 +1,28 @@ +# +# Kbuild file for kinit +# + +static-y := kinit +kinit-y := kinit.o do_mounts.o ramdisk_load.o initrd.o +kinit-y += name_to_dev.o devname.o +kinit-y += getarg.o getintfile.o open.o readfile.o xpio.o +kinit-y += do_mounts_md.o do_mounts_mtd.o nfsroot.o resume.o + +kinit-y += ipconfig/ +kinit-y += nfsmount/ +kinit-y += run-init/ +kinit-y += fstype/ + +# Additional include paths files +KLIBCCFLAGS += -I$(srctree)/$(src)/ipconfig \ + -I$(srctree)/$(src)/nfsmount \ + -I$(srctree)/$(src)/run-init \ + -I$(srctree)/$(src)/fstype + +# Cleaning +targets := kinit kinit.g +subdir- := ipconfig nfsmount run-init fstype + + +# install binary +install-y := kinit diff --git a/usr/kinit/README b/usr/kinit/README new file mode 100644 index 0000000..fa7f645 --- /dev/null +++ b/usr/kinit/README @@ -0,0 +1,9 @@ +kinit - tiny init program +------------------------- + +This program is intended for use as /sbin/init in an initramfs +environment. It currently replaces the kernel's ipconfig and nfsroot +code. + +-- +Bryan O'Sullivan (2003/05/05) diff --git a/usr/kinit/devname.c b/usr/kinit/devname.c new file mode 100644 index 0000000..c09be46 --- /dev/null +++ b/usr/kinit/devname.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * devname.c + * + * Print the name of a block device. + */ +#define BUF_SIZE 512 + +static int scansysdir(char *namebuf, char *sysdir, dev_t dev) +{ + char *dirtailptr = strchr(sysdir, '\0'); + DIR *dir; + int done = 0; + struct dirent *de; + char *systail; + FILE *sysdev; + unsigned long ma, mi; + char *ep; + ssize_t rd; + + if (!(dir = opendir(sysdir))) + return 0; + + *dirtailptr++ = '/'; + + while (!done && (de = readdir(dir))) { + /* Assume if we see a dot-name in sysfs it's special */ + if (de->d_name[0] == '.') + continue; + + if (de->d_type != DT_UNKNOWN && de->d_type != DT_DIR) + continue; + + if (strlen(de->d_name) >= + (BUF_SIZE - 64) - (dirtailptr - sysdir)) + continue; /* Badness... */ + + strcpy(dirtailptr, de->d_name); + systail = strchr(sysdir, '\0'); + + strcpy(systail, "/dev"); + if (!(sysdev = fopen(sysdir, "r"))) + continue; + + /* Abusing the namebuf as temporary storage here. */ + rd = fread(namebuf, 1, BUF_SIZE, sysdev); + namebuf[rd] = '\0'; /* Just in case... */ + + fclose(sysdev); + + ma = strtoul(namebuf, &ep, 10); + if (ma != major(dev) || *ep != ':') + continue; + + mi = strtoul(ep + 1, &ep, 10); + if (*ep != '\n') + continue; + + if (mi == minor(dev)) { + /* Found it! */ + strcpy(namebuf, de->d_name); + done = 1; + } else { + /* we have a major number match, scan for partitions */ + *systail = '\0'; + done = scansysdir(namebuf, sysdir, dev); + } + } + + closedir(dir); + return done; +} + +const char *bdevname(dev_t dev) +{ + static char buf[BUF_SIZE]; + char sysdir[BUF_SIZE]; + char *p; + + strcpy(sysdir, "/sys/block"); + + if (!scansysdir(buf, sysdir, dev)) + strcpy(buf, "dev"); /* prints e.g. dev(3,5) */ + + p = strchr(buf, '\0'); + snprintf(p, sizeof buf - (p - buf), "(%d,%d)", major(dev), minor(dev)); + + return buf; +} + +#ifdef TEST_DEVNAME /* Standalone test */ + +int main(int argc, char *argv[]) +{ + dev_t dev; + int i; + + for (i = 1; i < argc; i++) { + dev = strtoul(argv[i], NULL, 0); + + printf("0x%08x = %s\n", (unsigned int)dev, bdevname(dev)); + } + + return 0; +} + +#endif /* TEST */ diff --git a/usr/kinit/do_mounts.c b/usr/kinit/do_mounts.c new file mode 100644 index 0000000..9daa006 --- /dev/null +++ b/usr/kinit/do_mounts.c @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "do_mounts.h" +#include "kinit.h" +#include "fstype.h" +#include "zlib.h" + +/* Create the device node "name" */ +int create_dev(const char *name, dev_t dev) +{ + unlink(name); + return mknod(name, S_IFBLK | 0600, dev); +} + +/* mount a filesystem, possibly trying a set of different types */ +const char *mount_block(const char *source, const char *target, + const char *type, unsigned long flags, + const void *data) +{ + char *fslist, *p, *ep; + const char *rp; + ssize_t fsbytes; + int fd; + + if (type) { + DEBUG(("kinit: trying to mount %s on %s with type %s\n", + source, target, type)); + int rv = mount(source, target, type, flags, data); + /* Mount readonly if necessary */ + if (rv == -1 && errno == EACCES && !(flags & MS_RDONLY)) + rv = mount(source, target, type, flags | MS_RDONLY, + data); + return rv ? NULL : type; + } + + /* If no type given, try to identify the type first; this + also takes care of specific ordering requirements, like + ext3 before ext2... */ + fd = open(source, O_RDONLY); + if (fd >= 0) { + int err = identify_fs(fd, &type, NULL, 0); + close(fd); + + if (!err && type) { + DEBUG(("kinit: %s appears to be a %s filesystem\n", + source, type)); + type = mount_block(source, target, type, flags, data); + if (type) + return type; + } + } + + DEBUG(("kinit: failed to identify filesystem %s, trying all\n", + source)); + + fsbytes = readfile("/proc/filesystems", &fslist); + + errno = EINVAL; + if (fsbytes < 0) + return NULL; + + p = fslist; + ep = fslist + fsbytes; + + rp = NULL; + + while (p < ep) { + type = p; + p = strchr(p, '\n'); + if (!p) + break; + *p++ = '\0'; + if (*type != '\t') /* We can't mount a block device as a "nodev" fs */ + continue; + + type++; + rp = mount_block(source, target, type, flags, data); + if (rp) + break; + if (errno != EINVAL) + break; + } + + free(fslist); + return rp; +} + +/* mount the root filesystem from a block device */ +static int +mount_block_root(int argc, char *argv[], dev_t root_dev, + const char *type, unsigned long flags) +{ + const char *data, *rp; + + data = get_arg(argc, argv, "rootflags="); + create_dev("/dev/root", root_dev); + + errno = 0; + + if (type) { + if ((rp = mount_block("/dev/root", "/root", type, flags, data))) + goto ok; + if (errno != EINVAL) + goto bad; + } + + if (!errno + && (rp = mount_block("/dev/root", "/root", NULL, flags, data))) + goto ok; + + bad: + if (errno != EINVAL) { + /* + * Allow the user to distinguish between failed open + * and bad superblock on root device. + */ + fprintf(stderr, "%s: Cannot open root device %s\n", + progname, bdevname(root_dev)); + return -errno; + } else { + fprintf(stderr, "%s: Unable to mount root fs on device %s\n", + progname, bdevname(root_dev)); + return -ESRCH; + } + + ok: + printf("%s: Mounted root (%s filesystem)%s.\n", + progname, rp, (flags & MS_RDONLY) ? " readonly" : ""); + return 0; +} + +int +mount_root(int argc, char *argv[], dev_t root_dev, const char *root_dev_name) +{ + unsigned long flags = MS_RDONLY | MS_VERBOSE; + int ret; + const char *type = get_arg(argc, argv, "rootfstype="); + + if (get_flag(argc, argv, "rw") > get_flag(argc, argv, "ro")) { + DEBUG(("kinit: mounting root rw\n")); + flags &= ~MS_RDONLY; + } + + if (type) { + if (!strcmp(type, "nfs")) + root_dev = Root_NFS; + else if (!strcmp(type, "jffs2") && !major(root_dev)) + root_dev = Root_MTD; + } + + switch (root_dev) { + case Root_NFS: + ret = mount_nfs_root(argc, argv, flags); + break; + case Root_MTD: + ret = mount_mtd_root(argc, argv, root_dev_name, type, flags); + break; + default: + ret = mount_block_root(argc, argv, root_dev, type, flags); + break; + } + + if (!ret) + chdir("/root"); + + return ret; +} + +int do_mounts(int argc, char *argv[]) +{ + const char *root_dev_name = get_arg(argc, argv, "root="); + const char *root_delay = get_arg(argc, argv, "rootdelay="); + const char *load_ramdisk = get_arg(argc, argv, "load_ramdisk="); + dev_t root_dev = 0; + + DEBUG(("kinit: do_mounts\n")); + + if (root_delay) { + int delay = atoi(root_delay); + fprintf(stderr, "Waiting %d s before mounting root device...\n", + delay); + sleep(delay); + } + + md_run(argc, argv); + + if (root_dev_name) { + root_dev = name_to_dev_t(root_dev_name); + } else if (get_arg(argc, argv, "nfsroot=") || + get_arg(argc, argv, "nfsaddrs=")) { + root_dev = Root_NFS; + } else { + long rootdev; + getintfile("/proc/sys/kernel/real-root-dev", &rootdev); + root_dev = (dev_t) rootdev; + } + + DEBUG(("kinit: root_dev = %s\n", bdevname(root_dev))); + + if (initrd_load(argc, argv, root_dev)) { + DEBUG(("initrd loaded\n")); + return 0; + } + + if (load_ramdisk && atoi(load_ramdisk)) { + if (ramdisk_load(argc, argv, root_dev)) + root_dev = Root_RAM0; + } + + return mount_root(argc, argv, root_dev, root_dev_name); +} diff --git a/usr/kinit/do_mounts.h b/usr/kinit/do_mounts.h new file mode 100644 index 0000000..2024e86 --- /dev/null +++ b/usr/kinit/do_mounts.h @@ -0,0 +1,48 @@ +/* + * do_mounts.h + */ + +#ifndef DO_MOUNTS_H +#define DO_MOUNTS_H + +#include +#include +#include + +#define Root_RAM0 __makedev(1,0) + +/* These device numbers are only used internally */ +#define Root_NFS __makedev(0,255) +#define Root_MTD __makedev(0,254) + +int create_dev(const char *name, dev_t dev); + +dev_t name_to_dev_t(const char *name); + +const char *mount_block(const char *source, const char *target, + const char *type, unsigned long flags, + const void *data); + +int mount_root(int argc, char *argv[], dev_t root_dev, + const char *root_dev_name); + +int mount_mtd_root(int argc, char *argv[], const char *root_dev_name, + const char *type, unsigned long flags); + +int do_mounts(int argc, char *argv[]); + +int initrd_load(int argc, char *argv[], dev_t root_dev); + +static inline dev_t bstat(const char *name) +{ + struct stat st; + + if (stat(name, &st) || !S_ISBLK(st.st_mode)) + return 0; + return st.st_rdev; +} + +int load_ramdisk_compressed(const char *devpath, FILE * wfd, + off_t ramdisk_start); + +#endif /* DO_MOUNTS_H */ diff --git a/usr/kinit/do_mounts_md.c b/usr/kinit/do_mounts_md.c new file mode 100644 index 0000000..b386180 --- /dev/null +++ b/usr/kinit/do_mounts_md.c @@ -0,0 +1,398 @@ +/* + * usr/kinit/do_mounts_md.c + * + * Handle autoconfiguration of md devices. This is ugly, partially since + * it still relies on a sizable kernel component. + * + * This file is derived from the Linux kernel. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "do_mounts.h" + +#define LEVEL_NONE (-1000000) + +/* + * When md (and any require personalities) are compiled into the kernel + * (not a module), arrays can be assembles are boot time using with AUTODETECT + * where specially marked partitions are registered with md_autodetect_dev(), + * and with MD_BOOT where devices to be collected are given on the boot line + * with md=..... + * The code for that is here. + */ + +static int raid_noautodetect, raid_autopart; + +static struct { + int minor; + int partitioned; + int level; + int chunk; + char *device_names; +} md_setup_args[MAX_MD_DEVS]; + +static int md_setup_ents; + +/** + * get_option - Parse integer from an option string + * @str: option string + * @pint: (output) integer value parsed from @str + * + * Read an int from an option string; if available accept a subsequent + * comma as well. + * + * Return values: + * 0 : no int in string + * 1 : int found, no subsequent comma + * 2 : int found including a subsequent comma + */ + +static int get_option(char **str, int *pint) +{ + char *cur = *str; + + if (!cur || !(*cur)) + return 0; + *pint = strtol(cur, str, 0); + if (cur == *str) + return 0; + if (**str == ',') { + (*str)++; + return 2; + } + + return 1; +} + +/* + * Find the partitioned md device major number... of course this *HAD* + * to be done dynamically instead of using a registered number. + * Sigh. Double sigh. + */ +static int mdp_major(void) +{ + static int found = 0; + FILE *f; + char line[512], *p; + int is_blk, major_no; + + if (found) + return found; + + f = fopen("/proc/devices", "r"); + is_blk = 0; + while (fgets(line, sizeof line, f)) { + if (!strcmp(line, "Block devices:\n")) + is_blk = 1; + if (is_blk) { + major_no = strtol(line, &p, 10); + while (*p && isspace(*p)) + p++; + + if (major_no == 0) /* Not a number */ + is_blk = 0; + else if (major_no > 0 && !strcmp(p, "mdp")) { + found = major_no; + break; + } + } + } + fclose(f); + + if (!found) { + fprintf(stderr, + "Error: mdp devices detected but no mdp device found!\n"); + exit(1); + } + + return found; +} + +/* + * Parse the command-line parameters given our kernel, but do not + * actually try to invoke the MD device now; that is handled by + * md_setup_drive after the low-level disk drivers have initialised. + * + * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which + * assigns the task of parsing integer arguments to the + * invoked program now). Added ability to initialise all + * the MD devices (by specifying multiple "md=" lines) + * instead of just one. -- KTK + * 18May2000: Added support for persistent-superblock arrays: + * md=n,0,factor,fault,device-list uses RAID0 for device n + * md=n,-1,factor,fault,device-list uses LINEAR for device n + * md=n,device-list reads a RAID superblock from the devices + * elements in device-list are read by name_to_kdev_t so can be + * a hex number or something like /dev/hda1 /dev/sdb + * 2001-06-03: Dave Cinege + * Shifted name_to_kdev_t() and related operations to md_set_drive() + * for later execution. Rewrote section to make devfs compatible. + */ +static int md_setup(char *str) +{ + int minor, level, factor, fault, partitioned = 0; + char *pername = ""; + char *str1; + int ent; + + if (*str == 'd') { + partitioned = 1; + str++; + } + if (get_option(&str, &minor) != 2) { /* MD Number */ + fprintf(stderr, "md: Too few arguments supplied to md=.\n"); + return 0; + } + str1 = str; + if (minor >= MAX_MD_DEVS) { + fprintf(stderr, "md: md=%d, Minor device number too high.\n", + minor); + return 0; + } + for (ent = 0; ent < md_setup_ents; ent++) + if (md_setup_args[ent].minor == minor && + md_setup_args[ent].partitioned == partitioned) { + fprintf(stderr, + "md: md=%s%d, Specified more than once. " + "Replacing previous definition.\n", + partitioned ? "d" : "", minor); + break; + } + if (ent >= MAX_MD_DEVS) { + fprintf(stderr, "md: md=%s%d - too many md initialisations\n", + partitioned ? "d" : "", minor); + return 0; + } + if (ent >= md_setup_ents) + md_setup_ents++; + switch (get_option(&str, &level)) { /* RAID level */ + case 2: /* could be 0 or -1.. */ + if (level == 0 || level == LEVEL_LINEAR) { + if (get_option(&str, &factor) != 2 || /* Chunk Size */ + get_option(&str, &fault) != 2) { + fprintf(stderr, + "md: Too few arguments supplied to md=.\n"); + return 0; + } + md_setup_args[ent].level = level; + md_setup_args[ent].chunk = 1 << (factor + 12); + if (level == LEVEL_LINEAR) + pername = "linear"; + else + pername = "raid0"; + break; + } + /* FALL THROUGH */ + case 1: /* the first device is numeric */ + str = str1; + /* FALL THROUGH */ + case 0: + md_setup_args[ent].level = LEVEL_NONE; + pername = "super-block"; + } + + fprintf(stderr, "md: Will configure md%s%d (%s) from %s, below.\n", + partitioned?"_d":"", minor, pername, str); + md_setup_args[ent].device_names = str; + md_setup_args[ent].partitioned = partitioned; + md_setup_args[ent].minor = minor; + + return 1; +} + +#define MdpMinorShift 6 + +static void md_setup_drive(void) +{ + int dev_minor, i, ent, partitioned; + dev_t dev; + dev_t devices[MD_SB_DISKS + 1]; + + for (ent = 0; ent < md_setup_ents; ent++) { + int fd; + int err = 0; + char *devname; + mdu_disk_info_t dinfo; + char name[16]; + + dev_minor = md_setup_args[ent].minor; + partitioned = md_setup_args[ent].partitioned; + devname = md_setup_args[ent].device_names; + + snprintf(name, sizeof name, + "/dev/md%s%d", partitioned ? "_d" : "", dev_minor); + + if (partitioned) + dev = makedev(mdp_major(), dev_minor << MdpMinorShift); + else + dev = makedev(MD_MAJOR, dev_minor); + create_dev(name, dev); + for (i = 0; i < MD_SB_DISKS && devname != 0; i++) { + char *p; + + p = strchr(devname, ','); + if (p) + *p++ = 0; + + dev = name_to_dev_t(devname); + if (!dev) { + fprintf(stderr, "md: Unknown device name: %s\n", + devname); + break; + } + + devices[i] = dev; + + devname = p; + } + devices[i] = 0; + + if (!i) + continue; + + fprintf(stderr, "md: Loading md%s%d: %s\n", + partitioned ? "_d" : "", dev_minor, + md_setup_args[ent].device_names); + + fd = open(name, 0, 0); + if (fd < 0) { + fprintf(stderr, "md: open failed - cannot start " + "array %s\n", name); + continue; + } + if (ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { + fprintf(stderr, + "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", + dev_minor); + close(fd); + continue; + } + + if (md_setup_args[ent].level != LEVEL_NONE) { + /* non-persistent */ + mdu_array_info_t ainfo; + ainfo.level = md_setup_args[ent].level; + ainfo.size = 0; + ainfo.nr_disks = 0; + ainfo.raid_disks = 0; + while (devices[ainfo.raid_disks]) + ainfo.raid_disks++; + ainfo.md_minor = dev_minor; + ainfo.not_persistent = 1; + + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.layout = 0; + ainfo.chunk_size = md_setup_args[ent].chunk; + err = ioctl(fd, SET_ARRAY_INFO, &ainfo); + for (i = 0; !err && i <= MD_SB_DISKS; i++) { + dev = devices[i]; + if (!dev) + break; + dinfo.number = i; + dinfo.raid_disk = i; + dinfo.state = + (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC); + dinfo.major = major(dev); + dinfo.minor = minor(dev); + err = ioctl(fd, ADD_NEW_DISK, &dinfo); + } + } else { + /* persistent */ + for (i = 0; i <= MD_SB_DISKS; i++) { + dev = devices[i]; + if (!dev) + break; + dinfo.major = major(dev); + dinfo.minor = minor(dev); + ioctl(fd, ADD_NEW_DISK, &dinfo); + } + } + if (!err) + err = ioctl(fd, RUN_ARRAY, 0); + if (err) + fprintf(stderr, "md: starting md%d failed\n", + dev_minor); + else { + /* reread the partition table. + * I (neilb) and not sure why this is needed, but I cannot + * boot a kernel with devfs compiled in from partitioned md + * array without it + */ + close(fd); + fd = open(name, 0, 0); + ioctl(fd, BLKRRPART, 0); + } + close(fd); + } +} + +static int raid_setup(char *str) +{ + int len, pos; + + len = strlen(str) + 1; + pos = 0; + + while (pos < len) { + char *comma = strchr(str + pos, ','); + int wlen; + if (comma) + wlen = (comma - str) - pos; + else + wlen = (len - 1) - pos; + + if (!strncmp(str, "noautodetect", wlen)) + raid_noautodetect = 1; + if (strncmp(str, "partitionable", wlen) == 0) + raid_autopart = 1; + if (strncmp(str, "part", wlen) == 0) + raid_autopart = 1; + pos += wlen + 1; + } + return 1; +} + +static void md_run_setup(void) +{ + create_dev("/dev/md0", makedev(MD_MAJOR, 0)); + if (raid_noautodetect) + fprintf(stderr, + "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n"); + else { + int fd = open("/dev/md0", 0, 0); + if (fd >= 0) { + ioctl(fd, RAID_AUTORUN, + (void *)(intptr_t) raid_autopart); + close(fd); + } + } + md_setup_drive(); +} + +void md_run(int argc, char *argv[]) +{ + char **pp, *p; + + for (pp = argv; (p = *pp); pp++) { + if (!strncmp(p, "raid=", 5)) + raid_setup(p + 5); + else if (!strncmp(p, "md=", 3)) + md_setup(p + 3); + } + + md_run_setup(); +} diff --git a/usr/kinit/do_mounts_mtd.c b/usr/kinit/do_mounts_mtd.c new file mode 100644 index 0000000..a77bd18 --- /dev/null +++ b/usr/kinit/do_mounts_mtd.c @@ -0,0 +1,44 @@ +/* + * usr/kinit/do_mounts_mtd.c + * + * Mount an MTD device as a character device. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kinit.h" +#include "do_mounts.h" + +int mount_mtd_root(int argc, char *argv[], const char *root_dev_name, + const char *type, unsigned long flags) +{ + const char *data = get_arg(argc, argv, "rootflags="); + + if (!type) + type = "jffs2"; + + DEBUG(("Trying to mount MTD %s as root (%s filesystem)\n", + root_dev_name, type)); + + if (mount(root_dev_name, "/root", type, flags, data)) { + int err = errno; + fprintf(stderr, + "%s: Unable to mount MTD %s (%s filesystem) " + "as root: %s\n", + progname, root_dev_name, type, strerror(err)); + return -err; + } else { + fprintf(stderr, "%s: Mounted root (%s filesystem)%s.\n", + progname, type, (flags & MS_RDONLY) ? " readonly" : ""); + return 0; + } + +} diff --git a/usr/kinit/fstype/Kbuild b/usr/kinit/fstype/Kbuild new file mode 100644 index 0000000..9b20db1 --- /dev/null +++ b/usr/kinit/fstype/Kbuild @@ -0,0 +1,25 @@ +# +# Kbuild file for fstype +# + +static-y := static/fstype +shared-y := shared/fstype + +# common .o files +objs := main.o fstype.o + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? + +# Create built-in.o with all object files (used by kinit) +lib-y := $(objs) + +# .o files used to built executables +static/fstype-y := $(objs) +shared/fstype-y := $(objs) + +# Cleaning +clean-dirs := static shared + +# install binary +install-y := $(shared-y) diff --git a/usr/kinit/fstype/cramfs_fs.h b/usr/kinit/fstype/cramfs_fs.h new file mode 100644 index 0000000..6f5ad4f --- /dev/null +++ b/usr/kinit/fstype/cramfs_fs.h @@ -0,0 +1,85 @@ +#ifndef __CRAMFS_H +#define __CRAMFS_H + +#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +#define CRAMFS_SIGNATURE "Compressed ROMFS" + +/* + * Width of various bitfields in struct cramfs_inode. + * Primarily used to generate warnings in mkcramfs. + */ +#define CRAMFS_MODE_WIDTH 16 +#define CRAMFS_UID_WIDTH 16 +#define CRAMFS_SIZE_WIDTH 24 +#define CRAMFS_GID_WIDTH 8 +#define CRAMFS_NAMELEN_WIDTH 6 +#define CRAMFS_OFFSET_WIDTH 26 + +/* + * Since inode.namelen is a unsigned 6-bit number, the maximum cramfs + * path length is 63 << 2 = 252. + */ +#define CRAMFS_MAXPATHLEN (((1 << CRAMFS_NAMELEN_WIDTH) - 1) << 2) + +/* + * Reasonably terse representation of the inode data. + */ +struct cramfs_inode { + __u32 mode:CRAMFS_MODE_WIDTH, uid:CRAMFS_UID_WIDTH; + /* SIZE for device files is i_rdev */ + __u32 size:CRAMFS_SIZE_WIDTH, gid:CRAMFS_GID_WIDTH; + /* NAMELEN is the length of the file name, divided by 4 and + rounded up. (cramfs doesn't support hard links.) */ + /* OFFSET: For symlinks and non-empty regular files, this + contains the offset (divided by 4) of the file data in + compressed form (starting with an array of block pointers; + see README). For non-empty directories it is the offset + (divided by 4) of the inode of the first file in that + directory. For anything else, offset is zero. */ + __u32 namelen:CRAMFS_NAMELEN_WIDTH, offset:CRAMFS_OFFSET_WIDTH; +}; + +struct cramfs_info { + __u32 crc; + __u32 edition; + __u32 blocks; + __u32 files; +}; + +/* + * Superblock information at the beginning of the FS. + */ +struct cramfs_super { + __u32 magic; /* 0x28cd3d45 - random number */ + __u32 size; /* length in bytes */ + __u32 flags; /* feature flags */ + __u32 future; /* reserved for future use */ + __u8 signature[16]; /* "Compressed ROMFS" */ + struct cramfs_info fsid; /* unique filesystem info */ + __u8 name[16]; /* user-defined name */ + struct cramfs_inode root; /* root inode data */ +}; + +/* + * Feature flags + * + * 0x00000000 - 0x000000ff: features that work for all past kernels + * 0x00000100 - 0xffffffff: features that don't work for past kernels + */ +#define CRAMFS_FLAG_FSID_VERSION_2 0x00000001 /* fsid version #2 */ +#define CRAMFS_FLAG_SORTED_DIRS 0x00000002 /* sorted dirs */ +#define CRAMFS_FLAG_HOLES 0x00000100 /* support for holes */ +#define CRAMFS_FLAG_WRONG_SIGNATURE 0x00000200 /* reserved */ +#define CRAMFS_FLAG_SHIFTED_ROOT_OFFSET 0x00000400 /* shifted root fs */ + +/* + * Valid values in super.flags. Currently we refuse to mount + * if (flags & ~CRAMFS_SUPPORTED_FLAGS). Maybe that should be + * changed to test super.future instead. + */ +#define CRAMFS_SUPPORTED_FLAGS ( 0x000000ff \ + | CRAMFS_FLAG_HOLES \ + | CRAMFS_FLAG_WRONG_SIGNATURE \ + | CRAMFS_FLAG_SHIFTED_ROOT_OFFSET ) + +#endif diff --git a/usr/kinit/fstype/ext2_fs.h b/usr/kinit/fstype/ext2_fs.h new file mode 100644 index 0000000..293c5d6 --- /dev/null +++ b/usr/kinit/fstype/ext2_fs.h @@ -0,0 +1,79 @@ +/* + * The second extended file system magic number + */ +#define EXT2_SUPER_MAGIC 0xEF53 + +/* + * Structure of the super block + */ +struct ext2_super_block { + __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count; /* Blocks count */ + __le32 s_r_blocks_count; /* Reserved blocks count */ + __le32 s_free_blocks_count; /* Free blocks count */ + __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_log_frag_size; /* Fragment size */ + __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_frags_per_group; /* # Fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ + __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ + __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ + __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ + __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_COMPAT_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ + __u8 s_journal_uuid[16]; /* uuid of journal superblock */ + __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; + __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __u32 s_reserved[190]; /* Padding to the end of the block */ +}; diff --git a/usr/kinit/fstype/ext3_fs.h b/usr/kinit/fstype/ext3_fs.h new file mode 100644 index 0000000..cec1bdd --- /dev/null +++ b/usr/kinit/fstype/ext3_fs.h @@ -0,0 +1,92 @@ +/* + * The second extended file system magic number + */ +#define EXT3_SUPER_MAGIC 0xEF53 + +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 + +/* + * Structure of the super block + */ +struct ext3_super_block { + /*00*/ __u32 s_inodes_count; + /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + /*10*/ __u32 s_free_inodes_count; + /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ + /*20*/ __u32 s_blocks_per_group; + /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + /*30*/ __u32 s_wtime; + /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_minor_rev_level; /* minor revision level */ + /*40*/ __u32 s_lastcheck; + /* time of last check */ + __u32 s_checkinterval; /* max. time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ + /*50*/ __u16 s_def_resuid; + /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT3_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set */ + /*60*/ __u32 s_feature_incompat; + /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ + /*68*/ __u8 s_uuid[16]; + /* 128-bit uuid for volume */ + /*78*/ char s_volume_name[16]; + /* volume name */ + /*88*/ char s_last_mounted[64]; + /* directory where last mounted */ + /*C8*/ __u32 s_algorithm_usage_bitmap; + /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ + /*D0*/ __u8 s_journal_uuid[16]; + /* uuid of journal superblock */ + /*E0*/ __u32 s_journal_inum; + /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; + __u32 s_reserved[192]; /* Padding to the end of the block */ +}; diff --git a/usr/kinit/fstype/fstype.c b/usr/kinit/fstype/fstype.c new file mode 100644 index 0000000..cea219e --- /dev/null +++ b/usr/kinit/fstype/fstype.c @@ -0,0 +1,296 @@ +/* + * fstype.c, by rmk + * + * Detect filesystem type (on stdin) and output strings for two + * environment variables: + * FSTYPE - filesystem type + * FSSIZE - filesystem size (if known) + * + * We currently detect the filesystems listed below in the struct + * "imagetype images" (in the order they are listed). + * + * MINIX, ext3 and Reiserfs bits are currently untested. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define cpu_to_be32(x) __cpu_to_be32(x) /* Needed by romfs_fs.h */ + +#include "romfs_fs.h" +#include "cramfs_fs.h" +#include "minix_fs.h" +#include "ext2_fs.h" +#include "ext3_fs.h" +#include "xfs_sb.h" +#include "luks_fs.h" +#include "lvm2_sb.h" + +/* + * Slightly cleaned up version of jfs_superblock to + * avoid pulling in other kernel header files. + */ +#include "jfs_superblock.h" + +/* + * reiserfs_fs.h is too sick to include directly. + * Use a cleaned up version. + */ +#include "reiserfs_fs.h" + +#include "fstype.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#define BLOCK_SIZE 1024 + +/* Swap needs the definition of block size */ +#include "swap_fs.h" + +static int gzip_image(const void *buf, unsigned long long *bytes) +{ + const unsigned char *p = buf; + + if (p[0] == 037 && (p[1] == 0213 || p[1] == 0236)) { + /* The length of a gzip stream can only be determined + by processing the whole stream */ + *bytes = 0ULL; + return 1; + } + return 0; +} + +static int cramfs_image(const void *buf, unsigned long long *bytes) +{ + const struct cramfs_super *sb = (const struct cramfs_super *)buf; + + if (sb->magic == CRAMFS_MAGIC) { + if (sb->flags & CRAMFS_FLAG_FSID_VERSION_2) + *bytes = (unsigned long long)sb->fsid.blocks << 10; + else + *bytes = 0; + return 1; + } + return 0; +} + +static int romfs_image(const void *buf, unsigned long long *bytes) +{ + const struct romfs_super_block *sb = + (const struct romfs_super_block *)buf; + + if (sb->word0 == ROMSB_WORD0 && sb->word1 == ROMSB_WORD1) { + *bytes = __be32_to_cpu(sb->size); + return 1; + } + return 0; +} + +static int minix_image(const void *buf, unsigned long long *bytes) +{ + const struct minix_super_block *sb = + (const struct minix_super_block *)buf; + + if (sb->s_magic == MINIX_SUPER_MAGIC || + sb->s_magic == MINIX_SUPER_MAGIC2) { + *bytes = (unsigned long long)sb->s_nzones + << (sb->s_log_zone_size + 10); + return 1; + } + return 0; +} + +static int ext3_image(const void *buf, unsigned long long *bytes) +{ + const struct ext3_super_block *sb = + (const struct ext3_super_block *)buf; + + if (sb->s_magic == __cpu_to_le16(EXT2_SUPER_MAGIC) && + sb-> + s_feature_compat & __cpu_to_le32(EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { + *bytes = (unsigned long long)__le32_to_cpu(sb->s_blocks_count) + << (10 + sb->s_log_block_size); + return 1; + } + return 0; +} + +static int ext2_image(const void *buf, unsigned long long *bytes) +{ + const struct ext2_super_block *sb = + (const struct ext2_super_block *)buf; + + if (sb->s_magic == __cpu_to_le16(EXT2_SUPER_MAGIC)) { + *bytes = (unsigned long long)__le32_to_cpu(sb->s_blocks_count) + << (10 + sb->s_log_block_size); + return 1; + } + return 0; +} + +static int reiserfs_image(const void *buf, unsigned long long *bytes) +{ + const struct reiserfs_super_block *sb = + (const struct reiserfs_super_block *)buf; + + if (memcmp(REISERFS_MAGIC(sb), REISERFS_SUPER_MAGIC_STRING, + sizeof(REISERFS_SUPER_MAGIC_STRING) - 1) == 0 || + memcmp(REISERFS_MAGIC(sb), REISER2FS_SUPER_MAGIC_STRING, + sizeof(REISER2FS_SUPER_MAGIC_STRING) - 1) == 0 || + memcmp(REISERFS_MAGIC(sb), REISER2FS_JR_SUPER_MAGIC_STRING, + sizeof(REISER2FS_JR_SUPER_MAGIC_STRING) - 1) == 0) { + *bytes = (unsigned long long)REISERFS_BLOCK_COUNT(sb) * + REISERFS_BLOCKSIZE(sb); + return 1; + } + return 0; +} + +static int xfs_image(const void *buf, unsigned long long *bytes) +{ + const struct xfs_sb *sb = (const struct xfs_sb *)buf; + + if (__be32_to_cpu(sb->sb_magicnum) == XFS_SB_MAGIC) { + *bytes = __be64_to_cpu(sb->sb_dblocks) * + __be32_to_cpu(sb->sb_blocksize); + return 1; + } + return 0; +} + +static int jfs_image(const void *buf, unsigned long long *bytes) +{ + const struct jfs_superblock *sb = (const struct jfs_superblock *)buf; + + if (!memcmp(sb->s_magic, JFS_MAGIC, 4)) { + *bytes = __le32_to_cpu(sb->s_size); + return 1; + } + return 0; +} + +static int luks_image(const void *buf, unsigned long long *blocks) +{ + const struct luks_partition_header *lph = + (const struct luks_partition_header *)buf; + + if (!memcmp(lph->magic, LUKS_MAGIC, LUKS_MAGIC_L)) { + /* FSSIZE is dictated by the underlying fs, not by LUKS */ + *blocks = 0; + return 1; + } + return 0; +} + +static int swap_image(const void *buf, unsigned long long *blocks) +{ + const struct swap_super_block *ssb = + (const struct swap_super_block *)buf; + + if (!memcmp(ssb->magic, SWAP_MAGIC_1, SWAP_MAGIC_L) || + !memcmp(ssb->magic, SWAP_MAGIC_2, SWAP_MAGIC_L)) { + *blocks = 0; + return 1; + } + return 0; +} + +static int lvm2_image(const void *buf, unsigned long long *blocks) +{ + const struct lvm2_super_block *lsb; + int i; + + /* We must check every 512 byte sector */ + for (i = 0; i < BLOCK_SIZE; i += 0x200) { + lsb = (const struct lvm2_super_block *)(buf + i); + + if (!memcmp(lsb->magic, LVM2_MAGIC, LVM2_MAGIC_L) && + !memcmp(lsb->type, LVM2_TYPE, LVM2_TYPE_L)) { + /* This is just one of possibly many PV's */ + *blocks = 0; + return 1; + } + } + + return 0; +} + +struct imagetype { + off_t block; + const char name[12]; + int (*identify) (const void *, unsigned long long *); +}; + +/* + * Note: + * + * Minix test needs to come after ext3/ext2, since it's possible for + * ext3/ext2 to look like minix by pure random chance. + * + * LVM comes after all other filesystems since it's possible + * that an old lvm signature is left on the disk if pvremove + * is not used before creating the new fs. + */ +static struct imagetype images[] = { + {0, "gzip", gzip_image}, + {0, "cramfs", cramfs_image}, + {0, "romfs", romfs_image}, + {0, "xfs", xfs_image}, + {0, "luks", luks_image}, + {1, "ext3", ext3_image}, + {1, "ext2", ext2_image}, + {1, "minix", minix_image}, + {8, "reiserfs", reiserfs_image}, + {64, "reiserfs", reiserfs_image}, + {32, "jfs", jfs_image}, + {0, "lvm2", lvm2_image}, + {1, "lvm2", lvm2_image}, + {-1, "swap", swap_image}, + {0, "", NULL} +}; + +int identify_fs(int fd, const char **fstype, + unsigned long long *bytes, off_t offset) +{ + uint64_t buf[BLOCK_SIZE >> 3]; /* 64-bit worst case alignment */ + off_t cur_block = (off_t) - 1; + struct imagetype *ip; + int ret; + unsigned long long dummy; + + if (!bytes) + bytes = &dummy; + + *fstype = NULL; + *bytes = 0; + + for (ip = images; ip->identify; ip++) { + /* Hack for swap, which apparently is dependent on page size */ + if (ip->block == -1) + ip->block = SWAP_OFFSET(); + + if (cur_block != ip->block) { + /* + * Read block. + */ + cur_block = ip->block; + ret = pread(fd, buf, BLOCK_SIZE, + offset + cur_block * BLOCK_SIZE); + if (ret != BLOCK_SIZE) + return -1; /* error */ + } + + if (ip->identify(buf, bytes)) { + *fstype = ip->name; + return 0; + } + } + + return 1; /* Unknown filesystem */ +} diff --git a/usr/kinit/fstype/fstype.h b/usr/kinit/fstype/fstype.h new file mode 100644 index 0000000..b966aac --- /dev/null +++ b/usr/kinit/fstype/fstype.h @@ -0,0 +1,23 @@ +/* + * fstype.c, by rmk + * + * Detect filesystem type (on stdin) and output strings for two + * environment variables: + * FSTYPE - filesystem type + * FSSIZE - filesystem size (if known) + * + * We currently detect (in order): + * gzip, cramfs, romfs, xfs, minix, ext3, ext2, reiserfs, jfs + * + * MINIX, ext3 and Reiserfs bits are currently untested. + */ + +#ifndef FSTYPE_H +#define FSTYPE_H + +#include + +int identify_fs(int fd, const char **fstype, + unsigned long long *bytes, off_t offset); + +#endif diff --git a/usr/kinit/fstype/jfs_superblock.h b/usr/kinit/fstype/jfs_superblock.h new file mode 100644 index 0000000..63132a0 --- /dev/null +++ b/usr/kinit/fstype/jfs_superblock.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) International Business Machines Corp., 2000-2003 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_SUPERBLOCK +#define _H_JFS_SUPERBLOCK + +struct timestruc_t { + __le32 tv_sec; + __le32 tv_nsec; +}; + +/* + * make the magic number something a human could read + */ +#define JFS_MAGIC "JFS1" /* Magic word */ + +#define JFS_VERSION 2 /* Version number: Version 2 */ + +#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ + +/* + * aggregate superblock + * + * The name superblock is too close to super_block, so the name has been + * changed to jfs_superblock. The utilities are still using the old name. + */ +struct jfs_superblock { + char s_magic[4]; /* 4: magic number */ + __le32 s_version; /* 4: version number */ + + __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; + * VFS: number of blocks + */ + __le32 s_bsize; /* 4: aggregate block size in bytes; + * VFS: fragment size + */ + __le16 s_l2bsize; /* 2: log2 of s_bsize */ + __le16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */ + __le32 s_pbsize; /* 4: hardware/LVM block size in bytes */ + __le16 s_l2pbsize; /* 2: log2 of s_pbsize */ + __le16 pad; /* 2: padding necessary for alignment */ + + __le32 s_agsize; /* 4: allocation group size in aggr. blocks */ + + __le32 s_flag; /* 4: aggregate attributes: + * see jfs_filsys.h + */ + __le32 s_state; /* 4: mount/unmount/recovery state: + * see jfs_filsys.h + */ + __le32 s_compress; /* 4: > 0 if data compression */ + + __le64 s_ait2; /* 8: first extent of secondary + * aggregate inode table + */ + + __le64 s_aim2; /* 8: first extent of secondary + * aggregate inode map + */ + __le32 s_logdev; /* 4: device address of log */ + __le32 s_logserial; /* 4: log serial number at aggregate mount */ + __le64 s_logpxd; /* 8: inline log extent */ + + __le64 s_fsckpxd; /* 8: inline fsck work space extent */ + + struct timestruc_t s_time; /* 8: time last updated */ + + __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for + * the fsck service log. + * N.B. These blocks are divided among the + * versions kept. This is not a per + * version size. + * N.B. These blocks are included in the + * length field of s_fsckpxd. + */ + char s_fscklog; /* 1: which fsck service log is most recent + * 0 => no service log data yet + * 1 => the first one + * 2 => the 2nd one + */ + char s_fpack[11]; /* 11: file system volume name + * N.B. This must be 11 bytes to + * conform with the OS/2 BootSector + * requirements + * Only used when s_version is 1 + */ + + /* extendfs() parameter under s_state & FM_EXTENDFS */ + __le64 s_xsize; /* 8: extendfs s_size */ + __le64 s_xfsckpxd; /* 8: extendfs fsckpxd */ + __le64 s_xlogpxd; /* 8: extendfs logpxd */ + /* - 128 byte boundary - */ + + char s_uuid[16]; /* 16: 128-bit uuid for volume */ + char s_label[16]; /* 16: volume label */ + char s_loguuid[16]; /* 16: 128-bit uuid for log device */ + +}; + +#endif /*_H_JFS_SUPERBLOCK */ diff --git a/usr/kinit/fstype/luks_fs.h b/usr/kinit/fstype/luks_fs.h new file mode 100644 index 0000000..fd8de31 --- /dev/null +++ b/usr/kinit/fstype/luks_fs.h @@ -0,0 +1,44 @@ +#ifndef __LINUX_LUKS_FS_H +#define __LINUX_LUKS_FS_H + +/* The basic structures of the luks partition header */ +#define LUKS_MAGIC_L 6 +#define LUKS_CIPHERNAME_L 32 +#define LUKS_CIPHERMODE_L 32 +#define LUKS_HASHSPEC_L 32 +#define LUKS_UUID_STRING_L 40 + +#define LUKS_MAGIC "LUKS\xBA\xBE" +#define LUKS_DIGESTSIZE 20 +#define LUKS_SALTSIZE 32 +#define LUKS_NUMKEYS 8 +#define LUKS_MKD_ITER 10 +#define LUKS_KEY_DISABLED 0x0000DEAD +#define LUKS_KEY_ENABLED 0x00AC71F3 +#define LUKS_STRIPES 4000 + +/* On-disk "super block" */ +struct luks_partition_header { + char magic[LUKS_MAGIC_L]; + __be16 version; + char cipherName[LUKS_CIPHERNAME_L]; + char cipherMode[LUKS_CIPHERMODE_L]; + char hashSpec[LUKS_HASHSPEC_L]; + __be32 payloadOffset; + __be32 keyBytes; + char mkDigest[LUKS_DIGESTSIZE]; + char mkDigestSalt[LUKS_SALTSIZE]; + __be32 mkDigestIterations; + char uuid[LUKS_UUID_STRING_L]; + + struct { + __be32 active; + /* Parameters for PBKDF2 processing */ + __be32 passwordIterations; + char passwordSalt[LUKS_SALTSIZE]; + __be32 keyMaterialOffset; + __be32 stripes; + } keyblock[LUKS_NUMKEYS]; +}; + +#endif diff --git a/usr/kinit/fstype/lvm2_sb.h b/usr/kinit/fstype/lvm2_sb.h new file mode 100644 index 0000000..75dfc10 --- /dev/null +++ b/usr/kinit/fstype/lvm2_sb.h @@ -0,0 +1,18 @@ +#ifndef __LVM2_SB_H +#define __LVM2_SB_H + +/* LVM2 super block definitions */ +#define LVM2_MAGIC_L 8 +#define LVM2_MAGIC "LABELONE" +#define LVM2_TYPE_L 8 +#define LVM2_TYPE "LVM2 001" + +struct lvm2_super_block { + char magic[LVM2_MAGIC_L]; + __be64 sector; + __be32 crc; + __be32 offset; + char type[LVM2_TYPE_L]; +}; + +#endif diff --git a/usr/kinit/fstype/main.c b/usr/kinit/fstype/main.c new file mode 100644 index 0000000..83dbcc5 --- /dev/null +++ b/usr/kinit/fstype/main.c @@ -0,0 +1,58 @@ +/* + * fstype.c, by rmk + * + * Detect filesystem type (on stdin) and output strings for two + * environment variables: + * FSTYPE - filesystem type + * FSSIZE - filesystem size (if known) + * + * We currently detect (in order): + * gzip, cramfs, romfs, xfs, minix, ext3, ext2, reiserfs, jfs + * + * MINIX, ext3 and Reiserfs bits are currently untested. + */ + +#include +#include +#include +#include +#include +#include +#include "fstype.h" + +char *progname; + +int main(int argc, char *argv[]) +{ + int fd = 0; + int rv; + const char *fstype; + const char *file = "stdin"; + unsigned long long bytes; + + progname = argv[0]; + + if (argc > 2) { + fprintf(stderr, "Usage: %s [file]\n", progname); + return 1; + } + + if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '\0') { + fd = open(file = argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + return 2; + } + } + + rv = identify_fs(fd, &fstype, &bytes, 0); + if (rv == -1) { + perror(file); + return 2; + } + + fstype = fstype ? fstype : "unknown"; + + fprintf(stdout, "FSTYPE=%s\nFSSIZE=%llu\n", fstype, bytes); + return rv; +} diff --git a/usr/kinit/fstype/minix_fs.h b/usr/kinit/fstype/minix_fs.h new file mode 100644 index 0000000..e2899f0 --- /dev/null +++ b/usr/kinit/fstype/minix_fs.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_MINIX_FS_H +#define _LINUX_MINIX_FS_H + +/* + * The minix filesystem constants/structures + */ + +/* + * Thanks to Kees J Bot for sending me the definitions of the new + * minix filesystem (aka V2) with bigger inodes and 32-bit block + * pointers. + */ + +#define MINIX_ROOT_INO 1 + +/* Not the same as the bogus LINK_MAX in . Oh well. */ +#define MINIX_LINK_MAX 250 +#define MINIX2_LINK_MAX 65530 + +#define MINIX_I_MAP_SLOTS 8 +#define MINIX_Z_MAP_SLOTS 64 +#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ +#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ +#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ +#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ +#define MINIX_VALID_FS 0x0001 /* Clean fs. */ +#define MINIX_ERROR_FS 0x0002 /* fs has errors. */ + +#define MINIX_INODES_PER_BLOCK ((BLOCK_SIZE)/(sizeof (struct minix_inode))) +#define MINIX2_INODES_PER_BLOCK ((BLOCK_SIZE)/(sizeof (struct minix2_inode))) + +/* + * This is the original minix inode layout on disk. + * Note the 8-bit gid and atime and ctime. + */ +struct minix_inode { + __u16 i_mode; + __u16 i_uid; + __u32 i_size; + __u32 i_time; + __u8 i_gid; + __u8 i_nlinks; + __u16 i_zone[9]; +}; + +/* + * The new minix inode has all the time entries, as well as + * long block numbers and a third indirect block (7+1+1+1 + * instead of 7+1+1). Also, some previously 8-bit values are + * now 16-bit. The inode is now 64 bytes instead of 32. + */ +struct minix2_inode { + __u16 i_mode; + __u16 i_nlinks; + __u16 i_uid; + __u16 i_gid; + __u32 i_size; + __u32 i_atime; + __u32 i_mtime; + __u32 i_ctime; + __u32 i_zone[10]; +}; + +/* + * minix super-block data on disk + */ +struct minix_super_block { + __u16 s_ninodes; + __u16 s_nzones; + __u16 s_imap_blocks; + __u16 s_zmap_blocks; + __u16 s_firstdatazone; + __u16 s_log_zone_size; + __u32 s_max_size; + __u16 s_magic; + __u16 s_state; + __u32 s_zones; +}; + +struct minix_dir_entry { + __u16 inode; + char name[0]; +}; + +#endif diff --git a/usr/kinit/fstype/reiserfs_fs.h b/usr/kinit/fstype/reiserfs_fs.h new file mode 100644 index 0000000..dd47995 --- /dev/null +++ b/usr/kinit/fstype/reiserfs_fs.h @@ -0,0 +1,69 @@ +struct journal_params { + __u32 jp_journal_1st_block; /* where does journal start from on its + * device */ + __u32 jp_journal_dev; /* journal device st_rdev */ + __u32 jp_journal_size; /* size of the journal */ + __u32 jp_journal_trans_max; /* max number of blocks in a transaction. */ + __u32 jp_journal_magic; /* random value made on fs creation (this + * was sb_journal_block_count) */ + __u32 jp_journal_max_batch; /* max number of blocks to batch into a + * trans */ + __u32 jp_journal_max_commit_age; /* in seconds, how old can an async + * commit be */ + __u32 jp_journal_max_trans_age; /* in seconds, how old can a transaction + * be */ +}; + +/* this is the super from 3.5.X, where X >= 10 */ +struct reiserfs_super_block_v1 { + __u32 s_block_count; /* blocks count */ + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + struct journal_params s_journal; + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see + * get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_umount_state; /* this is set to 1 when filesystem was + * umounted, to 2 - when not */ + char s_magic[10]; /* reiserfs magic string indicates that + * file system is reiserfs: + * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ + __u16 s_fs_state; /* it is set to used by fsck to mark which + * phase of rebuilding is done */ + __u32 s_hash_function_code; /* indicate, what hash function is being use + * to sort names in a directory*/ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address + * each block of file system */ + __u16 s_version; /* this field is only reliable on filesystem + * with non-standard journal */ + __u16 s_reserved_for_journal; /* size in blocks of journal area on main + * device, we need to keep after + * making fs with non-standard journal */ +} __attribute__ ((__packed__)); + +/* this is the on disk super block */ +struct reiserfs_super_block { + struct reiserfs_super_block_v1 s_v1; + __u32 s_inode_generation; + __u32 s_flags; /* Right now used only by inode-attributes, if enabled */ + unsigned char s_uuid[16]; /* filesystem unique identifier */ + unsigned char s_label[16]; /* filesystem volume label */ + char s_unused[88]; /* zero filled by mkreiserfs and + * reiserfs_convert_objectid_map_v1() + * so any additions must be updated + * there as well. */ +} __attribute__ ((__packed__)); + +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" +#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + +#define SB_V1_DISK_SUPER_BLOCK(s) (&((s)->s_v1)) +#define REISERFS_BLOCKSIZE(s) \ + __le32_to_cpu((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) +#define REISERFS_BLOCK_COUNT(s) \ + __le32_to_cpu((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) +#define REISERFS_MAGIC(s) \ + (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) diff --git a/usr/kinit/fstype/romfs_fs.h b/usr/kinit/fstype/romfs_fs.h new file mode 100644 index 0000000..c490fbc --- /dev/null +++ b/usr/kinit/fstype/romfs_fs.h @@ -0,0 +1,56 @@ +#ifndef __LINUX_ROMFS_FS_H +#define __LINUX_ROMFS_FS_H + +/* The basic structures of the romfs filesystem */ + +#define ROMBSIZE BLOCK_SIZE +#define ROMBSBITS BLOCK_SIZE_BITS +#define ROMBMASK (ROMBSIZE-1) +#define ROMFS_MAGIC 0x7275 + +#define ROMFS_MAXFN 128 + +#define __mkw(h,l) (((h)&0x00ff)<< 8|((l)&0x00ff)) +#define __mkl(h,l) (((h)&0xffff)<<16|((l)&0xffff)) +#define __mk4(a,b,c,d) cpu_to_be32(__mkl(__mkw(a,b),__mkw(c,d))) +#define ROMSB_WORD0 __mk4('-','r','o','m') +#define ROMSB_WORD1 __mk4('1','f','s','-') + +/* On-disk "super block" */ + +struct romfs_super_block { + __be32 word0; + __be32 word1; + __be32 size; + __be32 checksum; + char name[0]; /* volume name */ +}; + +/* On disk inode */ + +struct romfs_inode { + __be32 next; /* low 4 bits see ROMFH_ */ + __be32 spec; + __be32 size; + __be32 checksum; + char name[0]; +}; + +#define ROMFH_TYPE 7 +#define ROMFH_HRD 0 +#define ROMFH_DIR 1 +#define ROMFH_REG 2 +#define ROMFH_SYM 3 +#define ROMFH_BLK 4 +#define ROMFH_CHR 5 +#define ROMFH_SCK 6 +#define ROMFH_FIF 7 +#define ROMFH_EXEC 8 + +/* Alignment */ + +#define ROMFH_SIZE 16 +#define ROMFH_PAD (ROMFH_SIZE-1) +#define ROMFH_MASK (~ROMFH_PAD) + +#endif diff --git a/usr/kinit/fstype/swap_fs.h b/usr/kinit/fstype/swap_fs.h new file mode 100644 index 0000000..33431df --- /dev/null +++ b/usr/kinit/fstype/swap_fs.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_SWAP_FS_H +#define __LINUX_SWAP_FS_H + +/* The basic structures of the swap super block */ +#define SWAP_MAGIC_L 10 +#define SWAP_RESERVED_L (1024 - SWAP_MAGIC_L) +#define SWAP_MAGIC_1 "SWAP-SPACE" +#define SWAP_MAGIC_2 "SWAPSPACE2" +/* The superblock is the last block in the first page */ +#define SWAP_OFFSET() ((getpagesize() - 1024) >> 10) + +/* On-disk "super block" */ +struct swap_super_block { + char reserved[SWAP_RESERVED_L]; + char magic[SWAP_MAGIC_L]; +}; + +#endif diff --git a/usr/kinit/fstype/xfs_sb.h b/usr/kinit/fstype/xfs_sb.h new file mode 100644 index 0000000..ac10ee9 --- /dev/null +++ b/usr/kinit/fstype/xfs_sb.h @@ -0,0 +1,16 @@ +/* + * Super block + * Fits into a sector-sized buffer at address 0 of each allocation group. + * Only the first of these is ever updated except during growfs. + */ + +struct xfs_buf; +struct xfs_mount; + +#define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */ + +typedef struct xfs_sb { + __u32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ + __u32 sb_blocksize; /* logical block size, bytes */ + __u64 sb_dblocks; /* number of data blocks */ +} xfs_sb_t; diff --git a/usr/kinit/getarg.c b/usr/kinit/getarg.c new file mode 100644 index 0000000..fcce247 --- /dev/null +++ b/usr/kinit/getarg.c @@ -0,0 +1,57 @@ +#include +#include "kinit.h" + +/* + * Routines that hunt for a specific argument. Please note that + * they actually search the array backwards. That is because on the + * kernel command lines, it's legal to override an earlier argument + * with a later argument. + */ + +/* + * Was this boolean argument passed? If so return the index in the + * argv array for it. For conflicting boolean options, use the + * one with the higher index. The only case when the return value + * can be equal, is when they're both zero; so equality can be used + * as the default option choice. + * + * In other words, if two options "a" and "b" are opposites, and "a" + * is the default, this can be coded as: + * + * if (get_flag(argc,argv,"a") >= get_flag(argc,argv,"b")) + * do_a_stuff(); + * else + * do_b_stuff(); + */ +int get_flag(int argc, char *argv[], const char *name) +{ + int i; + + for (i = argc-1; i > 0; i--) { + if (!strcmp(argv[i], name)) + return i; + } + return 0; +} + +/* + * Was this textual parameter (foo=option) passed? + * + * This returns the latest instance of such an option in the argv array. + */ +char *get_arg(int argc, char *argv[], const char *name) +{ + int len = strlen(name); + char *ret = NULL; + int i; + + for (i = argc-1; i > 0; i--) { + if (argv[i] && strncmp(argv[i], name, len) == 0 && + (argv[i][len] != '\0')) { + ret = argv[i] + len; + break; + } + } + + return ret; +} diff --git a/usr/kinit/getintfile.c b/usr/kinit/getintfile.c new file mode 100644 index 0000000..172c762 --- /dev/null +++ b/usr/kinit/getintfile.c @@ -0,0 +1,30 @@ +/* + * getintfile.c + * + * Open a file and read it, assuming it contains a single long value. + * Return 0 if we read a valid value, otherwise -1. + */ + +#include +#include + +int getintfile(const char *path, long *val) +{ + char buffer[64]; + char *ep; + FILE *f; + + f = fopen(path, "r"); + if (!f) + return -1; + + ep = buffer + fread(buffer, 1, sizeof buffer - 1, f); + fclose(f); + *ep = '\0'; + + *val = strtol(buffer, &ep, 0); + if (*ep && *ep != '\n') + return -1; + else + return 0; +} diff --git a/usr/kinit/initrd.c b/usr/kinit/initrd.c new file mode 100644 index 0000000..1371333 --- /dev/null +++ b/usr/kinit/initrd.c @@ -0,0 +1,199 @@ +/* + * kinit/initrd.c + * + * Handle initrd, thus putting the backwards into backwards compatible + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "do_mounts.h" +#include "kinit.h" +#include "xpio.h" + +#define BUF_SIZE 65536 /* Should be a power of 2 */ + +/* + * Copy the initrd to /dev/ram0, copy from the end to the beginning + * to avoid taking 2x the memory. + */ +static int rd_copy_uncompressed(int ffd, int dfd) +{ + char buffer[BUF_SIZE]; + off_t bytes; + struct stat st; + + DEBUG(("kinit: uncompressed initrd\n")); + + if (ffd < 0 || fstat(ffd, &st) || !S_ISREG(st.st_mode) || + (bytes = st.st_size) == 0) + return -1; + + while (bytes) { + ssize_t blocksize = ((bytes - 1) & (BUF_SIZE - 1)) + 1; + off_t offset = bytes - blocksize; + + DEBUG(("kinit: copying %zd bytes at offset %llu\n", + blocksize, offset)); + + if (xpread(ffd, buffer, blocksize, offset) != blocksize || + xpwrite(dfd, buffer, blocksize, offset) != blocksize) + return -1; + + ftruncate(ffd, offset); /* Free up memory */ + bytes = offset; + } + return 0; +} + +static int rd_copy_image(const char *path) +{ + int ffd = open(path, O_RDONLY); + int rv = -1; + unsigned char gzip_magic[2]; + + if (ffd < 0) + goto barf; + + if (xpread(ffd, gzip_magic, 2, 0) == 2 && + gzip_magic[0] == 037 && gzip_magic[1] == 0213) { + FILE *wfd = fopen("/dev/ram0", "w"); + if (!wfd) + goto barf; + rv = load_ramdisk_compressed(path, wfd, 0); + fclose(wfd); + } else { + int dfd = open("/dev/ram0", O_WRONLY); + if (dfd < 0) + goto barf; + rv = rd_copy_uncompressed(ffd, dfd); + close(dfd); + } + +barf: + if (ffd >= 0) + close(ffd); + return rv; +} + +/* + * Run /linuxrc, for emulation of old-style initrd + */ +static int run_linuxrc(int argc, char *argv[], dev_t root_dev) +{ + int root_fd, old_fd; + pid_t pid; + long realroot = Root_RAM0; + const char *ramdisk_name = "/dev/ram0"; + FILE *fp; + + DEBUG(("kinit: mounting initrd\n")); + mkdir("/root", 0700); + if (!mount_block(ramdisk_name, "/root", NULL, MS_VERBOSE, NULL)) + return -errno; + + /* Write the current "real root device" out to procfs */ + DEBUG(("kinit: real_root_dev = %#x\n", root_dev)); + fp = fopen("/proc/sys/kernel/real-root-dev", "w"); + fprintf(fp, "%u", root_dev); + fclose(fp); + + mkdir("/old", 0700); + root_fd = open_cloexec("/", O_RDONLY | O_DIRECTORY, 0); + old_fd = open_cloexec("/old", O_RDONLY | O_DIRECTORY, 0); + + if (root_fd < 0 || old_fd < 0) + return -errno; + + if (chdir("/root") || + mount(".", "/", NULL, MS_MOVE, NULL) || chroot(".")) + return -errno; + + pid = fork(); + if (pid == 0) { + setsid(); + /* Looks like linuxrc doesn't get the init environment + or parameters. Weird, but so is the whole linuxrc bit. */ + execl("/linuxrc", "linuxrc", NULL); + _exit(255); + } else if (pid > 0) { + DEBUG(("kinit: Waiting for linuxrc to complete...\n")); + while (waitpid(pid, NULL, 0) != pid) ; + DEBUG(("kinit: linuxrc done\n")); + } else { + return -errno; + } + + if (fchdir(old_fd) || + mount("/", ".", NULL, MS_MOVE, NULL) || + fchdir(root_fd) || chroot(".")) + return -errno; + + close(root_fd); + close(old_fd); + + getintfile("/proc/sys/kernel/real-root-dev", &realroot); + + /* If realroot is Root_RAM0, then the initrd did any necessary work */ + if (realroot == Root_RAM0) { + if (mount("/old", "/root", NULL, MS_MOVE, NULL)) + return -errno; + } else { + mount_root(argc, argv, (dev_t) realroot, NULL); + + /* If /root/initrd exists, move the initrd there, otherwise discard */ + if (!mount("/old", "/root/initrd", NULL, MS_MOVE, NULL)) { + /* We're good */ + } else { + int olddev = open(ramdisk_name, O_RDWR); + umount2("/old", MNT_DETACH); + if (olddev < 0 || + ioctl(olddev, BLKFLSBUF, (long)0) || + close(olddev)) { + fprintf(stderr, + "%s: Cannot flush initrd contents\n", + progname); + } + } + } + + rmdir("/old"); + return 0; +} + +int initrd_load(int argc, char *argv[], dev_t root_dev) +{ + if (access("/initrd.image", R_OK)) + return 0; /* No initrd */ + + DEBUG(("kinit: initrd found\n")); + + create_dev("/dev/ram0", Root_RAM0); + + if (rd_copy_image("/initrd.image") || unlink("/initrd.image")) { + fprintf(stderr, "%s: initrd installation failed (too big?)\n", + progname); + return 0; /* Failed to copy initrd */ + } + + DEBUG(("kinit: initrd copied\n")); + + if (root_dev != Root_RAM0) { + int err; + DEBUG(("kinit: running linuxrc\n")); + err = run_linuxrc(argc, argv, root_dev); + if (err) + fprintf(stderr, "%s: running linuxrc: %s\n", progname, + strerror(-err)); + return 1; /* initrd is root, or run_linuxrc took care of it */ + } else { + DEBUG(("kinit: permament (or pivoting) initrd, not running linuxrc\n")); + return 0; /* Mounting initrd as ordinary root */ + } +} diff --git a/usr/kinit/ipconfig/Kbuild b/usr/kinit/ipconfig/Kbuild new file mode 100644 index 0000000..7f8d181 --- /dev/null +++ b/usr/kinit/ipconfig/Kbuild @@ -0,0 +1,31 @@ +# +# Kbuild file for ipconfig +# + +static-y := static/ipconfig +shared-y := shared/ipconfig + +# common .o files +objs := main.o netdev.o packet.o +# dhcp +objs += dhcp_proto.o +# bootp +objs += bootp_proto.o + + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? + + +# Create built-in.o with all object files (used by kinit) +lib-y := $(objs) + +# .o files used to built executables +static/ipconfig-y := $(objs) +shared/ipconfig-y := $(objs) + +# Cleaning +clean-dirs := static shared + +# install binary +install-y := $(shared-y) diff --git a/usr/kinit/ipconfig/README b/usr/kinit/ipconfig/README new file mode 100644 index 0000000..46c03fa --- /dev/null +++ b/usr/kinit/ipconfig/README @@ -0,0 +1,103 @@ +BOOTP/DHCP client for klibc +--------------------------- + +Usage: + +ipconfig [-c proto] [-d interface] [-n] [-p port] [-t timeout] [interface ...] + +-c proto Use PROTO as the configuration protocol for all + interfaces, unless overridden by specific interfaces. +-d interface Either the name of an interface, or a long spec. +-n Do nothing - just print the configuration that would + be performed. +-p port Send bootp/dhcp broadcasts from PORT, to PORT - 1. +-t timeout Give up on all unconfigured interfaces after TIMEOUT secs. + +You can configure multiple interfaces by passing multiple interface +specs on the command line, or by using the special interface name +"all". If you're autoconfiguring any interfaces, ipconfig will wait +until either all such interfaces have been configured, or the timeout +passes. + +PROTO can be one of the following, which selects the autoconfiguration +protocol to use: + +not specified use all protocols (the default) +dhcp use bootp and dhcp +bootp use bootp only +rarp use rarp (not currently supported) +none no autoconfiguration - either static config, or none at all + +An interface spec can be either short form, which is just the name of +an interface (eth0 or whatever), or long form. The long form consists +of up to seven elements, separated by colons: + +:::::: + + IP address of the client. If empty, the address will + either be determined by RARP/BOOTP/DHCP. What protocol + is used de- pends on the parameter. If this + parameter is not empty, autoconf will be used. + + IP address of the NFS server. If RARP is used to + determine the client address and this parameter is NOT + empty only replies from the specified server are + accepted. To use different RARP and NFS server, + specify your RARP server here (or leave it blank), and + specify your NFS server in the `nfsroot' parameter + (see above). If this entry is blank the address of the + server is used which answered the RARP/BOOTP/DHCP + request. + + IP address of a gateway if the server is on a different + subnet. If this entry is empty no gateway is used and the + server is assumed to be on the local network, unless a + value has been received by BOOTP/DHCP. + + Netmask for local network interface. If this is empty, + the netmask is derived from the client IP address assuming + classful addressing, unless overridden in BOOTP/DHCP reply. + + Name of the client. If empty, the client IP address is + used in ASCII notation, or the value received by + BOOTP/DHCP. + + Name of network device to use. If this is empty, all + devices are used for RARP/BOOTP/DHCP requests, and the + first one we receive a reply on is configured. If you + have only one device, you can safely leave this blank. + + Method to use for autoconfiguration. If this is either + 'rarp', 'bootp', or 'dhcp' the specified protocol is + used. If the value is 'both', 'all' or empty, all + protocols are used. 'off', 'static' or 'none' means + no autoconfiguration. + +IP addresses and netmasks must be either absent (defaulting to zero) +or presented in dotted-quad notation. + +An interface spec can be prefixed with either "ip=", "nfsaddrs=", both +of which are ignored. These (along with the ugliness of the long +form) are present for compatibility with the in-kernel ipconfig code +from 2.4 and earlier kernels. + +Here are a few examples of valid ipconfig command lines. + +Enable the loopback interface: + ipconfig 127.0.0.1:::::lo:none + +Try to configure eth0 using bootp for up to 30 seconds: + ipconfig -t 30 -c bootp eth0 + +Configure eth0 and eth1 using dhcp or bootp, and eth2 statically: + ipconfig -c any eth0 eth1 192.168.1.1:::::eth2:none + +-- + +From Russell's original README, and still true: + +The code in main.c is yucky imho. Needs cleaning. + +-- +Russell King (2002/10/22) +Bryan O'Sullivan (2003/04/29) diff --git a/usr/kinit/ipconfig/bootp_packet.h b/usr/kinit/ipconfig/bootp_packet.h new file mode 100644 index 0000000..1328c5c --- /dev/null +++ b/usr/kinit/ipconfig/bootp_packet.h @@ -0,0 +1,34 @@ +/* + * ipconfig/bootp_packet.h + */ +#ifndef BOOTP_PACKET_H +#define BOOTP_PACKET_H + +#include + +struct netdev; + +/* packet ops */ +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +/* your basic bootp packet */ +struct bootp_hdr { + uint8_t op; + uint8_t htype; + uint8_t hlen; + uint8_t hops; + uint32_t xid; + uint16_t secs; + uint16_t flags; + uint32_t ciaddr; + uint32_t yiaddr; + uint32_t siaddr; + uint32_t giaddr; + uint8_t chaddr[16]; + char server_name[64]; + char boot_file[128]; + /* 312 bytes of extensions */ +}; + +#endif diff --git a/usr/kinit/ipconfig/bootp_proto.c b/usr/kinit/ipconfig/bootp_proto.c new file mode 100644 index 0000000..137847c --- /dev/null +++ b/usr/kinit/ipconfig/bootp_proto.c @@ -0,0 +1,213 @@ +/* + * ipconfig/bootp_proto.c + * + * BOOTP packet protocol handling. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipconfig.h" +#include "netdev.h" +#include "bootp_packet.h" +#include "bootp_proto.h" +#include "packet.h" + +static uint8_t bootp_options[312] = { + [ 0] = 99, 130, 83, 99,/* RFC1048 magic cookie */ + [ 4] = 1, 4, /* 4- 9 subnet mask */ + [ 10] = 3, 4, /* 10- 15 default gateway */ + [ 16] = 5, 8, /* 16- 25 nameserver */ + [ 26] = 12, 32, /* 26- 59 host name */ + [ 60] = 40, 32, /* 60- 95 nis domain name */ + [ 96] = 17, 40, /* 96-137 boot path */ + [138] = 57, 2, 1, 150, /* 138-141 extension buffer */ + [142] = 255, /* end of list */ +}; + +/* + * Send a plain bootp request packet with options + */ +int bootp_send_request(struct netdev *dev) +{ + struct bootp_hdr bootp; + struct iovec iov[] = { + /* [0] = ip + udp headers */ + [1] = {&bootp, sizeof(bootp)}, + [2] = {bootp_options, 312} + }; + + memset(&bootp, 0, sizeof(struct bootp_hdr)); + + bootp.op = BOOTP_REQUEST, bootp.htype = dev->hwtype; + bootp.hlen = dev->hwlen; + bootp.xid = dev->bootp.xid; + bootp.ciaddr = dev->ip_addr; + bootp.secs = htons(time(NULL) - dev->open_time); + memcpy(bootp.chaddr, dev->hwaddr, 16); + + DEBUG(("-> bootp xid 0x%08x secs 0x%08x ", + bootp.xid, ntohs(bootp.secs))); + + return packet_send(dev, iov, 2); +} + +/* + * Parse a bootp reply packet + */ +int bootp_parse(struct netdev *dev, struct bootp_hdr *hdr, + uint8_t *exts, int extlen) +{ + dev->bootp.gateway = hdr->giaddr; + dev->ip_addr = hdr->yiaddr; + dev->ip_server = hdr->siaddr; + dev->ip_netmask = INADDR_ANY; + dev->ip_broadcast = INADDR_ANY; + dev->ip_gateway = hdr->giaddr; + dev->ip_nameserver[0] = INADDR_ANY; + dev->ip_nameserver[1] = INADDR_ANY; + dev->hostname[0] = '\0'; + dev->nisdomainname[0] = '\0'; + dev->bootpath[0] = '\0'; + + if (extlen >= 4 && exts[0] == 99 && exts[1] == 130 && + exts[2] == 83 && exts[3] == 99) { + uint8_t *ext; + + for (ext = exts + 4; ext - exts < extlen;) { + int len; + uint8_t opt = *ext++; + + if (opt == 0) + continue; + else if (opt == 255) + break; + + len = *ext++; + + switch (opt) { + case 1: /* subnet mask */ + if (len == 4) + memcpy(&dev->ip_netmask, ext, 4); + break; + case 3: /* default gateway */ + if (len >= 4) + memcpy(&dev->ip_gateway, ext, 4); + break; + case 6: /* DNS server */ + if (len >= 4) + memcpy(&dev->ip_nameserver, ext, + len >= 8 ? 8 : 4); + break; + case 12: /* host name */ + if (len > sizeof(dev->hostname) - 1) + len = sizeof(dev->hostname) - 1; + memcpy(&dev->hostname, ext, len); + dev->hostname[len] = '\0'; + break; + case 15: /* domain name */ + if (len > sizeof(dev->dnsdomainname) - 1) + len = sizeof(dev->dnsdomainname) - 1; + memcpy(&dev->dnsdomainname, ext, len); + dev->dnsdomainname[len] = '\0'; + break; + case 17: /* root path */ + if (len > sizeof(dev->bootpath) - 1) + len = sizeof(dev->bootpath) - 1; + memcpy(&dev->bootpath, ext, len); + dev->bootpath[len] = '\0'; + break; + case 26: /* interface MTU */ + if (len == 2) + dev->mtu = (ext[0] << 8) + ext[1]; + break; + case 28: /* broadcast addr */ + if (len == 4) + memcpy(&dev->ip_broadcast, ext, 4); + break; + case 40: /* NIS domain name */ + if (len > sizeof(dev->nisdomainname) - 1) + len = sizeof(dev->nisdomainname) - 1; + memcpy(&dev->nisdomainname, ext, len); + dev->nisdomainname[len] = '\0'; + break; + case 54: /* server identifier */ + if (len == 4 && !dev->ip_server) + memcpy(&dev->ip_server, ext, 4); + break; + } + + ext += len; + } + } + + /* + * Got packet. + */ + return 1; +} + +/* + * Receive a bootp reply and parse packet + */ +int bootp_recv_reply(struct netdev *dev) +{ + struct bootp_hdr bootp; + uint8_t bootp_options[312]; + struct iovec iov[] = { + /* [0] = ip + udp headers */ + [1] = {&bootp, sizeof(struct bootp_hdr)}, + [2] = {bootp_options, 312} + }; + int ret; + + ret = packet_recv(iov, 3); + if (ret <= 0) + return ret; + + if (ret < sizeof(struct bootp_hdr) || + bootp.op != BOOTP_REPLY || /* RFC951 7.5 */ + bootp.xid != dev->bootp.xid || + memcmp(bootp.chaddr, dev->hwaddr, 16)) + return 0; + + ret -= sizeof(struct bootp_hdr); + + return bootp_parse(dev, &bootp, bootp_options, ret); +} + +/* + * Initialise interface for bootp. + */ +int bootp_init_if(struct netdev *dev) +{ + short flags; + + /* + * Get the device flags + */ + if (netdev_getflags(dev, &flags)) + return -1; + + /* + * We can't do DHCP nor BOOTP if this device + * doesn't support broadcast. + */ + if (dev->mtu < 364 || (flags & IFF_BROADCAST) == 0) { + dev->caps &= ~(CAP_BOOTP | CAP_DHCP); + return 0; + } + + /* + * Get a random XID + */ + dev->bootp.xid = (uint32_t) lrand48(); + dev->open_time = time(NULL); + + return 0; +} diff --git a/usr/kinit/ipconfig/bootp_proto.h b/usr/kinit/ipconfig/bootp_proto.h new file mode 100644 index 0000000..71ed09e --- /dev/null +++ b/usr/kinit/ipconfig/bootp_proto.h @@ -0,0 +1,8 @@ +/* + * bootp_proto.h + */ +int bootp_send_request(struct netdev *dev); +int bootp_recv_reply(struct netdev *dev); +int bootp_parse(struct netdev *dev, struct bootp_hdr *hdr, uint8_t * exts, + int extlen); +int bootp_init_if(struct netdev *dev); diff --git a/usr/kinit/ipconfig/dhcp_proto.c b/usr/kinit/ipconfig/dhcp_proto.c new file mode 100644 index 0000000..9a30660 --- /dev/null +++ b/usr/kinit/ipconfig/dhcp_proto.c @@ -0,0 +1,212 @@ +/* + * ipconfig/dhcp_proto.c + * + * DHCP RFC 2131 and 2132 + */ +#include +#include +#include +#include +#include +#include + +#include "ipconfig.h" +#include "netdev.h" +#include "bootp_packet.h" +#include "bootp_proto.h" +#include "dhcp_proto.h" +#include "packet.h" + +static uint8_t dhcp_params[] = { + 1, /* subnet mask */ + 3, /* default gateway */ + 6, /* DNS server */ + 12, /* host name */ + 15, /* domain name */ + 17, /* root path */ + 26, /* interface mtu */ + 28, /* broadcast addr */ + 40, /* NIS domain name (why?) */ +}; + +static uint8_t dhcp_discover_hdr[] = { + 99, 130, 83, 99, /* bootp cookie */ + 53, 1, DHCPDISCOVER, /* dhcp message type */ + 55, sizeof(dhcp_params), /* parameter list */ +}; + +static uint8_t dhcp_request_hdr[] = { + 99, 130, 83, 99, /* boot cookie */ + 53, 1, DHCPREQUEST, /* dhcp message type */ +#define SERVER_IP_OFF 9 + 54, 4, 0, 0, 0, 0, /* server IP */ +#define REQ_IP_OFF 15 + 50, 4, 0, 0, 0, 0, /* requested IP address */ + 55, sizeof(dhcp_params), /* parameter list */ +}; + +static uint8_t dhcp_end[] = { + 255, +}; + +static struct iovec dhcp_discover_iov[] = { + /* [0] = ip + udp header */ + /* [1] = bootp header */ + [2] = {dhcp_discover_hdr, sizeof(dhcp_discover_hdr)}, + [3] = {dhcp_params, sizeof(dhcp_params)}, + [4] = {dhcp_end, sizeof(dhcp_end)} +}; + +static struct iovec dhcp_request_iov[] = { + /* [0] = ip + udp header */ + /* [1] = bootp header */ + [2] = {dhcp_request_hdr, sizeof(dhcp_request_hdr)}, + [3] = {dhcp_params, sizeof(dhcp_params)}, + [4] = {dhcp_end, sizeof(dhcp_end)} +}; + +/* + * Parse a DHCP response packet + */ +static int +dhcp_parse(struct netdev *dev, struct bootp_hdr *hdr, uint8_t * exts, int extlen) +{ + uint8_t type = 0; + uint32_t serverid = INADDR_NONE; + int ret = 0; + + if (extlen >= 4 && exts[0] == 99 && exts[1] == 130 && + exts[2] == 83 && exts[3] == 99) { + uint8_t *ext; + + for (ext = exts + 4; ext - exts < extlen;) { + uint8_t len, *opt = ext++; + if (*opt == 0) + continue; + + len = *ext++; + + ext += len; + + if (*opt == 53) + type = opt[2]; + if (*opt == 54) + memcpy(&serverid, opt + 2, 4); + } + } + + switch (type) { + case DHCPOFFER: + ret = bootp_parse(dev, hdr, exts, extlen); + if (ret == 1 && serverid != INADDR_NONE) + dev->serverid = serverid; + DEBUG(("\n dhcp offer\n")); + break; + + case DHCPACK: + ret = bootp_parse(dev, hdr, exts, extlen); + DEBUG(("\n dhcp ack\n")); + break; + + case DHCPNAK: + ret = 2; + DEBUG(("\n dhcp nak\n")); + break; + } + return ret; +} + +/* + * Receive and parse a DHCP packet + */ +static int dhcp_recv(struct netdev *dev) +{ + struct bootp_hdr bootp; + uint8_t dhcp_options[1500]; + struct iovec iov[] = { + /* [0] = ip + udp header */ + [1] = {&bootp, sizeof(struct bootp_hdr)}, + [2] = {dhcp_options, sizeof(dhcp_options)} + }; + int ret; + + ret = packet_recv(iov, 3); + if (ret <= 0) + return ret; + + DEBUG(("\n dhcp xid %08x ", dev->bootp.xid)); + + if (ret < sizeof(struct bootp_hdr) || bootp.op != BOOTP_REPLY || /* RFC951 7.5 */ + bootp.xid != dev->bootp.xid || + memcmp(bootp.chaddr, dev->hwaddr, 16)) + return 0; + + ret -= sizeof(struct bootp_hdr); + + return dhcp_parse(dev, &bootp, dhcp_options, ret); +} + +static int dhcp_send(struct netdev *dev, struct iovec *vec, int len) +{ + struct bootp_hdr bootp; + + memset(&bootp, 0, sizeof(struct bootp_hdr)); + + bootp.op = BOOTP_REQUEST; + bootp.htype = dev->hwtype; + bootp.hlen = dev->hwlen; + bootp.xid = dev->bootp.xid; + bootp.ciaddr = dev->ip_addr; + bootp.giaddr = dev->bootp.gateway; + bootp.secs = htons(time(NULL) - dev->open_time); + memcpy(bootp.chaddr, dev->hwaddr, 16); + + vec[1].iov_base = &bootp; + vec[1].iov_len = sizeof(struct bootp_hdr); + + DEBUG(("xid %08x secs %d ", bootp.xid, ntohs(bootp.secs))); + + return packet_send(dev, vec, len); +} + +/* + * Send a DHCP discover packet + */ +int dhcp_send_discover(struct netdev *dev) +{ + dev->ip_addr = INADDR_ANY; + dev->ip_gateway = INADDR_ANY; + + DEBUG(("-> dhcp discover ")); + + return dhcp_send(dev, dhcp_discover_iov, 5); +} + +/* + * Receive a DHCP offer packet + */ +int dhcp_recv_offer(struct netdev *dev) +{ + return dhcp_recv(dev); +} + +/* + * Send a DHCP request packet + */ +int dhcp_send_request(struct netdev *dev) +{ + memcpy(&dhcp_request_hdr[SERVER_IP_OFF], &dev->serverid, 4); + memcpy(&dhcp_request_hdr[REQ_IP_OFF], &dev->ip_addr, 4); + + DEBUG(("-> dhcp request ")); + + return dhcp_send(dev, dhcp_request_iov, 5); +} + +/* + * Receive a DHCP ack packet + */ +int dhcp_recv_ack(struct netdev *dev) +{ + return dhcp_recv(dev); +} diff --git a/usr/kinit/ipconfig/dhcp_proto.h b/usr/kinit/ipconfig/dhcp_proto.h new file mode 100644 index 0000000..3b2465b --- /dev/null +++ b/usr/kinit/ipconfig/dhcp_proto.h @@ -0,0 +1,18 @@ +/* + * ipconfig/dhcp_proto.h + */ + +/* DHCP message types */ +#define DHCPDISCOVER 1 +#define DHCPOFFER 2 +#define DHCPREQUEST 3 +#define DHCPDECLINE 4 +#define DHCPACK 5 +#define DHCPNAK 6 +#define DHCPRELEASE 7 +#define DHCPINFORM 8 + +int dhcp_send_discover(struct netdev *dev); +int dhcp_recv_offer(struct netdev *dev); +int dhcp_send_request(struct netdev *dev); +int dhcp_recv_ack(struct netdev *dev); diff --git a/usr/kinit/ipconfig/ipconfig.h b/usr/kinit/ipconfig/ipconfig.h new file mode 100644 index 0000000..597f51e --- /dev/null +++ b/usr/kinit/ipconfig/ipconfig.h @@ -0,0 +1,35 @@ +/* + * ipconfig/ipconfig.h + */ + +#ifndef IPCONFIG_IPCONFIG_H +#define IPCONFIG_IPCONFIG_H + +#include +#include + +#define LOCAL_PORT 68 +#define REMOTE_PORT (LOCAL_PORT - 1) + +extern uint16_t cfg_local_port; +extern uint16_t cfg_remote_port; + +int ipconfig_main(int argc, char *argv[]); +uint32_t ipconfig_server_address(void *next); + +/* + * Note for gcc 3.2.2: + * + * If you're turning on debugging, make sure you get rid of -Os from + * the gcc command line, or else ipconfig will fail to link. + */ +#define IPC_DEBUG + +#undef DEBUG +#ifdef IPC_DEBUG +#define DEBUG(x) printf x +#else +#define DEBUG(x) do { } while(0) +#endif + +#endif /* IPCONFIG_IPCONFIG_H */ diff --git a/usr/kinit/ipconfig/main.c b/usr/kinit/ipconfig/main.c new file mode 100644 index 0000000..c9d3b0e --- /dev/null +++ b/usr/kinit/ipconfig/main.c @@ -0,0 +1,758 @@ +/* + * ipconfig/main.c + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for getopts */ + +#include + +#include "ipconfig.h" +#include "netdev.h" +#include "bootp_packet.h" +#include "bootp_proto.h" +#include "dhcp_proto.h" +#include "packet.h" + +static const char sysfs_class_net[] = "/sys/class/net"; +static const char *progname; +static jmp_buf abort_buf; +static char do_not_config; +static unsigned int default_caps = CAP_DHCP | CAP_BOOTP | CAP_RARP; +static int loop_timeout = -1; +static int configured; +static int bringup_first = 0; + +struct state { + int state; + int restart_state; + time_t expire; + int retry_period; + + struct netdev *dev; + struct state *next; +}; + +static inline const char *my_inet_ntoa(uint32_t addr) +{ + struct in_addr a; + + a.s_addr = addr; + + return inet_ntoa(a); +} + +static void print_device_config(struct netdev *dev) +{ + printf("IP-Config: %s complete (from %s):\n", dev->name, + my_inet_ntoa(dev->serverid ? dev->serverid : dev->ip_server)); + printf(" address: %-16s ", my_inet_ntoa(dev->ip_addr)); + printf("broadcast: %-16s ", my_inet_ntoa(dev->ip_broadcast)); + printf("netmask: %-16s\n", my_inet_ntoa(dev->ip_netmask)); + printf(" gateway: %-16s ", my_inet_ntoa(dev->ip_gateway)); + printf("dns0 : %-16s ", my_inet_ntoa(dev->ip_nameserver[0])); + printf("dns1 : %-16s\n", my_inet_ntoa(dev->ip_nameserver[1])); + if (dev->hostname[0]) + printf(" host : %-64s\n", dev->hostname); + if (dev->dnsdomainname[0]) + printf(" domain : %-64s\n", dev->dnsdomainname); + if (dev->nisdomainname[0]) + printf(" nisdomain: %-64s\n", dev->nisdomainname); + printf(" rootserver: %s ", my_inet_ntoa(dev->ip_server)); + printf("rootpath: %s\n", dev->bootpath); +} + +static void configure_device(struct netdev *dev) +{ + if (do_not_config) + return; + + if (netdev_setmtu(dev)) + printf("IP-Config: failed to set MTU on %s to %u\n", + dev->name, dev->mtu); + + if (netdev_setaddress(dev)) + printf("IP-Config: failed to set addresses on %s\n", dev->name); + if (netdev_setdefaultroute(dev)) + printf("IP-Config: failed to set default route on %s\n", + dev->name); +} + +static void dump_device_config(struct netdev *dev) +{ + char fn[40]; + FILE *f; + + snprintf(fn, sizeof(fn), "/tmp/net-%s.conf", dev->name); + f = fopen(fn, "w"); + if (f) { + fprintf(f, "DEVICE=%s\n", dev->name); + fprintf(f, "IPV4ADDR=%s\n", my_inet_ntoa(dev->ip_addr)); + fprintf(f, "IPV4BROADCAST=%s\n", + my_inet_ntoa(dev->ip_broadcast)); + fprintf(f, "IPV4NETMASK=%s\n", my_inet_ntoa(dev->ip_netmask)); + fprintf(f, "IPV4GATEWAY=%s\n", my_inet_ntoa(dev->ip_gateway)); + fprintf(f, "IPV4DNS0=%s\n", + my_inet_ntoa(dev->ip_nameserver[0])); + fprintf(f, "IPV4DNS1=%s\n", + my_inet_ntoa(dev->ip_nameserver[1])); + fprintf(f, "HOSTNAME=%s\n", dev->hostname); + fprintf(f, "DNSDOMAIN=%s\n", dev->dnsdomainname); + fprintf(f, "NISDOMAIN=%s\n", dev->nisdomainname); + fprintf(f, "ROOTSERVER=%s\n", my_inet_ntoa(dev->ip_server)); + fprintf(f, "ROOTPATH=%s\n", dev->bootpath); + fclose(f); + } +} + +static uint32_t inet_class_netmask(uint32_t ip) +{ + ip = ntohl(ip); + if (IN_CLASSA(ip)) + return htonl(IN_CLASSA_NET); + if (IN_CLASSB(ip)) + return htonl(IN_CLASSB_NET); + if (IN_CLASSC(ip)) + return htonl(IN_CLASSC_NET); + return INADDR_ANY; +} + +static void postprocess_device(struct netdev *dev) +{ + if (dev->ip_netmask == INADDR_ANY) { + dev->ip_netmask = inet_class_netmask(dev->ip_addr); + printf("IP-Config: %s guessed netmask %s\n", + dev->name, my_inet_ntoa(dev->ip_netmask)); + } + if (dev->ip_broadcast == INADDR_ANY) { + dev->ip_broadcast = + (dev->ip_addr & dev->ip_netmask) | ~dev->ip_netmask; + printf("IP-Config: %s guessed broadcast address %s\n", + dev->name, my_inet_ntoa(dev->ip_broadcast)); + } + if (dev->ip_nameserver[0] == INADDR_ANY) { + dev->ip_nameserver[0] = dev->ip_server; + printf("IP-Config: %s guessed nameserver address %s\n", + dev->name, my_inet_ntoa(dev->ip_nameserver[0])); + } +} + +static void complete_device(struct netdev *dev) +{ + postprocess_device(dev); + configure_device(dev); + dump_device_config(dev); + print_device_config(dev); + + ++configured; + + dev->next = ifaces; + ifaces = dev; +} + +static int process_receive_event(struct state *s, time_t now) +{ + int handled = 1; + + switch (s->state) { + case DEVST_BOOTP: + s->restart_state = DEVST_BOOTP; + switch (bootp_recv_reply(s->dev)) { + case -1: + s->state = DEVST_ERROR; + break; + case 1: + s->state = DEVST_COMPLETE; + DEBUG(("\n bootp reply\n")); + break; + } + break; + + case DEVST_DHCPDISC: + s->restart_state = DEVST_DHCPDISC; + switch (dhcp_recv_offer(s->dev)) { + case -1: + s->state = DEVST_ERROR; + break; + case 1: /* Offer received */ + s->state = DEVST_DHCPREQ; + dhcp_send_request(s->dev); + break; + } + break; + + case DEVST_DHCPREQ: + s->restart_state = DEVST_DHCPDISC; + switch (dhcp_recv_ack(s->dev)) { + case -1: /* error */ + s->state = DEVST_ERROR; + break; + case 1: /* ACK received */ + s->state = DEVST_COMPLETE; + break; + case 2: /* NAK received */ + s->state = DEVST_DHCPDISC; + break; + } + break; + } + + switch (s->state) { + case DEVST_COMPLETE: + complete_device(s->dev); + break; + + case DEVST_ERROR: + /* error occurred, try again in 10 seconds */ + s->expire = now + 10; + default: + DEBUG(("\n")); + handled = 0; + break; + } + + return handled; +} + +static void process_timeout_event(struct state *s, time_t now) +{ + int ret = 0; + + /* + * Is the link up? If not, try again in 1 second. + */ + if (!netdev_running(s->dev)) { + s->expire = now + 1; + s->state = s->restart_state; + return; + } + + /* + * If we had an error, restore a sane state to + * restart from. + */ + if (s->state == DEVST_ERROR) + s->state = s->restart_state; + + /* + * Now send a packet depending on our state. + */ + switch (s->state) { + case DEVST_BOOTP: + ret = bootp_send_request(s->dev); + s->restart_state = DEVST_BOOTP; + break; + + case DEVST_DHCPDISC: + ret = dhcp_send_discover(s->dev); + s->restart_state = DEVST_DHCPDISC; + break; + + case DEVST_DHCPREQ: + ret = dhcp_send_request(s->dev); + s->restart_state = DEVST_DHCPDISC; + break; + } + + if (ret == -1) { + s->state = DEVST_ERROR; + s->expire = now + 10; + } else { + s->expire = now + s->retry_period; + + s->retry_period *= 2; + if (s->retry_period > 60) + s->retry_period = 60; + } +} + +static struct state *slist; +struct netdev *ifaces; + +static int do_pkt_recv(int pkt_fd, time_t now) +{ + int ifindex, ret; + struct state *s; + + ret = packet_peek(&ifindex); + if (ret < 0) + goto bail; + + for (s = slist; s; s = s->next) { + if (s->dev->ifindex == ifindex) { + ret |= process_receive_event(s, now); + break; + } + } + + bail: + return ret; +} + +static int loop(void) +{ +#define NR_FDS 1 + struct pollfd fds[NR_FDS]; + struct state *s; + int pkt_fd; + int nr = 0; + struct timeval now, prev; + time_t start; + + pkt_fd = packet_open(); + if (pkt_fd == -1) { + perror("packet_open"); + return -1; + } + + fds[0].fd = pkt_fd; + fds[0].events = POLLRDNORM; + + gettimeofday(&now, NULL); + start = now.tv_sec; + while (1) { + int timeout = 60; + int pending = 0; + int done = 0; + int timeout_ms; + int x; + + for (s = slist; s; s = s->next) { + DEBUG(("%s: state = %d\n", s->dev->name, s->state)); + + if (s->state == DEVST_COMPLETE) { + done++; + continue; + } + + pending++; + + if (s->expire - now.tv_sec <= 0) { + DEBUG(("timeout\n")); + process_timeout_event(s, now.tv_sec); + } + + if (timeout > s->expire - now.tv_sec) + timeout = s->expire - now.tv_sec; + } + + if (pending == 0 || (bringup_first && done)) + break; + + timeout_ms = timeout * 1000; + + for (x = 0; x < 2; x++) { + int delta_ms; + + if (timeout_ms <= 0) + timeout_ms = 100; + + nr = poll(fds, NR_FDS, timeout_ms); + prev = now; + gettimeofday(&now, NULL); + + if ((fds[0].revents & POLLRDNORM)) { + nr = do_pkt_recv(pkt_fd, now.tv_sec); + if (nr == 1) + break; + else if (nr == 0) + packet_discard(); + } + + if (loop_timeout >= 0 && + now.tv_sec - start >= loop_timeout) { + printf("IP-Config: no response after %d " + "secs - giving up\n", loop_timeout); + goto bail; + } + + delta_ms = (now.tv_sec - prev.tv_sec) * 1000; + delta_ms += (now.tv_usec - prev.tv_usec) / 1000; + + DEBUG(("Delta: %d ms\n", delta_ms)); + + timeout_ms -= delta_ms; + } + } + bail: + packet_close(); + + return 0; +} + +static int add_one_dev(struct netdev *dev) +{ + struct state *state; + + state = malloc(sizeof(struct state)); + if (!state) + return -1; + + state->dev = dev; + state->expire = time(NULL); + state->retry_period = 1; + + /* + * Select the state that we start from. + */ + if (dev->caps & CAP_DHCP && dev->ip_addr == INADDR_ANY) { + state->restart_state = state->state = DEVST_DHCPDISC; + } else if (dev->caps & CAP_DHCP) { + state->restart_state = state->state = DEVST_DHCPREQ; + } else if (dev->caps & CAP_BOOTP) { + state->restart_state = state->state = DEVST_BOOTP; + } + + state->next = slist; + slist = state; + + return 0; +} + +static void parse_addr(uint32_t * addr, const char *ip) +{ + struct in_addr in; + if (inet_aton(ip, &in) == 0) { + fprintf(stderr, "%s: can't parse IP address '%s'\n", + progname, ip); + longjmp(abort_buf, 1); + } + *addr = in.s_addr; +} + +static unsigned int parse_proto(const char *ip) +{ + unsigned int caps = 0; + + if (*ip == '\0' || strcmp(ip, "on") == 0 || strcmp(ip, "any") == 0) + caps = CAP_BOOTP | CAP_DHCP | CAP_RARP; + else if (strcmp(ip, "both") == 0) + caps = CAP_BOOTP | CAP_RARP; + else if (strcmp(ip, "dhcp") == 0) + caps = CAP_BOOTP | CAP_DHCP; + else if (strcmp(ip, "bootp") == 0) + caps = CAP_BOOTP; + else if (strcmp(ip, "rarp") == 0) + caps = CAP_RARP; + else if (strcmp(ip, "none") == 0 || strcmp(ip, "static") == 0 + || strcmp(ip, "off") == 0) + goto bail; + else { + fprintf(stderr, "%s: invalid protocol '%s'\n", progname, ip); + longjmp(abort_buf, 1); + } + bail: + return caps; +} + +static int add_all_devices(struct netdev *template); + +static int parse_device(struct netdev *dev, const char *ip) +{ + char *cp; + int i, opt; + int is_ip = 0; + + DEBUG(("IP-Config: parse_device: \"%s\"\n", ip)); + + if (strncmp(ip, "ip=", 3) == 0) { + ip += 3; + is_ip = 1; + } else if (strncmp(ip, "nfsaddrs=", 9) == 0) { + ip += 9; + is_ip = 1; /* Not sure about this...? */ + } + + if (!strchr(ip, ':')) { + /* Only one option, e.g. "ip=dhcp", or it's an interface name */ + if (is_ip) { + dev->caps = parse_proto(ip); + bringup_first = 1; + } else { + dev->name = ip; + } + } else { + for (i = opt = 0; ip && *ip; ip = cp, opt++) { + if ((cp = strchr(ip, ':'))) { + *cp++ = '\0'; + } + if (opt > 6) { + fprintf(stderr, "%s: too many options for %s\n", + progname, dev->name); + longjmp(abort_buf, 1); + } + + if (*ip == '\0') + continue; + DEBUG(("IP-Config: opt #%d: '%s'\n", opt, ip)); + switch (opt) { + case 0: + parse_addr(&dev->ip_addr, ip); + dev->caps = 0; + break; + case 1: + parse_addr(&dev->ip_server, ip); + break; + case 2: + parse_addr(&dev->ip_gateway, ip); + break; + case 3: + parse_addr(&dev->ip_netmask, ip); + break; + case 4: + strncpy(dev->hostname, ip, SYS_NMLN - 1); + dev->hostname[SYS_NMLN - 1] = '\0'; + break; + case 5: + dev->name = ip; + break; + case 6: + dev->caps = parse_proto(ip); + break; + } + } + } + + if (dev->name == NULL || + dev->name[0] == '\0' || strcmp(dev->name, "all") == 0) { + add_all_devices(dev); + return 0; + } + return 1; +} + +static void bringup_device(struct netdev *dev) +{ + if (netdev_up(dev) == 0) { + if (dev->caps) { + add_one_dev(dev); + } else { + complete_device(dev); + } + } +} + +static void bringup_one_dev(struct netdev *template, struct netdev *dev) +{ + if (template->ip_addr != INADDR_NONE) + dev->ip_addr = template->ip_addr; + if (template->ip_server != INADDR_NONE) + dev->ip_server = template->ip_server; + if (template->ip_gateway != INADDR_NONE) + dev->ip_gateway = template->ip_gateway; + if (template->ip_netmask != INADDR_NONE) + dev->ip_netmask = template->ip_netmask; + if (template->ip_nameserver[0] != INADDR_NONE) + dev->ip_nameserver[0] = template->ip_nameserver[0]; + if (template->ip_nameserver[1] != INADDR_NONE) + dev->ip_nameserver[1] = template->ip_nameserver[1]; + if (template->hostname[0] != '\0') + strcpy(dev->hostname, template->hostname); + dev->caps &= template->caps; + + bringup_device(dev); +} + +static struct netdev *add_device(const char *info) +{ + struct netdev *dev; + int i; + + dev = malloc(sizeof(struct netdev)); + if (dev == NULL) { + fprintf(stderr, "%s: out of memory\n", progname); + longjmp(abort_buf, 1); + } + + memset(dev, 0, sizeof(struct netdev)); + dev->caps = default_caps; + + if (parse_device(dev, info) == 0) + goto bail; + + if (netdev_init_if(dev) == -1) + goto bail; + + if (bootp_init_if(dev) == -1) + goto bail; + + printf("IP-Config: %s hardware address", dev->name); + for (i = 0; i < dev->hwlen; i++) + printf("%c%02x", i == 0 ? ' ' : ':', dev->hwaddr[i]); + printf(" mtu %d%s%s\n", dev->mtu, + dev->caps & CAP_DHCP ? " DHCP" : + dev->caps & CAP_BOOTP ? " BOOTP" : "", + dev->caps & CAP_RARP ? " RARP" : ""); + return dev; + bail: + free(dev); + return NULL; +} + +static int add_all_devices(struct netdev *template) +{ + DIR *d; + struct dirent *de; + struct netdev *dev; + char t[PATH_MAX], p[255]; + int i, fd; + unsigned long flags; + + d = opendir(sysfs_class_net); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + /* This excludes devices beginning with dots or "dummy", as well as . or .. */ + if (de->d_name[0] == '.' || !strcmp(de->d_name, "..")) + continue; + i = snprintf(t, PATH_MAX - 1, "%s/%s/flags", sysfs_class_net, + de->d_name); + if (i < 0 || i >= PATH_MAX - 1) + continue; + t[i] = '\0'; + fd = open(t, O_RDONLY); + if (fd < 0) { + perror(t); + continue; + } + i = read(fd, &p, sizeof(p) - 1); + close(fd); + if (i < 0) { + perror(t); + continue; + } + p[i] = '\0'; + flags = strtoul(p, NULL, 0); + /* Heuristic for if this is a reasonable boot interface. + This is the same + logic the in-kernel ipconfig uses... */ + if (!(flags & IFF_LOOPBACK) && + (flags & (IFF_BROADCAST | IFF_POINTOPOINT))) { + DEBUG(("Trying to bring up %s\n", de->d_name)); + + if (!(dev = add_device(de->d_name))) + continue; + bringup_one_dev(template, dev); + } + } + closedir(d); + return 1; +} + +static int check_autoconfig(void) +{ + int ndev = 0, nauto = 0; + struct state *s; + + for (s = slist; s; s = s->next) { + ndev++; + if (s->dev->caps) + nauto++; + } + + if (ndev == 0) { + if (configured == 0) { + fprintf(stderr, "%s: no devices to configure\n", + progname); + longjmp(abort_buf, 1); + } + } + + return nauto; +} + +int main(int argc, char *argv[]) + __attribute__ ((weak, alias("ipconfig_main"))); + +int ipconfig_main(int argc, char *argv[]) +{ + struct netdev *dev; + int c, port; + int err; + + /* If progname is set we're invoked from another program */ + if (!progname) { + struct timeval now; + progname = argv[0]; + gettimeofday(&now, NULL); + srand48(now.tv_usec ^ (now.tv_sec << 24)); + } + + if ((err = setjmp(abort_buf))) + return err; + + do { + c = getopt(argc, argv, "c:d:onp:t:"); + if (c == EOF) + break; + + switch (c) { + case 'c': + default_caps = parse_proto(optarg); + break; + case 'p': + port = atoi(optarg); + if (port <= 0 || port > USHRT_MAX) { + fprintf(stderr, + "%s: invalid port number %d\n", + progname, port); + longjmp(abort_buf, 1); + } + cfg_local_port = port; + cfg_remote_port = cfg_local_port - 1; + break; + case 't': + loop_timeout = atoi(optarg); + if (loop_timeout < 0) { + fprintf(stderr, + "%s: invalid timeout %d\n", + progname, loop_timeout); + longjmp(abort_buf, 1); + } + break; + case 'o': + bringup_first = 1; + break; + case 'n': + do_not_config = 1; + break; + case 'd': + dev = add_device(optarg); + if (dev) + bringup_device(dev); + break; + case '?': + fprintf(stderr, "%s: invalid option -%c\n", + progname, optopt); + longjmp(abort_buf, 1); + } + } while (1); + + for (c = optind; c < argc; c++) { + dev = add_device(argv[c]); + if (dev) + bringup_device(dev); + } + + if (check_autoconfig()) { + if (cfg_local_port != LOCAL_PORT) { + printf("IP-Config: binding source port to %d, " + "dest to %d\n", cfg_local_port, cfg_remote_port); + } + loop(); + } + + return 0; +} diff --git a/usr/kinit/ipconfig/netdev.c b/usr/kinit/ipconfig/netdev.c new file mode 100644 index 0000000..f032bed --- /dev/null +++ b/usr/kinit/ipconfig/netdev.c @@ -0,0 +1,251 @@ +/* + * ipconfig/netdev.c + * + * ioctl-based device configuration + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netdev.h" + +static int cfd = -1; + +static void copy_name(struct netdev *dev, struct ifreq *ifr) +{ + strncpy(ifr->ifr_name, dev->name, sizeof(ifr->ifr_name)); + ifr->ifr_name[sizeof(ifr->ifr_name) - 1] = '\0'; +} + +int netdev_getflags(struct netdev *dev, short *flags) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFFLAGS, &ifr) == -1) { + perror("SIOCGIFFLAGS"); + return -1; + } + + *flags = ifr.ifr_flags; + return 0; +} + +static int netdev_sif_addr(struct ifreq *ifr, int cmd, uint32_t addr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + + return ioctl(cfd, cmd, ifr); +} + +int netdev_setaddress(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (dev->ip_addr != INADDR_ANY && + netdev_sif_addr(&ifr, SIOCSIFADDR, dev->ip_addr) == -1) { + perror("SIOCSIFADDR"); + return -1; + } + + if (dev->ip_broadcast != INADDR_ANY && + netdev_sif_addr(&ifr, SIOCSIFBRDADDR, dev->ip_broadcast) == -1) { + perror("SIOCSIFBRDADDR"); + return -1; + } + + if (dev->ip_netmask != INADDR_ANY && + netdev_sif_addr(&ifr, SIOCSIFNETMASK, dev->ip_netmask) == -1) { + perror("SIOCSIFNETMASK"); + return -1; + } + + return 0; +} + +int netdev_setdefaultroute(struct netdev *dev) +{ + struct rtentry r; + + if (dev->ip_gateway == INADDR_ANY) + return 0; + + memset(&r, 0, sizeof(r)); + + ((struct sockaddr_in *)&r.rt_dst)->sin_family = AF_INET; + ((struct sockaddr_in *)&r.rt_dst)->sin_addr.s_addr = INADDR_ANY; + ((struct sockaddr_in *)&r.rt_gateway)->sin_family = AF_INET; + ((struct sockaddr_in *)&r.rt_gateway)->sin_addr.s_addr = + dev->ip_gateway; + ((struct sockaddr_in *)&r.rt_genmask)->sin_family = AF_INET; + ((struct sockaddr_in *)&r.rt_genmask)->sin_addr.s_addr = INADDR_ANY; + r.rt_flags = RTF_UP | RTF_GATEWAY; + + if (ioctl(cfd, SIOCADDRT, &r) == -1 && errno != EEXIST) { + perror("SIOCADDRT"); + return -1; + } + return 0; +} + +int netdev_setmtu(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + ifr.ifr_mtu = dev->mtu; + + return ioctl(cfd, SIOCSIFMTU, &ifr); +} + +static int netdev_gif_addr(struct ifreq *ifr, int cmd, uint32_t * ptr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; + + if (ioctl(cfd, cmd, ifr) == -1) + return -1; + + *ptr = sin->sin_addr.s_addr; + + return 0; +} + +int netdev_up(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFFLAGS, &ifr) == -1) { + perror("SIOCGIFFLAGS"); + return -1; + } + + ifr.ifr_flags |= IFF_UP; + + if (ioctl(cfd, SIOCSIFFLAGS, &ifr) == -1) { + perror("SIOCSIFFLAGS"); + return -1; + } + return 0; +} + +int netdev_down(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFFLAGS, &ifr) == -1) { + perror("SIOCGIFFLAGS"); + return -1; + } + + ifr.ifr_flags &= ~IFF_UP; + + if (ioctl(cfd, SIOCSIFFLAGS, &ifr) == -1) { + perror("SIOCSIFFLAGS"); + return -1; + } + return 0; +} + +int netdev_init_if(struct netdev *dev) +{ + struct ifreq ifr; + + if (cfd == -1) + cfd = socket(AF_INET, SOCK_DGRAM, 0); + if (cfd == -1) { + fprintf(stderr, "ipconfig: %s: socket(AF_INET): %s\n", + dev->name, strerror(errno)); + return -1; + } + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFINDEX, &ifr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFINDEX: %s\n", + dev->name, strerror(errno)); + return -1; + } + + dev->ifindex = ifr.ifr_ifindex; + + if (ioctl(cfd, SIOCGIFMTU, &ifr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFMTU: %s\n", + dev->name, strerror(errno)); + return -1; + } + + dev->mtu = ifr.ifr_mtu; + + if (ioctl(cfd, SIOCGIFHWADDR, &ifr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFHWADDR: %s\n", + dev->name, strerror(errno)); + return -1; + } + + dev->hwtype = ifr.ifr_hwaddr.sa_family; + dev->hwlen = 0; + + switch (dev->hwtype) { + case ARPHRD_ETHER: + dev->hwlen = 6; + break; + case ARPHRD_EUI64: + dev->hwlen = 8; + break; + case ARPHRD_LOOPBACK: + dev->hwlen = 0; + break; + default: + return -1; + } + + memcpy(dev->hwaddr, ifr.ifr_hwaddr.sa_data, dev->hwlen); + memset(dev->hwbrd, 0xff, dev->hwlen); + + /* + * Try to get the current interface information. + */ + if (dev->ip_addr == INADDR_NONE && + netdev_gif_addr(&ifr, SIOCGIFADDR, &dev->ip_addr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFADDR: %s\n", + dev->name, strerror(errno)); + dev->ip_addr = 0; + dev->ip_broadcast = 0; + dev->ip_netmask = 0; + return 0; + } + + if (dev->ip_broadcast == INADDR_NONE && + netdev_gif_addr(&ifr, SIOCGIFBRDADDR, &dev->ip_broadcast) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFBRDADDR: %s\n", + dev->name, strerror(errno)); + dev->ip_broadcast = 0; + } + + if (dev->ip_netmask == INADDR_NONE && + netdev_gif_addr(&ifr, SIOCGIFNETMASK, &dev->ip_netmask) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFNETMASK: %s\n", + dev->name, strerror(errno)); + dev->ip_netmask = 0; + } + + return 0; +} diff --git a/usr/kinit/ipconfig/netdev.h b/usr/kinit/ipconfig/netdev.h new file mode 100644 index 0000000..ac8c8be --- /dev/null +++ b/usr/kinit/ipconfig/netdev.h @@ -0,0 +1,83 @@ +/* + * ipconfig/netdev.h + */ + +#ifndef IPCONFIG_NETDEV_H +#define IPCONFIG_NETDEV_H + +#include +#include + +#define BPLEN 40 + +struct netdev { + const char *name; /* Device name */ + unsigned int ifindex; /* interface index */ + unsigned int hwtype; /* ARPHRD_xxx */ + unsigned int hwlen; /* HW address length */ + uint8_t hwaddr[16]; /* HW address */ + uint8_t hwbrd[16]; /* Broadcast HW address */ + unsigned int mtu; /* Device mtu */ + unsigned int caps; /* Capabilities */ + time_t open_time; + + struct { /* BOOTP/DHCP info */ + int fd; + uint32_t xid; + uint32_t gateway; /* BOOTP/DHCP gateway */ + } bootp; + + struct { /* RARP information */ + int fd; + } rarp; + + uint32_t ip_addr; /* my address */ + uint32_t ip_broadcast; /* broadcast address */ + uint32_t ip_server; /* server address */ + uint32_t ip_netmask; /* my subnet mask */ + uint32_t ip_gateway; /* my gateway */ + uint32_t ip_nameserver[2]; /* two nameservers */ + uint32_t serverid; /* dhcp serverid */ + char hostname[SYS_NMLN]; /* hostname */ + char dnsdomainname[SYS_NMLN]; /* dns domain name */ + char nisdomainname[SYS_NMLN]; /* nis domain name */ + char bootpath[BPLEN]; /* boot path */ + struct netdev *next; /* next configured i/f */ +}; + +extern struct netdev *ifaces; + +/* + * Device capabilities + */ +#define CAP_BOOTP (1<<0) +#define CAP_DHCP (1<<1) +#define CAP_RARP (1<<2) + +/* + * Device states + */ +#define DEVST_UP 0 +#define DEVST_BOOTP 1 +#define DEVST_DHCPDISC 2 +#define DEVST_DHCPREQ 3 +#define DEVST_COMPLETE 4 +#define DEVST_ERROR 5 + +int netdev_getflags(struct netdev *dev, short *flags); +int netdev_setaddress(struct netdev *dev); +int netdev_setdefaultroute(struct netdev *dev); +int netdev_up(struct netdev *dev); +int netdev_down(struct netdev *dev); +int netdev_init_if(struct netdev *dev); +int netdev_setmtu(struct netdev *dev); + +static inline int netdev_running(struct netdev *dev) +{ + short flags; + int ret = netdev_getflags(dev, &flags); + + return ret ? 0 : !!(flags & IFF_RUNNING); +} + +#endif /* IPCONFIG_NETDEV_H */ diff --git a/usr/kinit/ipconfig/packet.c b/usr/kinit/ipconfig/packet.c new file mode 100644 index 0000000..e03cd00 --- /dev/null +++ b/usr/kinit/ipconfig/packet.c @@ -0,0 +1,286 @@ +/* + * ipconfig/packet.c + * + * Packet socket handling glue. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipconfig.h" +#include "netdev.h" +#include "packet.h" + +static int pkt_fd = -1; + +uint16_t cfg_local_port = LOCAL_PORT; +uint16_t cfg_remote_port = REMOTE_PORT; + +int packet_open(void) +{ + int fd, one = 1; + + if (pkt_fd != -1) + return pkt_fd; + + /* + * Get a PACKET socket for IP traffic. + */ + fd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + if (fd == -1) { + perror("socket"); + return -1; + } + + /* + * We want to broadcast + */ + if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) == -1) { + perror("SO_BROADCAST"); + close(fd); + fd = -1; + } + + pkt_fd = fd; + + return fd; +} + +void packet_close(void) +{ + close(pkt_fd); + pkt_fd = -1; +} + +static unsigned int ip_checksum(uint16_t *hdr, int len) +{ + unsigned int chksum = 0; + + while (len) { + chksum += *hdr++; + chksum += *hdr++; + len--; + } + chksum = (chksum & 0xffff) + (chksum >> 16); + chksum = (chksum & 0xffff) + (chksum >> 16); + return (~chksum) & 0xffff; +} + +struct header { + struct iphdr ip; + struct udphdr udp; +} __attribute__ ((packed)); + +static struct header ipudp_hdrs = { + .ip = { + .ihl = 5, + .version = IPVERSION, + .frag_off = __constant_htons(IP_DF), + .ttl = 64, + .protocol = IPPROTO_UDP, + .saddr = INADDR_ANY, + .daddr = INADDR_BROADCAST, + }, + .udp = { + .source = __constant_htons(LOCAL_PORT), + .dest = __constant_htons(REMOTE_PORT), + .len = 0, + .check = 0, + }, +}; + +#ifdef IPC_DEBUG /* Only used by DEBUG(()) */ +static char *ntoa(uint32_t addr) +{ + struct in_addr in = { addr }; + return inet_ntoa(in); +} +#endif + +/* + * Send a packet. The options are listed in iov[1...iov_len]. + * iov[0] is reserved for the bootp packet header. + */ +int packet_send(struct netdev *dev, struct iovec *iov, int iov_len) +{ + struct sockaddr_ll sll; + struct msghdr msg = { + .msg_name = &sll, + .msg_namelen = sizeof(sll), + .msg_iov = iov, + .msg_iovlen = iov_len, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + int i, len = 0; + + if (cfg_local_port != LOCAL_PORT) { + ipudp_hdrs.udp.source = htons(cfg_local_port); + ipudp_hdrs.udp.dest = htons(cfg_remote_port); + } + + DEBUG(("\n udp src %d dst %d", ntohs(ipudp_hdrs.udp.source), + ntohs(ipudp_hdrs.udp.dest))); + + DEBUG(("\n ip src %s ", ntoa(ipudp_hdrs.ip.saddr))); + DEBUG(("dst %s ", ntoa(ipudp_hdrs.ip.daddr))); + + /* + * Glue in the ip+udp header iovec + */ + iov[0].iov_base = &ipudp_hdrs; + iov[0].iov_len = sizeof(struct header); + + for (i = 0; i < iov_len; i++) + len += iov[i].iov_len; + + sll.sll_family = AF_PACKET; + sll.sll_protocol = htons(ETH_P_IP); + sll.sll_ifindex = dev->ifindex; + sll.sll_hatype = dev->hwtype; + sll.sll_pkttype = PACKET_BROADCAST; + sll.sll_halen = dev->hwlen; + memcpy(sll.sll_addr, dev->hwbrd, dev->hwlen); + + ipudp_hdrs.ip.tot_len = htons(len); + ipudp_hdrs.ip.check = 0; + ipudp_hdrs.ip.check = ip_checksum((uint16_t *) & ipudp_hdrs.ip, + ipudp_hdrs.ip.ihl); + + ipudp_hdrs.udp.len = htons(len - sizeof(struct iphdr)); + + DEBUG(("\n bytes %d\n", len)); + + return sendmsg(pkt_fd, &msg, 0); +} + +int packet_peek(int *ifindex) +{ + struct sockaddr_ll sll; + struct iphdr iph; + int ret, sllen = sizeof(struct sockaddr_ll); + + /* + * Peek at the IP header. + */ + ret = recvfrom(pkt_fd, &iph, sizeof(struct iphdr), + MSG_PEEK, (struct sockaddr *)&sll, &sllen); + if (ret == -1) + return -1; + + if (sll.sll_family != AF_PACKET) + goto discard_pkt; + + if (iph.ihl < 5 || iph.version != IPVERSION) + goto discard_pkt; + + *ifindex = sll.sll_ifindex; + + return 0; + +discard_pkt: + packet_discard(); + return 0; +} + +void packet_discard(void) +{ + struct iphdr iph; + struct sockaddr_ll sll; + socklen_t sllen = sizeof(sll); + + recvfrom(pkt_fd, &iph, sizeof(iph), 0, (struct sockaddr *)&sll, &sllen); +} + +/* + * Receive a bootp packet. The options are listed in iov[1...iov_len]. + * iov[0] must point to the bootp packet header. + */ +int packet_recv(struct iovec *iov, int iov_len) +{ + struct iphdr *ip, iph; + struct udphdr *udp; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = iov, + .msg_iovlen = iov_len, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + int ret, iphl; + + ret = recvfrom(pkt_fd, &iph, sizeof(struct iphdr), + MSG_PEEK, NULL, NULL); + if (ret == -1) + return -1; + + if (iph.ihl < 5 || iph.version != IPVERSION) + goto discard_pkt; + + iphl = iph.ihl * 4; + + ip = malloc(iphl + sizeof(struct udphdr)); + if (!ip) + goto discard_pkt; + + udp = (struct udphdr *)((char *)ip + iphl); + + iov[0].iov_base = ip; + iov[0].iov_len = iphl + sizeof(struct udphdr); + + ret = recvmsg(pkt_fd, &msg, 0); + if (ret == -1) + goto free_pkt; + + DEBUG(("<- bytes %d ", ret)); + + if (ip_checksum((uint16_t *) ip, ip->ihl) != 0) + goto free_pkt; + + DEBUG(("\n ip src %s ", ntoa(ip->saddr))); + DEBUG(("dst %s ", ntoa(ip->daddr))); + + if (ntohs(ip->tot_len) > ret || ip->protocol != IPPROTO_UDP) + goto free_pkt; + + ret -= 4 * ip->ihl; + + DEBUG(("\n udp src %d dst %d ", ntohs(udp->source), + ntohs(udp->dest))); + + if (udp->source != htons(cfg_remote_port) || + udp->dest != htons(cfg_local_port)) + goto free_pkt; + + if (ntohs(udp->len) > ret) + goto free_pkt; + + ret -= sizeof(struct udphdr); + + free(ip); + + return ret; + +free_pkt: + free(ip); + return 0; + +discard_pkt: + DEBUG(("discarded\n")); + packet_discard(); + return 0; +} diff --git a/usr/kinit/ipconfig/packet.h b/usr/kinit/ipconfig/packet.h new file mode 100644 index 0000000..292991c --- /dev/null +++ b/usr/kinit/ipconfig/packet.h @@ -0,0 +1,9 @@ +/* + * ipconfig/packet.h + */ +int packet_open(void); +void packet_close(void); +int packet_send(struct netdev *dev, struct iovec *iov, int iov_len); +int packet_peek(int *ifindex); +void packet_discard(void); +int packet_recv(struct iovec *iov, int iov_len); diff --git a/usr/kinit/kinit.c b/usr/kinit/kinit.c new file mode 100644 index 0000000..ce4725c --- /dev/null +++ b/usr/kinit/kinit.c @@ -0,0 +1,330 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kinit.h" +#include "ipconfig.h" +#include "run-init.h" + +const char *progname = "kinit"; +int mnt_procfs; +int mnt_sysfs; + +#ifdef INI_DEBUG +void dump_args(int argc, char *argv[]) +{ + int i; + + printf(" argc == %d\n", argc); + + for (i = 0; i < argc; i++) { + printf(" argv[%d]: \"%s\"\n", i, argv[i]); + } + + if (argv[argc] != NULL) { + printf(" argv[%d]: \"%s\" (SHOULD BE NULL)\n", + argc, argv[argc]); + } +} +#endif + + +static int do_ipconfig(int argc, char *argv[]) +{ + int i, a = 0; + char **args = alloca((argc + 1) * sizeof(char *)); + + if (!args) + return -1; + + args[a++] = (char *)"IP-Config"; + + DEBUG(("Running ipconfig\n")); + + for (i = 1; i < argc; i++) { + if (strncmp(argv[i], "ip=", 3) == 0 || + strncmp(argv[i], "nfsaddrs=", 9) == 0) { + args[a++] = argv[i]; + } + } + + if (a > 1) { + args[a] = NULL; + dump_args(a, args); + return ipconfig_main(a, args); + } + + return 0; +} + +static int split_cmdline(int cmdcmax, char *cmdv[], char *argv0, + char *cmdlines[], char *args[]) +{ + int was_space; + char c, *p; + int vmax = cmdcmax; + int v = 1; + int space; + + if (cmdv) + cmdv[0] = argv0; + + /* First, add the parsable command lines */ + + while (*cmdlines) { + p = *cmdlines++; + was_space = 1; + while (v < vmax) { + c = *p; + space = isspace(c); + if ((space || !c) && !was_space) { + if (cmdv) + *p = '\0'; + v++; + } else if (was_space) { + if (cmdv) + cmdv[v] = p; + } + + if (!c) + break; + + was_space = space; + p++; + } + } + + /* Second, add the explicit command line arguments */ + + while (*args && v < vmax) { + if (cmdv) + cmdv[v] = *args; + v++; + args++; + } + + if (cmdv) + cmdv[v] = NULL; + + return v; +} + +static int mount_sys_fs(const char *check, const char *fsname, + const char *fstype) +{ + struct stat st; + + if (stat(check, &st) == 0) { + return 0; + } + + mkdir(fsname, 0555); + + if (mount("none", fsname, fstype, 0, NULL) == -1) { + fprintf(stderr, "%s: could not mount %s as %s\n", + progname, fsname, fstype); + return -1; + } + + return 1; +} + +static void check_path(const char *path) +{ + struct stat st; + + if (stat(path, &st) == -1) { + if (errno != ENOENT) { + perror("stat"); + exit(1); + } + if (mkdir(path, 0755) == -1) { + perror("mkdir"); + exit(1); + } + } else if (!S_ISDIR(st.st_mode)) { + fprintf(stderr, "%s: '%s' not a directory\n", progname, path); + exit(1); + } +} + +static const char *find_init(const char *root, const char *user) +{ + const char *init_paths[] = { + "/sbin/init", "/bin/init", "/etc/init", "/bin/sh", NULL + }; + const char **p; + const char *path; + + if (chdir(root)) { + perror("chdir"); + exit(1); + } + + if (user) + DEBUG(("Checking for init: %s\n", user)); + + if (user && user[0] == '/' && !access(user+1, X_OK)) { + path = user; + } else { + for (p = init_paths; *p; p++) { + DEBUG(("Checking for init: %s\n", *p)); + if (!access(*p+1, X_OK)) + break; + } + path = *p; + } + chdir("/"); + return path; +} + +/* This is the argc and argv we pass to init */ +const char *init_path; +int init_argc; +char **init_argv; + +extern ssize_t readfile(const char *, char **); + +int main(int argc, char *argv[]) +{ + char **cmdv, **args; + char *cmdlines[3]; + int i; + const char *errmsg; + int ret = 0; + int cmdc; + int fd; + struct timeval now; + + gettimeofday(&now, NULL); + srand48(now.tv_usec ^ (now.tv_sec << 24)); + + /* Default parameters for anything init-like we execute */ + init_argc = argc; + init_argv = alloca((argc+1)*sizeof(char *)); + memcpy(init_argv, argv, (argc+1)*sizeof(char *)); + + if ((fd = open("/dev/console", O_RDWR)) != -1) { + dup2(fd, STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + dup2(fd, STDERR_FILENO); + + if (fd > STDERR_FILENO) { + close(fd); + } + } + + mnt_procfs = mount_sys_fs("/proc/cmdline", "/proc", "proc") >= 0; + if (!mnt_procfs) { + ret = 1; + goto bail; + } + + mnt_sysfs = mount_sys_fs("/sys/bus", "/sys", "sysfs") >= 0; + if (!mnt_sysfs) { + ret = 1; + goto bail; + } + + /* Construct the effective kernel command line. The + effective kernel command line consists of /arch.cmd, if + it exists, /proc/cmdline, plus any arguments after an -- + argument on the proper command line, in that order. */ + + ret = readfile("/arch.cmd", &cmdlines[0]); + if (ret < 0) + cmdlines[0] = ""; + + ret = readfile("/proc/cmdline", &cmdlines[1]); + if (ret < 0) { + fprintf(stderr, "%s: cannot read /proc/cmdline\n", progname); + ret = 1; + goto bail; + } + + cmdlines[2] = NULL; + + /* Find an -- argument, and if so append to the command line */ + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "--")) { + i++; + break; + } + } + args = &argv[i]; /* Points either to first argument past -- or + to the final NULL */ + + /* Count the number of arguments */ + cmdc = split_cmdline(INT_MAX, NULL, argv[0], cmdlines, args); + + /* Actually generate the cmdline array */ + cmdv = (char **)alloca((cmdc+1)*sizeof(char *)); + if (split_cmdline(cmdc, cmdv, argv[0], cmdlines, args) != cmdc) { + ret = 1; + goto bail; + } + + /* Debugging... */ + dump_args(cmdc, cmdv); + + /* Resume from suspend-to-disk, if appropriate */ + /* If successful, does not return */ + do_resume(cmdc, cmdv); + + /* Initialize networking, if applicable */ + do_ipconfig(cmdc, cmdv); + + check_path("/root"); + do_mounts(cmdc, cmdv); + + if (mnt_procfs) { + umount2("/proc", 0); + mnt_procfs = 0; + } + + if (mnt_sysfs) { + umount2("/sys", 0); + mnt_sysfs = 0; + } + + init_path = find_init("/root", get_arg(cmdc, cmdv, "init=")); + if (!init_path) { + fprintf(stderr, "%s: init not found!\n", progname); + ret = 2; + goto bail; + } + + init_argv[0] = strrchr(init_path, '/') + 1; + + errmsg = run_init("/root", "/dev/console", init_path, init_argv); + + /* If run_init returned, something went bad */ + fprintf(stderr, "%s: %s: %s\n", progname, errmsg, strerror(errno)); + ret = 2; + goto bail; + +bail: + if (mnt_procfs) + umount2("/proc", 0); + + if (mnt_sysfs) + umount2("/sys", 0); + + /* + * If we get here, something bad probably happened, and the kernel + * will most likely panic. Drain console output so the user can + * figure out what happened. + */ + tcdrain(2); + tcdrain(1); + + return ret; +} diff --git a/usr/kinit/kinit.h b/usr/kinit/kinit.h new file mode 100644 index 0000000..23d974b --- /dev/null +++ b/usr/kinit/kinit.h @@ -0,0 +1,73 @@ +/* + * kinit/kinit.h + */ + +#ifndef KINIT_H +#define KINIT_H + +#include +#include +#include + +int do_mounts(int argc, char *argv[]); +int mount_nfs_root(int argc, char *argv[], int flags); +int ramdisk_load(int argc, char *argv[], dev_t root_dev); +void md_run(int argc, char *argv[]); +int do_resume(int argc, char *argv[]); +const char *bdevname(dev_t dev); + +extern int mnt_procfs; +extern int mnt_sysfs; + +extern int init_argc; +extern char **init_argv; +extern const char *progname; + +char *get_arg(int argc, char *argv[], const char *name); +int get_flag(int argc, char *argv[], const char *name); + +int open_cloexec(const char *path, int flags, mode_t mode); + +int getintfile(const char *path, long *val); + +ssize_t readfile(const char *path, char **pptr); +ssize_t freadfile(FILE *f, char **pptr); + +/* + * min()/max() macros that also do + * strict type-checking.. See the + * "unnecessary" pointer comparison. + * From the Linux kernel. + */ +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#define INI_DEBUG + +#undef DEBUG +#ifdef INI_DEBUG +#define DEBUG(x) printf x +#else +#define DEBUG(x) do { } while (0) +#endif + +#ifdef INI_DEBUG +void dump_args(int argc, char *argv[]); +#else +static inline void dump_args(int argc, char *argv[]) +{ + (void)argc; + (void)argv; +} +#endif + +#endif /* KINIT_H */ diff --git a/usr/kinit/name_to_dev.c b/usr/kinit/name_to_dev.c new file mode 100644 index 0000000..593bc12 --- /dev/null +++ b/usr/kinit/name_to_dev.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "do_mounts.h" +#include "kinit.h" + +#define BUF_SZ 65536 + +/* Find dev_t for e.g. "hda,NULL" or "hdb,2" */ +static dev_t try_name(char *name, int part) +{ + char path[BUF_SZ]; + char buf[BUF_SZ]; + int range; + unsigned int major, minor; + dev_t res; + char *s; + int len; + int fd; + + /* read device number from /sys/block/.../dev */ + snprintf(path, sizeof(path), "/sys/block/%s/dev", name); + fd = open(path, 0, 0); + if (fd < 0) + goto fail; + len = read(fd, buf, BUF_SZ); + close(fd); + + if (len <= 0 || len == BUF_SZ || buf[len - 1] != '\n') + goto fail; + buf[len - 1] = '\0'; + major = strtoul(buf, &s, 10); + if (*s != ':') + goto fail; + minor = strtoul(s + 1, &s, 10); + if (*s) + goto fail; + res = makedev(major, minor); + + /* if it's there and we are not looking for a partition - that's it */ + if (!part) + return res; + + /* otherwise read range from .../range */ + snprintf(path, sizeof(path), "/sys/block/%s/range", name); + fd = open(path, 0, 0); + if (fd < 0) + goto fail; + len = read(fd, buf, 32); + close(fd); + if (len <= 0 || len == 32 || buf[len - 1] != '\n') + goto fail; + buf[len - 1] = '\0'; + range = strtoul(buf, &s, 10); + if (*s) + goto fail; + + /* if partition is within range - we got it */ + if (part < range) { + DEBUG(("kinit: try_name %s,%d = %s\n", name, part, + bdevname(res + part))); + return res + part; + } + + fail: + return (dev_t) 0; +} + +/* + * Convert a name into device number. We accept the following variants: + * + * 1) device number in hexadecimal represents itself + * 2) device number in major:minor decimal represents itself + * 3) /dev/nfs represents Root_NFS + * 4) /dev/ represents the device number of disk + * 5) /dev/ represents the device number + * of partition - device number of disk plus the partition number + * 6) /dev/p - same as the above, that form is + * used when disk name of partitioned disk ends on a digit. + * 7) an actual block device node in the initramfs filesystem + * + * If name doesn't have fall into the categories above, we return 0. + * Driverfs is used to check if something is a disk name - it has + * all known disks under bus/block/devices. If the disk name + * contains slashes, name of driverfs node has them replaced with + * dots. try_name() does the actual checks, assuming that driverfs + * is mounted on rootfs /sys. + */ + +static inline dev_t name_to_dev_t_real(const char *name) +{ + char *p; + dev_t res = 0; + char *s; + int part; + struct stat st; + int len; + const char *devname; + char *cptr, *e1, *e2; + int major, minor; + + + if (name[0] == '/') { + devname = name; + } else { + char *dname = alloca(strlen(name) + 6); + sprintf(dname, "/dev/%s", name); + devname = dname; + } + + if (!stat(devname, &st) && S_ISBLK(st.st_mode)) + return st.st_rdev; + + if (strncmp(name, "/dev/", 5)) { + if ((cptr = strchr(devname+5, ':')) && + cptr[1] != '\0') { + /* Colon-separated decimal device number */ + *cptr = '\0'; + major = strtoul(devname+5, &e1, 10); + minor = strtoul(cptr+1, &e2, 10); + if (!*e1 && !*e2) + return makedev(major, minor); + *cptr = ':'; + } else { + /* Hexadecimal device number */ + res = (dev_t) strtoul(name, &p, 16); + if (!*p) + return res; + } + } else { + name += 5; + } + + if (!strcmp(name, "nfs")) + return Root_NFS; + + if (!strcmp(name, "ram")) /* /dev/ram - historic alias for /dev/ram0 */ + return Root_RAM0; + + if (!strncmp(name, "mtd", 3)) + return Root_MTD; + + len = strlen(name); + s = alloca(len + 1); + memcpy(s, name, len + 1); + + for (p = s; *p; p++) + if (*p == '/') + *p = '!'; + res = try_name(s, 0); + if (res) + return res; + + while (p > s && isdigit(p[-1])) + p--; + if (p == s || !*p || *p == '0') + goto fail; + part = strtoul(p, NULL, 10); + *p = '\0'; + res = try_name(s, part); + if (res) + return res; + + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') + goto fail; + p[-1] = '\0'; + res = try_name(s, part); + return res; + +fail: + return (dev_t) 0; +} + +dev_t name_to_dev_t(const char *name) +{ + dev_t dev = name_to_dev_t_real(name); + + DEBUG(("kinit: name_to_dev_t(%s) = %s\n", name, bdevname(dev))); + return dev; +} + +#ifdef TEST_NAMETODEV /* Standalone test */ + +int main(int argc, char *argv[]) +{ + int i; + + for (i = 1; i < argc; i++) + name_to_dev_t(argv[i]); + + return 0; +} + +#endif diff --git a/usr/kinit/nfsmount/Kbuild b/usr/kinit/nfsmount/Kbuild new file mode 100644 index 0000000..461e6f3 --- /dev/null +++ b/usr/kinit/nfsmount/Kbuild @@ -0,0 +1,27 @@ +# +# kbuild file for nfsmount +# + +static-y := static/nfsmount +#FIXME - build is broken static-y := dummypmap +shared-y := shared/nfsmount + +objs := main.o mount.o portmap.o dummypmap.o sunrpc.o + +# Create built-in.o with all .o files (used by kinit) +lib-y := $(objs) + +# .o files used for executables +static/nfsmount-y := $(objs) +shared/nfsmount-y := $(objs) + +# dummypmap uses a single .o file (rename src file?) +dummypmap-y := dummypmap_test.o + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? + +clean-dirs := static shared + +# Install binary +install-y := $(shared-y) diff --git a/usr/kinit/nfsmount/README.locking b/usr/kinit/nfsmount/README.locking new file mode 100644 index 0000000..bf2e8e7 --- /dev/null +++ b/usr/kinit/nfsmount/README.locking @@ -0,0 +1,26 @@ +I have implemented portmap spoofing in klibc nfsmount (released as +klibc-0.144) This is basically a vestigial portmap daemon which gets +launched before the mount() call and then just records any +transactions it gets to a file and sends back an affirmative reply. + +There are two ways to use it (this belongs in a README file, but it's +too late at night right now): + +a) Set a fixed portnumber in /proc/sys/nfs/nlm_tcpport and +/proc/sys/nfs/nlm_udpport before calling nfsmount; once the portmapper +starts feed that fixed portnumber to pmap_set(8). In this case the +pmap_file can be /dev/null. + +b) Allow the kernel to bind to any port and use the file produced by +nfsroot to feed to pmap_set (it should be directly compatible); this +means the file needs to be transferred to a place where the "real +root" can find it before run-init. + +In either case, it is imperative that the real portmapper is launched +before any program actually tries to create locks! + +To use it: + + # We need the loopback device to be up before we do this! + ipconfig 127.0.0.1:::::lo:none + nfsroot -p pmap_file -o lock server:/pathname /realpath diff --git a/usr/kinit/nfsmount/dummypmap.c b/usr/kinit/nfsmount/dummypmap.c new file mode 100644 index 0000000..a91602b --- /dev/null +++ b/usr/kinit/nfsmount/dummypmap.c @@ -0,0 +1,188 @@ +/* + * dummypmap.c + * + * Enough portmapper functionality that mount doesn't hang trying + * to start lockd. Enables nfsroot with locking functionality. + * + * Note: the kernel will only speak to the local portmapper + * using RPC over UDP. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sunrpc.h" + +extern const char *progname; + +struct portmap_call { + struct rpc_call rpc; + uint32_t program; + uint32_t version; + uint32_t proto; + uint32_t port; +}; + +struct portmap_reply { + struct rpc_reply rpc; + uint32_t port; +}; + +static int bind_portmap(void) +{ + int sock = socket(PF_INET, SOCK_DGRAM, 0); + struct sockaddr_in sin; + + if (sock < 0) + return -1; + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(0x7f000001); /* 127.0.0.1 */ + sin.sin_port = htons(RPC_PMAP_PORT); + if (bind(sock, (struct sockaddr *)&sin, sizeof sin) < 0) { + int err = errno; + close(sock); + errno = err; + return -1; + } + + return sock; +} + +static const char *protoname(uint32_t proto) +{ + switch (ntohl(proto)) { + case IPPROTO_TCP: + return "tcp"; + case IPPROTO_UDP: + return "udp"; + default: + return NULL; + } +} + +static int dummy_portmap(int sock, FILE *portmap_file) +{ + struct sockaddr_in sin; + int pktlen, addrlen; + union { + struct portmap_call c; + unsigned char b[65536]; /* Max UDP packet size */ + } pkt; + struct portmap_reply rply; + + for (;;) { + addrlen = sizeof sin; + pktlen = recvfrom(sock, &pkt.c.rpc.hdr.udp, sizeof pkt, 0, + (struct sockaddr *)&sin, &addrlen); + + if (pktlen < 0) { + if (errno == EINTR) + continue; + + return -1; + } + + /* +4 to skip the TCP fragment header */ + if (pktlen + 4 < sizeof(struct portmap_call)) + continue; /* Bad packet */ + + if (pkt.c.rpc.hdr.udp.msg_type != htonl(RPC_CALL)) + continue; /* Bad packet */ + + memset(&rply, 0, sizeof rply); + + rply.rpc.hdr.udp.xid = pkt.c.rpc.hdr.udp.xid; + rply.rpc.hdr.udp.msg_type = htonl(RPC_REPLY); + + if (pkt.c.rpc.rpc_vers != htonl(2)) { + rply.rpc.reply_state = htonl(REPLY_DENIED); + /* state <- RPC_MISMATCH == 0 */ + } else if (pkt.c.rpc.program != htonl(PORTMAP_PROGRAM)) { + rply.rpc.reply_state = htonl(PROG_UNAVAIL); + } else if (pkt.c.rpc.prog_vers != htonl(2)) { + rply.rpc.reply_state = htonl(PROG_MISMATCH); + } else if (pkt.c.rpc.cred_len != 0 || pkt.c.rpc.vrf_len != 0) { + /* Can't deal with credentials data; the kernel + won't send them */ + rply.rpc.reply_state = htonl(SYSTEM_ERR); + } else { + switch (ntohl(pkt.c.rpc.proc)) { + case PMAP_PROC_NULL: + break; + case PMAP_PROC_SET: + if (pkt.c.proto == htonl(IPPROTO_TCP) || + pkt.c.proto == htonl(IPPROTO_UDP)) { + if (portmap_file) + fprintf(portmap_file, + "%u %u %s %u\n", + ntohl(pkt.c.program), + ntohl(pkt.c.version), + protoname(pkt.c.proto), + ntohl(pkt.c.port)); + rply.port = htonl(1); /* TRUE = success */ + } + break; + case PMAP_PROC_UNSET: + rply.port = htonl(1); /* TRUE = success */ + break; + case PMAP_PROC_GETPORT: + break; + case PMAP_PROC_DUMP: + break; + default: + rply.rpc.reply_state = htonl(PROC_UNAVAIL); + break; + } + } + + sendto(sock, &rply.rpc.hdr.udp, sizeof rply - 4, 0, + (struct sockaddr *)&sin, addrlen); + } +} + +pid_t start_dummy_portmap(const char *file) +{ + FILE *portmap_filep; + int sock; + pid_t spoof_portmap; + + portmap_filep = fopen(file, "w"); + if (!portmap_filep) { + fprintf(stderr, "%s: cannot write portmap file: %s\n", + progname, file); + return -1; + } + + sock = bind_portmap(); + if (sock == -1) { + if (errno == EINVAL || errno == EADDRINUSE) + return 0; /* Assume not needed */ + else { + fprintf(stderr, "%s: portmap spoofing failed\n", + progname); + return -1; + } + } + + spoof_portmap = fork(); + if (spoof_portmap == -1) { + fprintf(stderr, "%s: cannot fork\n", progname); + return -1; + } else if (spoof_portmap == 0) { + /* Child process */ + dummy_portmap(sock, portmap_filep); + _exit(255); /* Error */ + } else { + /* Parent process */ + close(sock); + return spoof_portmap; + } +} diff --git a/usr/kinit/nfsmount/dummypmap.h b/usr/kinit/nfsmount/dummypmap.h new file mode 100644 index 0000000..5ff10cf --- /dev/null +++ b/usr/kinit/nfsmount/dummypmap.h @@ -0,0 +1,13 @@ +/* + * dummyportmap.h + * + * Functions for the portmap spoofer + */ + +#ifndef NFSMOUNT_DUMMYPORTMAP_H +#define NFSMOUNT_DUMMYPORTMAP_H + +#include +pid_t start_dummy_portmap(const char *file); + +#endif /* NFSMOUNT_DUMMYPORTMAP_H */ diff --git a/usr/kinit/nfsmount/dummypmap_test.c b/usr/kinit/nfsmount/dummypmap_test.c new file mode 100644 index 0000000..d81a141 --- /dev/null +++ b/usr/kinit/nfsmount/dummypmap_test.c @@ -0,0 +1,2 @@ +#define TEST +#include "dummypmap.c" diff --git a/usr/kinit/nfsmount/main.c b/usr/kinit/nfsmount/main.c new file mode 100644 index 0000000..4c8a3b6 --- /dev/null +++ b/usr/kinit/nfsmount/main.c @@ -0,0 +1,263 @@ +/* + * nfsmount/main.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For _KLIBC_NO_MMU */ + +#include + +#include "nfsmount.h" +#include "sunrpc.h" +#include "dummypmap.h" + +const char *progname; +static jmp_buf abort_buf; + +static struct nfs_mount_data mount_data = { + .version = NFS_MOUNT_VERSION, + .flags = NFS_MOUNT_NONLM | NFS_MOUNT_VER3 | NFS_MOUNT_TCP, + .rsize = 0, /* Server's choice */ + .wsize = 0, /* Server's choice */ + .timeo = 7, + .retrans = 3, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .namlen = NAME_MAX, +}; + +int nfs_port; + +static struct int_opts { + char *name; + int *val; +} int_opts[] = { + {"port", &nfs_port}, + {"rsize", &mount_data.rsize}, + {"wsize", &mount_data.wsize}, + {"timeo", &mount_data.timeo}, + {"retrans", &mount_data.retrans}, + {"acregmin", &mount_data.acregmin}, + {"acregmax", &mount_data.acregmax}, + {"acdirmin", &mount_data.acdirmin}, + {"acdirmax", &mount_data.acdirmax}, + {NULL, NULL} +}; + +static struct bool_opts { + char *name; + int and_mask; + int or_mask; +} bool_opts[] = { + {"soft", ~NFS_MOUNT_SOFT, NFS_MOUNT_SOFT}, + {"hard", ~NFS_MOUNT_SOFT, 0}, + {"intr", ~NFS_MOUNT_INTR, NFS_MOUNT_INTR}, + {"nointr", ~NFS_MOUNT_INTR, 0}, + {"posix", ~NFS_MOUNT_POSIX, NFS_MOUNT_POSIX}, + {"noposix", ~NFS_MOUNT_POSIX, 0}, + {"cto", ~NFS_MOUNT_NOCTO, 0}, + {"nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO}, + {"ac", ~NFS_MOUNT_NOAC, 0}, + {"noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC}, + {"lock", ~NFS_MOUNT_NONLM, 0}, + {"nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM}, + {"v2", ~NFS_MOUNT_VER3, 0}, + {"v3", ~NFS_MOUNT_VER3, NFS_MOUNT_VER3}, + {"udp", ~NFS_MOUNT_TCP, 0}, + {"tcp", ~NFS_MOUNT_TCP, NFS_MOUNT_TCP}, + {"broken_suid", ~NFS_MOUNT_BROKEN_SUID, NFS_MOUNT_BROKEN_SUID}, + {"ro", ~NFS_MOUNT_KLIBC_RONLY, NFS_MOUNT_KLIBC_RONLY}, + {"rw", ~NFS_MOUNT_KLIBC_RONLY, 0}, + {NULL, 0, 0} +}; + +static int parse_int(const char *val, const char *ctx) +{ + char *end; + int ret; + + ret = (int)strtoul(val, &end, 0); + if (*val == '\0' || *end != '\0') { + fprintf(stderr, "%s: invalid value for %s\n", val, ctx); + longjmp(abort_buf, 1); + } + return ret; +} + +static void parse_opts(char *opts) +{ + char *cp, *val; + + while ((cp = strsep(&opts, ",")) != NULL) { + if (*cp == '\0') + continue; + if ((val = strchr(cp, '=')) != NULL) { + struct int_opts *opts = int_opts; + *val++ = '\0'; + while (opts->name && strcmp(opts->name, cp) != 0) + opts++; + if (opts->name) + *(opts->val) = parse_int(val, opts->name); + else { + fprintf(stderr, "%s: bad option '%s'\n", + progname, cp); + longjmp(abort_buf, 1); + } + } else { + struct bool_opts *opts = bool_opts; + while (opts->name && strcmp(opts->name, cp) != 0) + opts++; + if (opts->name) { + mount_data.flags &= opts->and_mask; + mount_data.flags |= opts->or_mask; + } else { + fprintf(stderr, "%s: bad option '%s'\n", + progname, cp); + longjmp(abort_buf, 1); + } + } + } +} + +static uint32_t parse_addr(const char *ip) +{ + struct in_addr in; + if (inet_aton(ip, &in) == 0) { + fprintf(stderr, "%s: can't parse IP address '%s'\n", + progname, ip); + longjmp(abort_buf, 1); + } + return in.s_addr; +} + +static void check_path(const char *path) +{ + struct stat st; + + if (stat(path, &st) == -1) { + perror("stat"); + longjmp(abort_buf, 1); + } else if (!S_ISDIR(st.st_mode)) { + fprintf(stderr, "%s: '%s' not a directory\n", progname, path); + longjmp(abort_buf, 1); + } +} + +int main(int argc, char *argv[]) + __attribute__ ((weak, alias("nfsmount_main"))); + +int nfsmount_main(int argc, char *argv[]) +{ + uint32_t server; + char *rem_name; + char *rem_path; + char *hostname; + char *path; + int c; + const char *portmap_file; + pid_t spoof_portmap; + int err; + + if ((err = setjmp(abort_buf))) + return err; + + /* Set these here to avoid longjmp warning */ + portmap_file = NULL; + spoof_portmap = 0; + server = 0; + + /* If progname is set we're invoked from another program */ + if (!progname) { + struct timeval now; + progname = argv[0]; + gettimeofday(&now, NULL); + srand48(now.tv_usec ^ (now.tv_sec << 24)); + } + + while ((c = getopt(argc, argv, "o:p:")) != EOF) { + switch (c) { + case 'o': + parse_opts(optarg); + break; + case 'p': + portmap_file = optarg; + break; + case '?': + fprintf(stderr, "%s: invalid option -%c\n", + progname, optopt); + return 1; + } + } + + if (optind == argc) { + fprintf(stderr, "%s: need a path\n", progname); + return 1; + } + + hostname = rem_path = argv[optind]; + + if ((rem_name = strdup(rem_path)) == NULL) { + perror("strdup"); + return 1; + } + + if ((rem_path = strchr(rem_path, ':')) == NULL) { + fprintf(stderr, "%s: need a server\n", progname); + return 1; + } + + *rem_path++ = '\0'; + + if (*rem_path != '/') { + fprintf(stderr, "%s: need a path\n", progname); + return 1; + } + + server = parse_addr(hostname); + + if (optind <= argc - 2) + path = argv[optind + 1]; + else + path = "/nfs_root"; + + check_path(path); + +#if! _KLIBC_NO_MMU + /* Note: uClinux can't fork(), so the spoof portmapper is not + available on uClinux. */ + if (portmap_file) + spoof_portmap = start_dummy_portmap(portmap_file); + + if (spoof_portmap == -1) + return 1; +#endif + + if (nfs_mount(rem_name, hostname, server, rem_path, path, + &mount_data) != 0) + return 1; + + /* If we set up the spoofer, tear it down now */ + if (spoof_portmap) { + kill(SIGTERM, spoof_portmap); + while (waitpid(spoof_portmap, NULL, 0) == -1 && + errno == EINTR) + ; + } + + free(rem_name); + + return 0; +} diff --git a/usr/kinit/nfsmount/mount.c b/usr/kinit/nfsmount/mount.c new file mode 100644 index 0000000..066f6cd --- /dev/null +++ b/usr/kinit/nfsmount/mount.c @@ -0,0 +1,357 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nfsmount.h" +#include "sunrpc.h" + +static uint32_t mount_port; + +struct mount_call { + struct rpc_call rpc; + uint32_t path_len; + char path[0]; +}; + +/* + * The following structure is the NFS v3 on-the-wire file handle, + * as defined in rfc1813. + * This differs from the structure used by the kernel, + * defined in : rfc has a long in network order, + * kernel has a short in native order. + * Both kernel and rfc use the name nfs_fh; kernel name is + * visible to user apps in some versions of libc. + * Use different name to avoid clashes. + */ +#define NFS_MAXFHSIZE_WIRE 64 +struct nfs_fh_wire { + uint32_t size; + char data[NFS_MAXFHSIZE_WIRE]; +} __attribute__ ((packed)); + +struct mount_reply { + struct rpc_reply reply; + uint32_t status; + struct nfs_fh_wire fh; +} __attribute__ ((packed)); + +#define MNT_REPLY_MINSIZE (sizeof(struct rpc_reply) + sizeof(uint32_t)) + +static int get_ports(uint32_t server, const struct nfs_mount_data *data) +{ + uint32_t nfs_ver, mount_ver; + uint32_t proto; + + if (data->flags & NFS_MOUNT_VER3) { + nfs_ver = NFS3_VERSION; + mount_ver = NFS_MNT3_VERSION; + } else { + nfs_ver = NFS2_VERSION; + mount_ver = NFS_MNT_VERSION; + } + + proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + + if (nfs_port == 0) { + nfs_port = portmap(server, NFS_PROGRAM, nfs_ver, proto); + if (nfs_port == 0) { + if (proto == IPPROTO_TCP) { + struct in_addr addr = { server }; + fprintf(stderr, "NFS over TCP not " + "available from %s\n", inet_ntoa(addr)); + return -1; + } + nfs_port = NFS_PORT; + } + } + + if (mount_port == 0) { + mount_port = portmap(server, NFS_MNT_PROGRAM, mount_ver, proto); + if (mount_port == 0) + mount_port = MOUNT_PORT; + } + return 0; +} + +static inline int pad_len(int len) +{ + return (len + 3) & ~3; +} + +static inline void dump_params(uint32_t server, + const char *path, + const struct nfs_mount_data *data) +{ + (void)server; + (void)path; + (void)data; + +#ifdef NFS_DEBUG + struct in_addr addr = { server }; + + printf("NFS params:\n"); + printf(" server = %s, path = \"%s\", ", inet_ntoa(addr), path); + printf("version = %d, proto = %s\n", + data->flags & NFS_MOUNT_VER3 ? 3 : 2, + (data->flags & NFS_MOUNT_TCP) ? "tcp" : "udp"); + printf(" mount_port = %d, nfs_port = %d, flags = %08x\n", + mount_port, nfs_port, data->flags); + printf(" rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", + data->rsize, data->wsize, data->timeo, data->retrans); + printf(" acreg (min, max) = (%d, %d), acdir (min, max) = (%d, %d)\n", + data->acregmin, data->acregmax, data->acdirmin, data->acdirmax); + printf(" soft = %d, intr = %d, posix = %d, nocto = %d, noac = %d\n", + (data->flags & NFS_MOUNT_SOFT) != 0, + (data->flags & NFS_MOUNT_INTR) != 0, + (data->flags & NFS_MOUNT_POSIX) != 0, + (data->flags & NFS_MOUNT_NOCTO) != 0, + (data->flags & NFS_MOUNT_NOAC) != 0); +#endif +} + +static inline void dump_fh(const char *data, int len) +{ + (void)data; + (void)len; + +#ifdef NFS_DEBUG + int i = 0; + int max = len - (len % 8); + + printf("Root file handle: %d bytes\n", NFS2_FHSIZE); + + while (i < max) { + int j; + + printf(" %4d: ", i); + for (j = 0; j < 4; j++) { + printf("%02x %02x %02x %02x ", + data[i] & 0xff, data[i + 1] & 0xff, + data[i + 2] & 0xff, data[i + 3] & 0xff); + } + i += j; + printf("\n"); + } +#endif +} + +static struct mount_reply mnt_reply; + +static int mount_call(uint32_t proc, uint32_t version, + const char *path, struct client *clnt) +{ + struct mount_call *mnt_call = NULL; + size_t path_len, call_len; + struct rpc rpc; + int ret = 0; + + path_len = strlen(path); + call_len = sizeof(*mnt_call) + pad_len(path_len); + + if ((mnt_call = malloc(call_len)) == NULL) { + perror("malloc"); + goto bail; + } + + memset(mnt_call, 0, sizeof(*mnt_call)); + + mnt_call->rpc.program = htonl(NFS_MNT_PROGRAM); + mnt_call->rpc.prog_vers = htonl(version); + mnt_call->rpc.proc = htonl(proc); + mnt_call->path_len = htonl(path_len); + memcpy(mnt_call->path, path, path_len); + + rpc.call = (struct rpc_call *)mnt_call; + rpc.call_len = call_len; + rpc.reply = (struct rpc_reply *)&mnt_reply; + rpc.reply_len = sizeof(mnt_reply); + + if (rpc_call(clnt, &rpc) < 0) + goto bail; + + if (proc != MNTPROC_MNT) { + goto done; + } + + if (rpc.reply_len < MNT_REPLY_MINSIZE) { + fprintf(stderr, "incomplete reply: %zu < %zu\n", + rpc.reply_len, MNT_REPLY_MINSIZE); + goto bail; + } + + if (mnt_reply.status != 0) { + fprintf(stderr, "mount call failed: %d\n", + ntohl(mnt_reply.status)); + goto bail; + } + + goto done; + +bail: + ret = -1; + +done: + if (mnt_call) { + free(mnt_call); + } + + return ret; +} + +static int mount_v2(const char *path, + struct nfs_mount_data *data, struct client *clnt) +{ + int ret = mount_call(MNTPROC_MNT, NFS_MNT_VERSION, path, clnt); + + if (ret == 0) { + dump_fh((const char *)&mnt_reply.fh, NFS2_FHSIZE); + + data->root.size = NFS_FHSIZE; + memcpy(data->root.data, &mnt_reply.fh, NFS_FHSIZE); + memcpy(data->old_root.data, &mnt_reply.fh, NFS_FHSIZE); + } + + return ret; +} + +static inline int umount_v2(const char *path, struct client *clnt) +{ + return mount_call(MNTPROC_UMNT, NFS_MNT_VERSION, path, clnt); +} + +static int mount_v3(const char *path, + struct nfs_mount_data *data, struct client *clnt) +{ + int ret = mount_call(MNTPROC_MNT, NFS_MNT3_VERSION, path, clnt); + + if (ret == 0) { + size_t fhsize = ntohl(mnt_reply.fh.size); + + dump_fh((const char *)&mnt_reply.fh.data, fhsize); + + memset(data->old_root.data, 0, NFS_FHSIZE); + memset(&data->root, 0, sizeof(data->root)); + data->root.size = fhsize; + memcpy(&data->root.data, mnt_reply.fh.data, fhsize); + data->flags |= NFS_MOUNT_VER3; + } + + return ret; +} + +static inline int umount_v3(const char *path, struct client *clnt) +{ + return mount_call(MNTPROC_UMNT, NFS_MNT3_VERSION, path, clnt); +} + +int nfs_mount(const char *pathname, const char *hostname, + uint32_t server, const char *rem_path, const char *path, + struct nfs_mount_data *data) +{ + struct client *clnt = NULL; + struct sockaddr_in addr; + char mounted = 0; + int sock = -1; + int ret = 0; + int mountflags; + + if (get_ports(server, data) != 0) { + goto bail; + } + + dump_params(server, rem_path, data); + + if (data->flags & NFS_MOUNT_TCP) { + clnt = tcp_client(server, mount_port, CLI_RESVPORT); + } else { + clnt = udp_client(server, mount_port, CLI_RESVPORT); + } + + if (clnt == NULL) { + goto bail; + } + + if (data->flags & NFS_MOUNT_VER3) { + ret = mount_v3(rem_path, data, clnt); + } else { + ret = mount_v2(rem_path, data, clnt); + } + + if (ret == -1) { + goto bail; + } + mounted = 1; + + if (data->flags & NFS_MOUNT_TCP) { + sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + } else { + sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); + } + + if (sock == -1) { + perror("socket"); + goto bail; + } + + if (bindresvport(sock, 0) == -1) { + perror("bindresvport"); + goto bail; + } + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = server; + addr.sin_port = htons(nfs_port); + memcpy(&data->addr, &addr, sizeof(data->addr)); + + strncpy(data->hostname, hostname, sizeof(data->hostname)); + + data->fd = sock; + + mountflags = (data->flags & NFS_MOUNT_KLIBC_RONLY) ? MS_RDONLY : 0; + data->flags = data->flags & NFS_MOUNT_FLAGMASK; + ret = mount(pathname, path, "nfs", mountflags, data); + + if (ret == -1) { + if (errno == ENODEV) { + fprintf(stderr, "mount: the kernel lacks NFS v%d " + "support\n", + (data->flags & NFS_MOUNT_VER3) ? 3 : 2); + } else { + perror("mount"); + } + goto bail; + } + + DEBUG(("Mounted %s on %s\n", pathname, path)); + + goto done; + + bail: + if (mounted) { + if (data->flags & NFS_MOUNT_VER3) { + umount_v3(path, clnt); + } else { + umount_v2(path, clnt); + } + } + + ret = -1; + + done: + if (clnt) { + client_free(clnt); + } + + if (sock != -1) { + close(sock); + } + + return ret; +} diff --git a/usr/kinit/nfsmount/nfsmount.h b/usr/kinit/nfsmount/nfsmount.h new file mode 100644 index 0000000..76632a2 --- /dev/null +++ b/usr/kinit/nfsmount/nfsmount.h @@ -0,0 +1,39 @@ +/* + * nfsmount/nfsmount.h + */ + +#ifndef NFSMOUNT_NFSMOUNT_H +#define NFSMOUNT_NFSMOUNT_H + +#include + +extern int nfs_port; + +extern int nfsmount_main(int argc, char *argv[]); +int nfs_mount(const char *rem_name, const char *hostname, + uint32_t server, const char *rem_path, + const char *path, struct nfs_mount_data *data); + +enum nfs_proto { + v2 = 2, + v3, +}; + +/* masked with NFS_MOUNT_FLAGMASK before mount() call */ +#define NFS_MOUNT_KLIBC_RONLY 0x00010000U +/* + * Note for gcc 3.2.2: + * + * If you're turning on debugging, make sure you get rid of -Os from + * the gcc command line, or else ipconfig will fail to link. + */ +#define NFS_DEBUG + +#undef DEBUG +#ifdef NFS_DEBUG +#define DEBUG(x) printf x +#else +#define DEBUG(x) do { } while(0) +#endif + +#endif /* NFSMOUNT_NFSMOUNT_H */ diff --git a/usr/kinit/nfsmount/portmap.c b/usr/kinit/nfsmount/portmap.c new file mode 100644 index 0000000..2f111d9 --- /dev/null +++ b/usr/kinit/nfsmount/portmap.c @@ -0,0 +1,73 @@ +#include +#include +#include /* __constant_hton* */ +#include +#include + +#include "nfsmount.h" +#include "sunrpc.h" + +struct portmap_call { + struct rpc_call rpc; + uint32_t program; + uint32_t version; + uint32_t proto; + uint32_t port; +}; + +struct portmap_reply { + struct rpc_reply rpc; + uint32_t port; +}; + +static struct portmap_call call = { + .rpc = { + .program = __constant_htonl(RPC_PMAP_PROGRAM), + .prog_vers = __constant_htonl(RPC_PMAP_VERSION), + .proc = __constant_htonl(PMAP_PROC_GETPORT), + } +}; + +uint32_t portmap(uint32_t server, uint32_t program, uint32_t version, uint32_t proto) +{ + struct portmap_reply reply; + struct client *clnt; + struct rpc rpc; + uint32_t port = 0; + + if ((clnt = tcp_client(server, RPC_PMAP_PORT, 0)) == NULL) { + if ((clnt = udp_client(server, RPC_PMAP_PORT, 0)) == NULL) { + goto bail; + } + } + + call.program = htonl(program); + call.version = htonl(version); + call.proto = htonl(proto); + + rpc.call = (struct rpc_call *)&call; + rpc.call_len = sizeof(call); + rpc.reply = (struct rpc_reply *)&reply; + rpc.reply_len = sizeof(reply); + + if (rpc_call(clnt, &rpc) < 0) + goto bail; + + if (rpc.reply_len < sizeof(reply)) { + fprintf(stderr, "incomplete reply: %zu < %zu\n", + rpc.reply_len, sizeof(reply)); + goto bail; + } + + port = ntohl(reply.port); + +bail: + DEBUG(("Port for %d/%d[%s]: %d\n", program, version, + proto == IPPROTO_TCP ? "tcp" : "udp", port)); + + if (clnt) { + client_free(clnt); + } + + return port; +} diff --git a/usr/kinit/nfsmount/sunrpc.c b/usr/kinit/nfsmount/sunrpc.c new file mode 100644 index 0000000..915d412 --- /dev/null +++ b/usr/kinit/nfsmount/sunrpc.c @@ -0,0 +1,253 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nfsmount.h" +#include "sunrpc.h" + +/* + * The magic offset is needed here because RPC over TCP includes a + * field that RPC over UDP doesn't. Luvverly. + */ +static int rpc_do_reply(struct client *clnt, struct rpc *rpc, size_t off) +{ + int ret; + + if ((ret = read(clnt->sock, + ((char *)rpc->reply) + off, + rpc->reply_len - off)) == -1) { + perror("read"); + goto bail; + } else if (ret < sizeof(struct rpc_reply) - off) { + fprintf(stderr, "short read: %d < %zu\n", ret, + sizeof(struct rpc_reply) - off); + goto bail; + } + rpc->reply_len = ret + off; + + if ((!off && !(ntohl(rpc->reply->hdr.frag_hdr) & LAST_FRAG)) || + rpc->reply->hdr.udp.xid != rpc->call->hdr.udp.xid || + rpc->reply->hdr.udp.msg_type != htonl(RPC_REPLY)) { + fprintf(stderr, "bad reply\n"); + goto bail; + } + + if (ntohl(rpc->reply->state) != REPLY_OK) { + fprintf(stderr, "rpc failed: %d\n", ntohl(rpc->reply->state)); + goto bail; + } + + ret = 0; + goto done; + +bail: + ret = -1; +done: + return ret; +} + +static void rpc_header(struct client *clnt, struct rpc *rpc) +{ + (void)clnt; + + rpc->call->hdr.frag_hdr = htonl(LAST_FRAG | rpc->call_len); + rpc->call->hdr.udp.xid = lrand48(); + rpc->call->hdr.udp.msg_type = htonl(RPC_CALL); + rpc->call->rpc_vers = htonl(2); +} + +static int rpc_call_tcp(struct client *clnt, struct rpc *rpc) +{ + int ret; + + rpc_header(clnt, rpc); + + if ((ret = write(clnt->sock, rpc->call, rpc->call_len)) == -1) { + perror("write"); + goto bail; + } else if (ret < rpc->call_len) { + fprintf(stderr, "short write: %d < %zu\n", ret, rpc->call_len); + goto bail; + } + + ret = rpc_do_reply(clnt, rpc, 0); + goto done; + + bail: + ret = -1; + + done: + return ret; +} + +static int rpc_call_udp(struct client *clnt, struct rpc *rpc) +{ +#define NR_FDS 1 +#define TIMEOUT_MS 3000 +#define MAX_TRIES 100 +#define UDP_HDR_OFF (sizeof(struct rpc_header) - sizeof(struct rpc_udp_header)) + struct pollfd fds[NR_FDS]; + int ret = -1; + int i; + + rpc_header(clnt, rpc); + + fds[0].fd = clnt->sock; + fds[0].events = POLLRDNORM; + + rpc->call_len -= UDP_HDR_OFF; + + for (i = 0; i < MAX_TRIES; i++) { + int timeout_ms = TIMEOUT_MS + (lrand48() % (TIMEOUT_MS / 2)); + if ((ret = write(clnt->sock, + ((char *)rpc->call) + UDP_HDR_OFF, + rpc->call_len)) == -1) { + perror("write"); + goto bail; + } else if (ret < rpc->call_len) { + fprintf(stderr, "short write: %d < %zu\n", ret, + rpc->call_len); + goto bail; + } + for (; i < MAX_TRIES; i++) { + if ((ret = poll(fds, NR_FDS, timeout_ms)) == -1) { + perror("poll"); + goto bail; + } + if (ret == 0) { + DEBUG(("Timeout #%d\n", i + 1)); + break; + } + if ((ret = rpc_do_reply(clnt, rpc, UDP_HDR_OFF)) == 0) { + goto done; + } else { + DEBUG(("Failed on try #%d - retrying\n", + i + 1)); + } + } + } + + bail: + ret = -1; + + done: + return ret; +} + +struct client *tcp_client(uint32_t server, uint16_t port, uint32_t flags) +{ + struct client *clnt = malloc(sizeof(*clnt)); + struct sockaddr_in addr; + int sock; + + if (clnt == NULL) { + perror("malloc"); + goto bail; + } + + memset(clnt, 0, sizeof(clnt)); + + if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) { + perror("socket"); + goto bail; + } + + if ((flags & CLI_RESVPORT) && bindresvport(sock, 0) == -1) { + perror("bindresvport"); + goto bail; + } + + clnt->sock = sock; + clnt->call_stub = rpc_call_tcp; + + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = server; + + if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + perror("connect"); + goto bail; + } + + goto done; + bail: + if (clnt) { + free(clnt); + clnt = NULL; + } + done: + return clnt; +} + +struct client *udp_client(uint32_t server, uint16_t port, uint32_t flags) +{ + struct client *clnt = malloc(sizeof(*clnt)); + struct sockaddr_in addr; + int sock; + + if (clnt == NULL) { + perror("malloc"); + goto bail; + } + + memset(clnt, 0, sizeof(clnt)); + + if ((sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1) { + perror("socket"); + goto bail; + } + + if ((flags & CLI_RESVPORT) && bindresvport(sock, 0) == -1) { + perror("bindresvport"); + goto bail; + } else { + struct sockaddr_in me; + + me.sin_family = AF_INET; + me.sin_port = 0; + me.sin_addr.s_addr = INADDR_ANY; + + if (0 && bind(sock, (struct sockaddr *)&me, sizeof(me)) == -1) { + perror("bind"); + goto bail; + } + } + + clnt->sock = sock; + clnt->call_stub = rpc_call_udp; + + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = server; + + if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + perror("connect"); + goto bail; + } + + goto done; + bail: + if (clnt) { + free(clnt); + clnt = NULL; + } + done: + return clnt; +} + +void client_free(struct client *c) +{ + if (c->sock != -1) + close(c->sock); + free(c); +} + +int rpc_call(struct client *client, struct rpc *rpc) +{ + return client->call_stub(client, rpc); +} diff --git a/usr/kinit/nfsmount/sunrpc.h b/usr/kinit/nfsmount/sunrpc.h new file mode 100644 index 0000000..00f19cd --- /dev/null +++ b/usr/kinit/nfsmount/sunrpc.h @@ -0,0 +1,99 @@ +/* + * sunrpc.h - open-coded SunRPC structures + */ +#ifndef NFSMOUNT_SUNRPC_H +#define NFSMOUNT_SUNRPC_H + +#include +#include + +#define SUNRPC_PORT 111 +#define MOUNT_PORT 627 + +#define RPC_CALL 0 +#define RPC_REPLY 1 + +#define PORTMAP_PROGRAM 100000 +#define NLM_PROGRAM 100021 + +#define RPC_PMAP_PROGRAM 100000 +#define RPC_PMAP_VERSION 2 +#define RPC_PMAP_PORT 111 + +#define PMAP_PROC_NULL 0 +#define PMAP_PROC_SET 1 +#define PMAP_PROC_UNSET 2 +#define PMAP_PROC_GETPORT 3 +#define PMAP_PROC_DUMP 4 + +#define LAST_FRAG 0x80000000 + +#define REPLY_OK 0 +#define REPLY_DENIED 1 + +#define SUCCESS 0 +#define PROG_UNAVAIL 1 +#define PROG_MISMATCH 2 +#define PROC_UNAVAIL 3 +#define GARBAGE_ARGS 4 +#define SYSTEM_ERR 5 + +struct rpc_udp_header { + uint32_t xid; + uint32_t msg_type; +}; + +struct rpc_header { + uint32_t frag_hdr; + struct rpc_udp_header udp; +}; + +struct rpc_call { + struct rpc_header hdr; + uint32_t rpc_vers; + + uint32_t program; + uint32_t prog_vers; + uint32_t proc; + uint32_t cred_flavor; + + uint32_t cred_len; + uint32_t vrf_flavor; + uint32_t vrf_len; +}; + +struct rpc_reply { + struct rpc_header hdr; + uint32_t reply_state; + uint32_t vrf_flavor; + uint32_t vrf_len; + uint32_t state; +}; + +struct rpc { + struct rpc_call *call; + size_t call_len; + struct rpc_reply *reply; + size_t reply_len; +}; + +struct client; + +typedef int (*call_stub) (struct client *, struct rpc *); + +struct client { + int sock; + call_stub call_stub; +}; + +#define CLI_RESVPORT 00000001 + +struct client *tcp_client(uint32_t server, uint16_t port, uint32_t flags); +struct client *udp_client(uint32_t server, uint16_t port, uint32_t flags); +void client_free(struct client *client); + +int rpc_call(struct client *client, struct rpc *rpc); + +uint32_t portmap(uint32_t server, uint32_t program, uint32_t version, uint32_t proto); + +#endif /* NFSMOUNT_SUNRPC_H */ diff --git a/usr/kinit/nfsroot.c b/usr/kinit/nfsroot.c new file mode 100644 index 0000000..9930fd5 --- /dev/null +++ b/usr/kinit/nfsroot.c @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "ipconfig.h" +#include "kinit.h" +#include "netdev.h" +#include "nfsmount.h" + +static char *sub_client(__u32 client, char *path, size_t len) +{ + struct in_addr addr = { client }; + char buf[len]; + + if (strstr(path, "%s") != NULL) { + if (client == INADDR_NONE) { + fprintf(stderr, "Root-NFS: no client address\n"); + exit(1); + } + + snprintf(buf, len, path, inet_ntoa(addr)); + strcpy(path, buf); + } + + return path; +} + +#define NFS_ARGC 6 +#define MOUNT_POINT "/root" + +int mount_nfs_root(int argc, char *argv[], int flags) +{ + (void)flags; /* FIXME - don't ignore this */ + + struct in_addr addr = { INADDR_NONE }; + __u32 client = INADDR_NONE; + const int len = 1024; + struct netdev *dev; + char *mtpt = MOUNT_POINT; + char *path = NULL; + char *dev_bootpath = NULL; + char root[len]; + char *x, *opts; + int ret = 0; + int a = 1; + char *nfs_argv[NFS_ARGC + 1] = { "NFS-Mount" }; + + for (dev = ifaces; dev; dev = dev->next) { + if (dev->ip_server != INADDR_NONE && + dev->ip_server != INADDR_ANY) { + addr.s_addr = dev->ip_server; + client = dev->ip_addr; + dev_bootpath = dev->bootpath; + break; + } + if (dev->ip_addr != INADDR_NONE && dev->ip_addr != INADDR_ANY) { + client = dev->ip_addr; + } + } + + /* + * if the "nfsroot" option is set then it overrides + * bootpath supplied by the boot server. + */ + if ((path = get_arg(argc, argv, "nfsroot=")) == NULL) { + if ((path = dev_bootpath) == NULL || path[0] == '\0') + /* no path - set a default */ + path = (char *)"/tftpboot/%s"; + } else if (dev_bootpath && dev_bootpath[0] != '\0') + fprintf(stderr, + "nfsroot=%s overrides boot server bootpath %s\n", + path, dev_bootpath); + + if ((opts = strchr(path, ',')) != NULL) { + *opts++ = '\0'; + nfs_argv[a++] = (char *)"-o"; + nfs_argv[a++] = opts; + } + + if ((x = strchr(path, ':')) == NULL) { + if (addr.s_addr == INADDR_NONE) { + fprintf(stderr, "Root-NFS: no server defined\n"); + exit(1); + } + + snprintf(root, len, "%s:%s", inet_ntoa(addr), path); + } else { + strcpy(root, path); + } + + nfs_argv[a++] = sub_client(client, root, len); + + DEBUG(("NFS-Root: mounting %s on %s with options \"%s\"\n", + nfs_argv[a-1], mtpt, opts ? opts : "")); + + nfs_argv[a++] = mtpt; + nfs_argv[a] = NULL; + assert(a <= NFS_ARGC); + + dump_args(a, nfs_argv); + + if ((ret = nfsmount_main(a, nfs_argv)) != 0) { + ret = -1; + goto done; + } + + done: + return ret; +} diff --git a/usr/kinit/open.c b/usr/kinit/open.c new file mode 100644 index 0000000..f53e2f1 --- /dev/null +++ b/usr/kinit/open.c @@ -0,0 +1,18 @@ +/* + * open.c + * + * A quick hack to do an open() and set the cloexec flag + */ + +#include +#include + +int open_cloexec(const char *path, int flags, mode_t mode) +{ + int fd = open(path, flags, mode); + + if (fd >= 0) + fcntl(fd, F_SETFD, FD_CLOEXEC); + + return fd; +} diff --git a/usr/kinit/ramdisk_load.c b/usr/kinit/ramdisk_load.c new file mode 100644 index 0000000..9d2e0b9 --- /dev/null +++ b/usr/kinit/ramdisk_load.c @@ -0,0 +1,271 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kinit.h" +#include "do_mounts.h" +#include "fstype.h" +#include "zlib.h" + +#define BUF_SZ 65536 + +static void wait_for_key(void) +{ + /* Wait until the user presses Enter */ + while (getchar() != '\n') + ; +} + +static int change_disk(const char *devpath, int rfd, int disk) +{ + /* Try to eject and/or quiesce the device */ + sync(); + if (ioctl(rfd, FDEJECT, 0)) { + if (errno == ENOTTY) { + /* Not a floppy */ + ioctl(rfd, CDROMEJECT, 0); + } else { + /* Non-ejectable floppy */ + ioctl(rfd, FDRESET, FD_RESET_IF_NEEDED); + } + } + close(rfd); + + fprintf(stderr, + "\nPlease insert disk %d for ramdisk and press Enter...", disk); + wait_for_key(); + + return open(devpath, O_RDONLY); +} + +/* Also used in initrd.c */ +int load_ramdisk_compressed(const char *devpath, FILE * wfd, + off_t ramdisk_start) +{ + int rfd = -1; + unsigned long long ramdisk_size, ramdisk_left; + int disk = 1; + ssize_t bytes; + int rv; + unsigned char in_buf[BUF_SZ], out_buf[BUF_SZ]; + z_stream zs; + + zs.zalloc = Z_NULL; /* Use malloc() */ + zs.zfree = Z_NULL; /* Use free() */ + zs.next_in = Z_NULL; /* No data read yet */ + zs.avail_in = 0; + zs.next_out = out_buf; + zs.avail_out = BUF_SZ; + + if (inflateInit2(&zs, 32 + 15) != Z_OK) + goto err1; + + rfd = open(devpath, O_RDONLY); + if (rfd < 0) + goto err2; + + /* Set to the size of the medium, or "infinite" */ + if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size)) + ramdisk_size = ~0ULL; + + do { + /* Purge the output preferentially over reading new + input, so we don't end up overrunning the input by + accident and demanding a new disk which doesn't + exist... */ + if (zs.avail_out == 0) { + _fwrite(out_buf, BUF_SZ, wfd); + zs.next_out = out_buf; + zs.avail_out = BUF_SZ; + } else if (zs.avail_in == 0) { + if (ramdisk_start >= ramdisk_size) { + rfd = change_disk(devpath, rfd, ++disk); + if (rfd < 0) + goto err2; + + if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size)) + ramdisk_size = ~0ULL; + ramdisk_start = 0; + DEBUG(("New size = %llu\n", ramdisk_size)); + } + do { + ramdisk_left = ramdisk_size - ramdisk_start; + bytes = min(ramdisk_left, + (unsigned long long)BUF_SZ); + bytes = pread(rfd, in_buf, bytes, + ramdisk_start); + } while (bytes == -1 && errno == EINTR); + if (bytes <= 0) + goto err2; + ramdisk_start += bytes; + zs.next_in = in_buf; + zs.avail_in = bytes; + + /* Print dots if we're reading from a real block device */ + if (ramdisk_size != ~0ULL) + putc('.', stderr); + } + rv = inflate(&zs, Z_SYNC_FLUSH); + } while (rv == Z_OK || rv == Z_BUF_ERROR); + + DEBUG(("kinit: inflate returned %d\n", rv)); + + if (rv != Z_STREAM_END) + goto err2; + + /* Write the last */ + _fwrite(out_buf, BUF_SZ - zs.avail_out, wfd); + DEBUG(("kinit: writing %d bytes\n", BUF_SZ - zs.avail_out)); + + inflateEnd(&zs); + return 0; + +err2: + inflateEnd(&zs); +err1: + return -1; +} + +static int +load_ramdisk_raw(const char *devpath, FILE * wfd, off_t ramdisk_start, + unsigned long long fssize) +{ + unsigned long long ramdisk_size, ramdisk_left; + int disk = 1; + ssize_t bytes; + unsigned char buf[BUF_SZ]; + int rfd; + + rfd = open(devpath, O_RDONLY); + if (rfd < 0) + return -1; + + /* Set to the size of the medium, or "infinite" */ + if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size)) + ramdisk_size = ~0ULL; + + DEBUG(("start: %llu size: %llu fssize: %llu\n", + ramdisk_start, ramdisk_size, fssize)); + + while (fssize) { + + if (ramdisk_start >= ramdisk_size) { + rfd = change_disk(devpath, rfd, ++disk); + if (rfd < 0) + return -1; + + if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size)) + ramdisk_size = ~0ULL; + ramdisk_start = 0; + } + + do { + ramdisk_left = + min(ramdisk_size - ramdisk_start, fssize); + bytes = min(ramdisk_left, (unsigned long long)BUF_SZ); + bytes = pread(rfd, buf, bytes, ramdisk_start); + } while (bytes == -1 && errno == EINTR); + if (bytes <= 0) + break; + _fwrite(buf, bytes, wfd); + + ramdisk_start += bytes; + fssize -= bytes; + + /* Print dots if we're reading from a real block device */ + if (ramdisk_size != ~0ULL) + putc('.', stderr); + } + + return !!fssize; +} + +int ramdisk_load(int argc, char *argv[], dev_t root_dev) +{ + const char *arg_prompt_ramdisk = get_arg(argc, argv, "prompt_ramdisk="); + const char *arg_ramdisk_blocksize = + get_arg(argc, argv, "ramdisk_blocksize="); + const char *arg_ramdisk_start = get_arg(argc, argv, "ramdisk_start="); + const char *arg_ramdisk_device = get_arg(argc, argv, "ramdisk_device="); + + int prompt_ramdisk = arg_prompt_ramdisk ? atoi(arg_prompt_ramdisk) : 0; + int ramdisk_blocksize = + arg_ramdisk_blocksize ? atoi(arg_ramdisk_blocksize) : 512; + off_t ramdisk_start = + arg_ramdisk_start + ? strtoumax(arg_ramdisk_start, NULL, 10) * ramdisk_blocksize : 0; + const char *ramdisk_device = + arg_ramdisk_device ? arg_ramdisk_device : "/dev/fd0"; + + dev_t ramdisk_dev; + int rfd; + FILE *wfd; + const char *fstype; + unsigned long long fssize; + int is_gzip = 0; + int err; + + if (prompt_ramdisk) { + fprintf(stderr, + "Please insert disk for ramdisk and press Enter..."); + wait_for_key(); + } + + ramdisk_dev = name_to_dev_t(ramdisk_device); + if (!ramdisk_dev) { + fprintf(stderr, + "Failure loading ramdisk: unknown device: %s\n", + ramdisk_device); + return 0; + } + + create_dev("/dev/rddev", ramdisk_dev); + create_dev("/dev/ram0", Root_RAM0); + rfd = open("/dev/rddev", O_RDONLY); + wfd = fopen("/dev/ram0", "w"); + + if (rfd < 0 || !wfd) { + perror("Could not open ramdisk device"); + return 0; + } + + /* Check filesystem type */ + if (identify_fs(rfd, &fstype, &fssize, ramdisk_start) || + (fssize == 0 && !(is_gzip = !strcmp(fstype, "gzip")))) { + fprintf(stderr, + "Failure loading ramdisk: unknown filesystem type\n"); + return 0; + } + + DEBUG(("kinit: ramdisk is %s, size %llu\n", fstype, fssize)); + + fprintf(stderr, "Loading ramdisk (%s) ...", is_gzip ? "gzip" : "raw"); + + close(rfd); + + if (is_gzip) + err = load_ramdisk_compressed("/dev/rddev", wfd, ramdisk_start); + else + err = load_ramdisk_raw("/dev/rddev", wfd, + ramdisk_start, fssize); + + fclose(wfd); + + putc('\n', stderr); + + if (err) { + perror("Failure loading ramdisk"); + return 0; + } + + return 1; +} diff --git a/usr/kinit/readfile.c b/usr/kinit/readfile.c new file mode 100644 index 0000000..f899a2a --- /dev/null +++ b/usr/kinit/readfile.c @@ -0,0 +1,86 @@ +/* + * readfile.c + * + * Read the entire contents of a file into malloc'd storage. This + * is mostly useful for things like /proc files where we can't just + * fstat() to get the length and then mmap(). + * + * Returns the number of bytes read, or -1 on error. + */ + +#include +#include +#include +#include + +ssize_t freadfile(FILE *f, char **pp) +{ + size_t bs; /* Decent starting point... */ + size_t bf; /* Bytes free */ + size_t bu = 0; /* Bytes used */ + char *buffer, *nb; + size_t rv; + int old_errno = errno; + + bs = BUFSIZ; /* A guess as good as any */ + bf = bs; + buffer = malloc(bs); + + if (!buffer) + return -1; + + for (;;) { + errno = 0; + + while (bf && (rv = _fread(buffer + bu, bf, f))) { + bu += rv; + bf -= rv; + } + + if (errno && errno != EINTR && errno != EAGAIN) { + /* error */ + free(buffer); + return -1; + } + + if (bf) { + /* Hit EOF, no error */ + + /* Try to free superfluous memory */ + if ((nb = realloc(buffer, bu + 1))) + buffer = nb; + + /* Null-terminate result for good measure */ + buffer[bu] = '\0'; + + *pp = buffer; + errno = old_errno; + return bu; + } + + /* Double the size of the buffer */ + bf += bs; + bs += bs; + if (!(nb = realloc(buffer, bs))) { + /* out of memory error */ + free(buffer); + return -1; + } + buffer = nb; + } +} + +ssize_t readfile(const char *filename, char **pp) +{ + FILE *f = fopen(filename, "r"); + ssize_t rv; + + if (!f) + return -1; + + rv = freadfile(f, pp); + + fclose(f); + + return rv; +} diff --git a/usr/kinit/resume.c b/usr/kinit/resume.c new file mode 100644 index 0000000..4fed367 --- /dev/null +++ b/usr/kinit/resume.c @@ -0,0 +1,75 @@ +/* + * resume.c + * + * Handle resume from suspend-to-disk + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For CONFIG_PM_STD_PARTITION */ + +#include "kinit.h" +#include "do_mounts.h" + +#ifndef CONFIG_PM_STD_PARTITION +# define CONFIG_PM_STD_PARTITION "" +#endif + +int do_resume(int argc, char *argv[]) +{ + const char *resume_file = CONFIG_PM_STD_PARTITION; + const char *resume_arg; + dev_t resume_device; + int powerfd = -1; + char device_string[64]; + int len; + + resume_arg = get_arg(argc, argv, "resume="); + resume_file = resume_arg ? resume_arg : resume_file; + if (!resume_file[0]) + return 0; /* No resume device specified */ + + /* Fix: we either should consider reverting the device back to + ordinary swap, or (better) put that code into swapon */ + if (get_flag(argc, argv, "noresume")) + return 0; /* Noresume requested */ + + resume_device = name_to_dev_t(resume_file); + + if (major(resume_device) == 0) { + fprintf(stderr, "Invalid resume device: %s\n", resume_file); + goto failure; + } + + if ((powerfd = open("/sys/power/resume", O_WRONLY)) < 0) + goto fail_r; + + len = snprintf(device_string, sizeof device_string, "%u:%u", + major(resume_device), minor(resume_device)); + + if (len >= sizeof device_string) + goto fail_r; /* This should never happen */ + + DEBUG(("kinit: trying to resume from %s\n", resume_file)); + + if (write(powerfd, device_string, len) != len) + goto fail_r; + + /* Okay, what are we still doing alive... */ + failure: + if (powerfd >= 0) + close(powerfd); + fprintf(stderr, "Resume failed, doing normal boot...\n"); + return -1; + + fail_r: + fprintf(stderr, + "Resume failed: cannot write /sys/power/resume (no kernel support?)\n"); + goto failure; +} diff --git a/usr/kinit/run-init/Kbuild b/usr/kinit/run-init/Kbuild new file mode 100644 index 0000000..61a9d0b --- /dev/null +++ b/usr/kinit/run-init/Kbuild @@ -0,0 +1,25 @@ +# +# Kbuild file for run-init +# + +static-y := static/run-init +shared-y := shared/run-init + +# common .o files +objs := run-init.o runinitlib.o + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? + +# Create built-in.o with all object files (used by kinit) +lib-y := $(objs) + +# .o files used to built executables +static/run-init-y := $(objs) +shared/run-init-y := $(objs) + +# Cleaning +clean-dirs := static shared + +# install binary +install-y := $(shared-y) diff --git a/usr/kinit/run-init/run-init.c b/usr/kinit/run-init/run-init.c new file mode 100644 index 0000000..2fc4f60 --- /dev/null +++ b/usr/kinit/run-init/run-init.c @@ -0,0 +1,95 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2004-2006 H. Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ----------------------------------------------------------------------- */ + +/* + * run-init.c + * + * Usage: exec run-init [-c /dev/console] /real-root /sbin/init "$@" + * + * This program should be called as the last thing in a shell script + * acting as /init in an initramfs; it does the following: + * + * - Delete all files in the initramfs; + * - Remounts /real-root onto the root filesystem; + * - Chroots; + * - Opens /dev/console; + * - Spawns the specified init program (with arguments.) + */ + +#include +#include +#include +#include +#include +#include "run-init.h" + +static const char *program; + +static void __attribute__ ((noreturn)) usage(void) +{ + fprintf(stderr, + "Usage: exec %s [-c consoledev] /real-root /sbin/init [args]\n", + program); + exit(1); +} + +int main(int argc, char *argv[]) +{ + /* Command-line options and defaults */ + const char *console = "/dev/console"; + const char *realroot; + const char *init; + const char *error; + char **initargs; + + /* Variables... */ + int o; + + /* Parse the command line */ + program = argv[0]; + + while ((o = getopt(argc, argv, "c:")) != -1) { + if (o == 'c') { + console = optarg; + } else { + usage(); + } + } + + if (argc - optind < 2) + usage(); + + realroot = argv[optind]; + init = argv[optind + 1]; + initargs = argv + optind + 1; + + error = run_init(realroot, console, init, initargs); + + /* If run_init returns, something went wrong */ + fprintf(stderr, "%s: %s: %s\n", program, error, strerror(errno)); + return 1; +} diff --git a/usr/kinit/run-init/run-init.h b/usr/kinit/run-init/run-init.h new file mode 100644 index 0000000..b905d82 --- /dev/null +++ b/usr/kinit/run-init/run-init.h @@ -0,0 +1,38 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2004-2006 H. Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ----------------------------------------------------------------------- */ + +/* + * run-init.h + */ + +#ifndef RUN_INIT_H +#define RUN_INIT_H + +const char *run_init(const char *realroot, const char *console, + const char *init, char **initargs); + +#endif diff --git a/usr/kinit/run-init/runinitlib.c b/usr/kinit/run-init/runinitlib.c new file mode 100644 index 0000000..6410373 --- /dev/null +++ b/usr/kinit/run-init/runinitlib.c @@ -0,0 +1,216 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2004-2006 H. Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ----------------------------------------------------------------------- */ + +/* + * runinitlib.c + * + * run_init(consoledev, realroot, init, initargs) + * + * This function should be called as the last thing in kinit, + * from initramfs, it does the following: + * + * - Delete all files in the initramfs; + * - Remounts /real-root onto the root filesystem; + * - Chroots; + * - Opens /dev/console; + * - Spawns the specified init program (with arguments.) + * + * On failure, returns a human-readable error message. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "run-init.h" + +/* Make it possible to compile on glibc by including constants that the + always-behind shipped glibc headers may not include. Classic example + on why the lack of ABI headers screw us up. */ +#ifndef TMPFS_MAGIC +# define TMPFS_MAGIC 0x01021994 +#endif +#ifndef RAMFS_MAGIC +# define RAMFS_MAGIC 0x858458f6 +#endif +#ifndef MS_MOVE +# define MS_MOVE 8192 +#endif + +static int nuke(const char *what); + +static int nuke_dirent(int len, const char *dir, const char *name, dev_t me) +{ + int bytes = len + strlen(name) + 2; + char path[bytes]; + int xlen; + struct stat st; + + xlen = snprintf(path, bytes, "%s/%s", dir, name); + assert(xlen < bytes); + + if (lstat(path, &st)) + return ENOENT; /* Return 0 since already gone? */ + + if (st.st_dev != me) + return 0; /* DO NOT recurse down mount points!!!!! */ + + return nuke(path); +} + +/* Wipe the contents of a directory, but not the directory itself */ +static int nuke_dir(const char *what) +{ + int len = strlen(what); + DIR *dir; + struct dirent *d; + int err = 0; + struct stat st; + + if (lstat(what, &st)) + return errno; + + if (!S_ISDIR(st.st_mode)) + return ENOTDIR; + + if (!(dir = opendir(what))) { + /* EACCES means we can't read it. Might be empty and removable; + if not, the rmdir() in nuke() will trigger an error. */ + return (errno == EACCES) ? 0 : errno; + } + + while ((d = readdir(dir))) { + /* Skip . and .. */ + if (d->d_name[0] == '.' && + (d->d_name[1] == '\0' || + (d->d_name[1] == '.' && d->d_name[2] == '\0'))) + continue; + + err = nuke_dirent(len, what, d->d_name, st.st_dev); + if (err) { + closedir(dir); + return err; + } + } + + closedir(dir); + + return 0; +} + +static int nuke(const char *what) +{ + int rv; + int err = 0; + + rv = unlink(what); + if (rv < 0) { + if (errno == EISDIR) { + /* It's a directory. */ + err = nuke_dir(what); + if (!err) + err = rmdir(what) ? errno : err; + } else { + err = errno; + } + } + + if (err) { + errno = err; + return err; + } else { + return 0; + } +} + +const char *run_init(const char *realroot, const char *console, + const char *init, char **initargs) +{ + struct stat rst, cst, ist; + struct statfs sfs; + int confd; + + /* First, change to the new root directory */ + if (chdir(realroot)) + return "chdir to new root"; + + /* This is a potentially highly destructive program. Take some + extra precautions. */ + + /* Make sure the current directory is not on the same filesystem + as the root directory */ + if (stat("/", &rst) || stat(".", &cst)) + return "stat"; + + if (rst.st_dev == cst.st_dev) + return "current directory on the same filesystem as the root"; + + /* The initramfs should have /init */ + if (stat("/init", &ist) || !S_ISREG(ist.st_mode)) + return "can't find /init on initramfs"; + + /* Make sure we're on a ramfs */ + if (statfs("/", &sfs)) + return "statfs /"; + if (sfs.f_type != RAMFS_MAGIC && sfs.f_type != TMPFS_MAGIC) + return "rootfs not a ramfs or tmpfs"; + + /* Okay, I think we should be safe... */ + + /* Delete rootfs contents */ + if (nuke_dir("/")) + return "nuking initramfs contents"; + + /* Overmount the root */ + if (mount(".", "/", NULL, MS_MOVE, NULL)) + return "overmounting root"; + + /* chroot, chdir */ + if (chroot(".") || chdir("/")) + return "chroot"; + + /* Open /dev/console */ + if ((confd = open(console, O_RDWR)) < 0) + return "opening console"; + dup2(confd, 0); + dup2(confd, 1); + dup2(confd, 2); + close(confd); + + /* Spawn init */ + execv(init, initargs); + return init; /* Failed to spawn init */ +} diff --git a/usr/kinit/xpio.c b/usr/kinit/xpio.c new file mode 100644 index 0000000..b5d4904 --- /dev/null +++ b/usr/kinit/xpio.c @@ -0,0 +1,51 @@ +/* + * kinit/xpio.c + * + * Looping versions of pread() and pwrite() + */ + +#include +#include +#include + +ssize_t xpread(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t ctr = 0; + ssize_t rv = 0; + char *bp = buf; + + while (count) { + rv = pread(fd, bp, count, offset); + + if (rv == 0 || (rv == -1 && errno != EINTR)) + break; + + bp += rv; + count -= rv; + offset += rv; + ctr += rv; + } + + return ctr ? ctr : rv; +} + +ssize_t xpwrite(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t ctr = 0; + ssize_t rv = 0; + char *bp = buf; + + while (count) { + rv = pwrite(fd, bp, count, offset); + + if (rv == 0 || (rv == -1 && errno != EINTR)) + break; + + bp += rv; + count -= rv; + offset += rv; + ctr += rv; + } + + return ctr ? ctr : rv; +} diff --git a/usr/kinit/xpio.h b/usr/kinit/xpio.h new file mode 100644 index 0000000..0596a32 --- /dev/null +++ b/usr/kinit/xpio.h @@ -0,0 +1,11 @@ +/* + * kinit/xpio.h + */ + +#ifndef KINIT_XPIO_H +#define KINIT_XPIO_H + +ssize_t xpread(int fd, void *buf, size_t count, off_t offset); +ssize_t xpwrite(int fd, void *buf, size_t count, off_t offset); + +#endif /* KINIT_XPIO_H */