diff -ru sysstat-4.1.2/common.h sysstat-4.1.2-2/common.h --- sysstat-4.1.2/common.h Thu Jan 23 09:32:47 2003 +++ sysstat-4.1.2-2/common.h Wed Jan 29 15:59:07 2003 @@ -16,6 +16,7 @@ /* Files */ #define STAT "/proc/stat" #define INTERRUPTS "/proc/interrupts" +#define MOUNTS "/proc/mounts" #define MAX_FILE_LEN 256 diff -ru sysstat-4.1.2/iostat.c sysstat-4.1.2-2/iostat.c --- sysstat-4.1.2/iostat.c Thu Jan 23 09:32:47 2003 +++ sysstat-4.1.2-2/iostat.c Mon Feb 3 01:03:39 2003 @@ -36,6 +36,7 @@ #include "iostat.h" #include "common.h" +#include #ifdef USE_NLS #include @@ -46,17 +47,59 @@ #endif -struct disk_stats disk_stats[2][MAX_PART]; -struct disk_hdr_stats disk_hdr_stats[MAX_PART]; +enum stats_loc { + UNSET, + PROC_PARTITIONS, /* extended info in partitions */ + SYSFS /* extended info in sysfs */ +}; + +enum stats_loc stats_at = UNSET; + +#define CURR (curr * part_max) +#define PREV (!(curr) * part_max) +struct disk_stats *disk_stats; +struct disk_hdr_stats *disk_hdr_stats; struct comm_stats comm_stats[2]; + struct tm loc_time; + int part_nr = 0; /* Nb of partitions */ + +/* + * length of disk_hdr_stats, and one half the length of disk_stats + */ +int part_max; + long int interval = 0; -unsigned char timestamp[64]; /* * Nb of proc on the machine. A value of 1 means two procs... */ int proc_used = -1; +unsigned char timestamp[64]; + +FILE *statfp; + +/* + * on a sysfs system, there isn't one file for stats. There are + * many. We will have to discover them the first time, but there's no + * point in retraversing the tree after that, so we store away + * the pathname. (We'd store away the FILE * too, but it seems that + * seeking back on a sysfs file doesn't really return you to beginning + * of file -- you need to close and reopen it. Besides we may run out of + * file descriptors on a machine with many disks or partitions.) + */ +struct sysfsentry { + char *sysfs_name; + int sysfs_hindex; /* index into disk_hdr_stats for easy */ + /* lookup. -1 if we couldn't find one. 
*/ + unsigned int sysfs_major; + unsigned int sysfs_minor; + char sysfs_ispart; /* nonzero if this is a partition. */ + /* makes a difference in the number */ + /* and kind of stats available to us */ +} *statlist; +unsigned statlist_sz = 4; +unsigned statlist_cnt = 0; /* @@ -84,26 +127,520 @@ alarm(interval); } +/* + * sysfs_mntpt() - discover if a sysfs file system is mounted on + * this machine. Return the mountpoint (malloc'ed, MAXPATHLEN bytes so + * the calling routine can append to it to generate pathnames; the caller + * owns it), or NULL if there is none or a resource could not be had. + */ +char *sysfs_mntpt(void) +{ + FILE *mntfp; + char *pathname, *buffer, fsname[64] /* pathname is as big as buffer; fsname is bounded by %63s */; + + pathname = malloc(MAXPATHLEN); + if (!pathname) + return NULL; + buffer = malloc(MAXPATHLEN); + if (!buffer) { + free(pathname); + return NULL; + } + mntfp = fopen(MOUNTS, "r"); + if (!mntfp) { + free(pathname); + free(buffer); + return NULL; + } + + while (fgets(buffer, MAXPATHLEN, mntfp)) { + if (sscanf(buffer, "%*s %s %63s %*s", pathname, fsname) == 2) { + if (strcmp(fsname, "sysfs") == 0) { + fclose(mntfp); + free(buffer); + return pathname; + } + } + } + fclose(mntfp); + free(buffer); + free(pathname); + return NULL; +} /* - * Initialize stats structures + * add_sysfs_entry() - add an entry to our global table. If successful, + * we'll increment statlist_cnt. If ispart is true, then this is a + * partition, not a whole disk. (This can be important for output + * formatting. */ -void init_stats(void) +void add_sysfs_entry(char *path, char *sname, int ispart) { + struct sysfsentry *this_entry; + FILE *fp; + char *ptr; int i; - for (i = 0; i < MAX_PART; i++) { - memset(&disk_stats[0][i], 0, DISK_STATS_SIZE); - memset(&disk_stats[1][i], 0, DISK_STATS_SIZE); - sprintf(disk_hdr_stats[i].name, "hdisk%d", i); + if (statlist == NULL) { + /* + * First one?
Initialize the list + */ + statlist = malloc(sizeof(*statlist) * statlist_sz); + if (!statlist) + return; + } + + /* + * make list bigger if needed + */ + if (statlist_sz == statlist_cnt + 1) { + statlist = realloc(statlist, (statlist_sz *= 2) * sizeof(*statlist)); + if (!statlist) + return; + } + + this_entry = statlist + statlist_cnt; + + /* + * find major, minor (this is in path/dev) + */ + ptr = rindex(path, '/'); + strcpy(ptr+1,"dev"); + fp = fopen(path, "r"); + if (fp) { + fscanf(fp, "%2x%2x", &this_entry->sysfs_major, &this_entry->sysfs_minor); + fclose(fp); + } else { + /* ignore this entry */ + return; + } + strcpy(ptr+1,"stat"); + + this_entry->sysfs_name = malloc(strlen(path)+1); + if (this_entry->sysfs_name) { + strcpy(this_entry->sysfs_name, path); + statlist_cnt++; + } else + /* + * this entry will get overwritten by the next one. Print an error? + * we don't right now. + */ + return; + + this_entry->sysfs_ispart = ispart; + + /* + * now add an index into the disk_hdr_stat information so we can + * access it easily if desired. + */ + this_entry->sysfs_hindex = -1; + for (i = 0; i < part_nr; i++) { + if (strcmp(disk_hdr_stats[i].name, sname) == 0) { + this_entry->sysfs_hindex = i; + /* + * now reset the disk_hdr_stats name, too. If we are doing + * extended stats it will take one form, and if not then it + * takes another. + */ +#if 0 + if (usepathname) { + char name[64]; + /* sprintf(name, "/dev/%s", disk_hdr_stats[i].name); + strcpy(disk_hdr_stats[i].name,name); + printf("name set to %s\n", name); */ + } + else + sprintf(disk_hdr_stats[i].name, "dev%d-%d", disk_hdr_stats[i].major, + disk_hdr_stats[i].minor); + break; +#endif + } + } +} + +/* + * init_sysfs_tree() - Initialize our list of sysfs stat entries. path is + * the root of the block devices listed in the sysfs in question and + * is expected to point to an array long enough to contain any possible + * path. 
+ * + * Unlike files such as /proc/partitions, doing an lseek back to the + * beginning of a sysfs file and re-reading it doesn't give you new + * statistics. (If I recall, it gives you nothing, not even the original + * data back.) We must actually close and open each file. Might be + * limiting, anyway, to have to keep open hundreds of fd's on large + * disk farms. So rather than traverse the entire directory tree + * each time, this routine just stashs the pathnames once in a list + * and then we can open/read/close all of them for each statistics period. + */ +void init_sysfs_tree(char *path, int flags) +{ + struct dirent *this_dev, *this_entry; + DIR *devices, *this_dev_dir; + char *endofdev, *endofpart; + + endofdev = &path[strlen(path)]; + devices = opendir(path); + if (devices == NULL) { + /* + * well, that was a nice try, but we should probably note this. + */ + fprintf(stderr, "iostat: could not open sysfs block root %s (%s)\n", + path, sys_errlist[errno]); + return; + } + + while ((this_dev = readdir(devices))) { + if (strcmp(this_dev->d_name, ".") == 0 || + strcmp(this_dev->d_name, "..") == 0) + continue; + + /* + * for each device we find here, we want to look at the + * stat entry. If we are interested in partitions, then + * we need to look for entries labeled in the + * device directory. 
+ */ + *endofdev = '/'; + strcpy(endofdev+1,this_dev->d_name); + this_dev_dir = opendir(path); + if (this_dev_dir == NULL) { + *endofdev = '\0'; + continue; + } + endofpart = endofdev + 1 + strlen(this_dev->d_name); + + if (DISPLAY_EXTENDED(flags)) { + /* + * open that directory and go looking for partitions + */ + while ((this_entry = readdir(this_dev_dir))) { + if (strcmp(this_entry->d_name, ".") == 0 || + strcmp(this_entry->d_name, "..") == 0) + continue; + if (strncmp(this_dev->d_name, this_entry->d_name, + strlen(this_dev->d_name)) == 0) { + /* + * we found a partition + */ + strcat(endofpart,"/"); + strcat(endofpart,this_entry->d_name); + strcat(endofpart,"/stat"); + add_sysfs_entry(path, this_entry->d_name, 1); + *endofpart = '\0'; + } + } + } + closedir(this_dev_dir); /* opened unconditionally above, so closed unconditionally */ + *endofdev = '\0'; + strcat(endofdev,"/"); + strcat(endofdev,this_dev->d_name); + strcat(endofdev,"/stat"); + add_sysfs_entry(path, this_dev->d_name, 0); + *endofdev = '\0'; + } + closedir(devices); +} + +char buffer[1024]; + +/* + * init_stats() - Size and initialize stats structures. If ndevs is + * nonzero, then dev_list is a segment of argv which tells us + * which disks are interesting. (The default is that all disks + * are interesting.) disk_hdr_stats[].name is the same as you'd find + * in /proc/partitions upon exit of this routine. + */ +void init_stats(char **dev_list, int ndevs, int flags) +{ + struct disk_hdr_stats currp; + char *sysfs = 0; + int p; + + /* + * dynamically figure out how big to make the arrays. We'll read + * /proc/partitions, which should include entries for both partitions + * and disks. + */ + statfp = fopen(PARTITIONS, "r"); + if (!statfp) { perror("fopen"); exit(2); } /* statfp is read and rewound below */ + /* + * if we're given a device list, then we don't have to + * monitor every device and we can probably keep these + * lists shorter.
+ */ + if (ndevs) { + part_max = ndevs; + } else { + while (fgets(buffer, sizeof(buffer), statfp)) { + + if (sscanf(buffer, "%d %d %*d %31s", + &currp.major, &currp.minor, currp.name) == 3) { + part_max++; + } + } } - memset(&comm_stats[0], 0, COMM_STATS_SIZE); - memset(&comm_stats[1], 0, COMM_STATS_SIZE); + /* + * now that we know how big to make them, allocate disk_stats and + * disk_hdr_stats. The assumption here is that new partitions are + * not added while we monitor the system, but even if they are, with + * care, nothing catastrophic should happen. + * + * We allocate twice as many disk_stats as it appears we need because + * we have to keep the "last" stats around to do the delta for + * our report. + */ + disk_stats = malloc(2 * sizeof (struct disk_stats) * part_max); + disk_hdr_stats = malloc(sizeof (struct disk_hdr_stats) * part_max); + + memset(disk_stats, 0, 2 * sizeof (struct disk_stats) * part_max); + memset(disk_hdr_stats, 0, sizeof (struct disk_hdr_stats) * part_max); + memset(&comm_stats, 0, 2 * sizeof(struct comm_stats)); + + /* + * get basic stats about each partition. While doing that, we'll see if + * we can figure out where the extended statistic information is. Here, + * we can see if this /proc/partitions has extra fields or not, as did + * some 2.2 and 2.4 patches. If so, then this is where we get our + * extended I/O stats from too. + */ + rewind(statfp); + while (fgets(buffer, sizeof(buffer), statfp)) { + + int count, ticks; + + currp.active = 0; + count = sscanf(buffer, + "%d %d %*d %31s %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %d %*d", + &currp.major, &currp.minor, currp.name, &ticks); + if (count == 4) { + /* + * Then this is a release where the extended disk + * I/O info is in /proc/partitions. + */ + stats_at = PROC_PARTITIONS; + } else if (count == 3) { + /* + * hmm. /proc/partitions looks normal. We'll see if there's a + * sysfs file system mounted, which is where stats landed + * post-2.5.45. 
+ */ + if (sysfs != NULL || (sysfs = sysfs_mntpt()) != NULL) /* look the mountpoint up only once; each lookup mallocs */ + stats_at = SYSFS; + /* + * else ... well, we didn't find any extended stats. But basic + * disk I/O info may still be in /proc/stat. + */ + } else + continue; + + /* + * Now store it away. + */ + if (ndevs) { + for (p = 0; p < ndevs; p++) { + if (strcmp(dev_list[p], currp.name) == 0) { + disk_hdr_stats[part_nr++] = currp; + break; + } + } + } else { + disk_hdr_stats[part_nr++] = currp; + } +#if 0 + if (stats_at != SYSFS) { + /* + * init_sysfs_tree will fix up the names itself, later, if this + * is a sysfs system + */ + sprintf(disk_hdr_stats[part_nr-1].name, "dev%d-%d", + disk_hdr_stats[part_nr-1].major, disk_hdr_stats[part_nr-1].minor); + } +#endif + } + + if (stats_at == SYSFS) { + /* + * partitions had some useful information for us, but + * we'll want to cache the pathnames of all the stat files + * in the sysfs tree so that we don't always have to + * traverse the filesystem tree rediscovering things. The + * directory "block" below the mountpoint should contain + * directories for each disk. + */ + strcat(sysfs, "/block"); + init_sysfs_tree(sysfs, flags); + } } /* + * read_ext_stat_part() - Read extended stats from /proc/partitions. + * statfp is assumed to have been opened and still valid. Extended + * stats will be present in patched versions of both 2.4 and + * 2.5. 2.5.45 and beyond has stats in a sysfs file system which is + * wholly different (see companion function, below.) We shouldn't + * ever end up in this routine if a sysfs file system has been found.
+ */ +void read_ext_stat_part(int curr, int flags) +{ + int i; + struct disk_stats part; + struct disk_hdr_stats part_hdr; + + rewind(statfp); + while (fgets(buffer, sizeof(buffer), statfp) != NULL) { + + if (sscanf(buffer, "%*d %*d %*d %63s %d %d %d %d %d %d %d %d %*d %d %d", + part_hdr.name, /* No need to read major and minor numbers */ + &part.rd_ios, &part.rd_merges, &part.rd_sectors, &part.rd_ticks, + &part.wr_ios, &part.wr_merges, &part.wr_sectors, &part.wr_ticks, + &part.ticks, &part.aveq) == 11) { + + /* + * We have just read a line from /proc/partitions containing stats + * for a partition (ie this is not a fake line: title, etc.). + * Moreover, we now know that the kernel has the patch applied. + */ + + /* Look for partition in data table */ + for (i = 0; i < part_nr; i++) { + /* (no tracing here: stdout is the report stream) */ + if (!strcmp(disk_hdr_stats[i].name, part_hdr.name)) { + /* Partition found */ + disk_hdr_stats[i].active = 1; + disk_stats[CURR + i] = part; + break; + } + } + +#if 0 + if ((i == part_nr) && + DISPLAY_EXTENDED_ALL(flags) && + (part_nr < MAX_PART) && + part.ticks) { + + /* + * Allocate new partition + */ + disk_stats[CURR + part_nr] = part; + disk_hdr_stats[part_nr].active = 1; + strcpy(disk_hdr_stats[part_nr++].name, part_hdr.name); + } +#else + /* ignore new partitions */ +#endif + } + } +} + +/* + * get_sysfs_disks_ext() - the devices we are interested in + * will be found in device_path, expected to be /sys/block/[sh]da. + * Since devices may come or go, we note whether (this time) the + * device is present and should be reported upon via the active + * field.
+ */ +void get_sysfs_disks_ext(int curr) +{ + int i, currindex; + FILE *fp; + + for (i = 0; i < statlist_cnt; i++) { + if (statlist[i].sysfs_hindex == -1) + continue; + currindex = CURR + statlist[i].sysfs_hindex; + memset(&disk_stats[currindex], 0, sizeof(struct disk_stats)); + fp = fopen(statlist[i].sysfs_name, "r"); + + if (!fp) { + disk_hdr_stats[statlist[i].sysfs_hindex].active = 0; + continue; + } else + disk_hdr_stats[statlist[i].sysfs_hindex].active = 1; + + if (statlist[i].sysfs_ispart) + fscanf(fp, "%d %d %d %d", + &disk_stats[currindex].rd_ios, &disk_stats[currindex].rd_sectors, + &disk_stats[currindex].wr_ios, + &disk_stats[currindex].wr_sectors); + else + fscanf(fp, "%d %d %d %d %d %d %d %d %*d %d %d", + &disk_stats[currindex].rd_ios, &disk_stats[currindex].rd_merges, + &disk_stats[currindex].rd_sectors, + &disk_stats[currindex].rd_ticks, &disk_stats[currindex].wr_ios, + &disk_stats[currindex].wr_merges, + &disk_stats[currindex].wr_sectors, + &disk_stats[currindex].wr_ticks, &disk_stats[currindex].ticks, + &disk_stats[currindex].aveq); + + fclose(fp); + } +} + +/* + * get_sysfs_disks() - get basic disk statistics from a sysfs file system. + * On systems which utilize sysfs, there are no stats in /proc/stat + * anymore. This is similar to get_sysfs_disks_ext(), above, except + * we collect less information. 
+ */ +void get_sysfs_disks(int curr) +{ + int i, currindex; + FILE *fp; + + for (i = 0; i < statlist_cnt; i++) { + if (statlist[i].sysfs_hindex == -1) + continue; + currindex = CURR + statlist[i].sysfs_hindex; + memset(&disk_stats[currindex], 0, sizeof(struct disk_stats)); + fp = fopen(statlist[i].sysfs_name, "r"); + if (!fp) { + disk_hdr_stats[statlist[i].sysfs_hindex].active = 0; + continue; + } else + disk_hdr_stats[statlist[i].sysfs_hindex].active = 1; + if (statlist[i].sysfs_ispart) + fscanf(fp, "%*d %d %*d %d\n", + &disk_stats[currindex].rd_sectors, + &disk_stats[currindex].wr_sectors); + else + fscanf(fp, "%*d %*d %d %*d %*d %*d %d\n", + &disk_stats[currindex].rd_sectors, + &disk_stats[currindex].wr_sectors); + + disk_stats[currindex].dk_drive = 2 * (disk_stats[currindex].rd_sectors + + disk_stats[currindex].wr_sectors); + disk_stats[currindex].dk_drive_rblk = + 2 * disk_stats[currindex].rd_sectors; + disk_stats[currindex].dk_drive_wblk = + 2 * disk_stats[currindex].wr_sectors; + fclose(fp); + } +} + +/* + * read_ext_stat -- get the extended statistics, wherever they may be. + * We've previously discovered where they are and indicated it through + * the variable stats_at. curr indicates whether to use array member 0 + * or 1 for the new data. + */ +void read_ext_stat(int curr, int flags) +{ + switch (stats_at) { + case PROC_PARTITIONS: + read_ext_stat_part(curr, flags); + return; + case SYSFS: + get_sysfs_disks_ext(curr); + default: + return; + } +} + +/* * Read stats from /proc/stat file... * (see linux source file linux/fs/proc/array.c) */ @@ -128,6 +665,10 @@ * Read the number of jiffies spent in user, nice, system, idle * and iowait mode and compute system uptime in jiffies (1/100ths * of a second if HZ=100). + * + * This line is present in 2.2, 2.4, and 2.5. Only in 2.5, however, + * is the iowait field present (representing # of jiffies spent waiting for + * I/O to complete.) This was previously counted as idle time. 
*/ comm_stats[curr].cpu_iowait = 0; /* For non 2.5 machines */ sscanf(line + 5, "%u %u %u %lu %lu", @@ -155,41 +696,60 @@ else if (!strncmp(line, "disk_rblk ", 10)) { /* + * We've found a pre 2.4 kernel. We've previously filled in + * disk_hdr_stats with names from /proc/partitions already, + * but we want these to be called hdisk0, hdisk1, hdisk2, and + * hdisk3 for compatibility reasons. First time through, (which + * we recognize because active isn't set) we'll fix this. + * * Read the number of blocks read from disk. * A block is of indeterminate size. * The size may vary depending on the device type. */ sscanf(line + 10, "%u %u %u %u", - &(disk_stats[curr][0].dk_drive_rblk), - &(disk_stats[curr][1].dk_drive_rblk), - &(disk_stats[curr][2].dk_drive_rblk), - &(disk_stats[curr][3].dk_drive_rblk)); + &(disk_stats[CURR].dk_drive_rblk), &(disk_stats[CURR+1].dk_drive_rblk), + &(disk_stats[CURR+2].dk_drive_rblk), &(disk_stats[CURR+3].dk_drive_rblk)); + /* Statistics handled for the first four disks with pre 2.4 kernels */ part_nr = 4; + /* + * Need to reset the names of those first four. The first time + * through here, active will not be set for any elements of + * disk_hdr_stats because we've not encountered any stats by + * name (and won't, we know now.) 
+ */ + if (!disk_hdr_stats[0].active) { + for (i=0; i < 4; i++) { + sprintf(disk_hdr_stats[i].name, "hdisk%d",i); + disk_hdr_stats[i].active = 1; + } + } } else if (!strncmp(line, "disk_wblk ", 10)) /* Read the number of blocks written to disk */ sscanf(line + 10, "%u %u %u %u", - &(disk_stats[curr][0].dk_drive_wblk), - &(disk_stats[curr][1].dk_drive_wblk), - &(disk_stats[curr][2].dk_drive_wblk), - &(disk_stats[curr][3].dk_drive_wblk)); + &(disk_stats[CURR].dk_drive_wblk), &(disk_stats[CURR+1].dk_drive_wblk), + &(disk_stats[CURR+2].dk_drive_wblk), &(disk_stats[CURR+3].dk_drive_wblk)); else if (!strncmp(line, "disk ", 5)) /* Read the number of I/O done since the last reboot */ sscanf(line + 5, "%u %u %u %u", - &(disk_stats[curr][0].dk_drive), - &(disk_stats[curr][1].dk_drive), - &(disk_stats[curr][2].dk_drive), - &(disk_stats[curr][3].dk_drive)); + &(disk_stats[CURR].dk_drive), &(disk_stats[CURR+1].dk_drive), + &(disk_stats[CURR+2].dk_drive), &(disk_stats[CURR+3].dk_drive)); else if (!strncmp(line, "disk_io: ", 9)) { + /* + * This field is present in both 2.4 (with patches) and 2.5 + * (with patches). It was removed entirely in 2.5.47. + */ + pos = 9; /* Read disks I/O statistics (for 2.4 kernels) */ - while (pos < strlen(line) - 1) { /* Beware: a CR is already included in the line */ + while (pos < strlen(line) - 1) { + /* Beware: a CR is already included in the line */ sscanf(line + pos, "(%u,%u):(%u,%*u,%u,%*u,%u) ", &v_major, &v_index, &v_tmp[0], &v_tmp[1], &v_tmp[2]); i = 0; @@ -197,6 +757,7 @@ (v_index != disk_hdr_stats[i].minor))) i++; if (i == part_nr) { +#if 0 /* * New device registered. Assume that devices may be registered, * but not unregistered dynamically... 
@@ -206,10 +767,12 @@ sprintf(disk_hdr_stats[i].name, "dev%d-%d", v_major, v_index); part_nr++; +#endif } - disk_stats[curr][i].dk_drive = v_tmp[0]; - disk_stats[curr][i].dk_drive_rblk = v_tmp[1]; - disk_stats[curr][i].dk_drive_wblk = v_tmp[2]; + disk_stats[CURR+i].dk_drive = v_tmp[0]; + disk_stats[CURR+i].dk_drive_rblk = v_tmp[1]; + disk_stats[CURR+i].dk_drive_wblk = v_tmp[2]; + disk_hdr_stats[i].active = 1; pos += strcspn(line + pos, " ") + 1; } @@ -219,67 +782,21 @@ /* Close stat file */ fclose(statfp); + if (DISPLAY_EXTENDED(flags)) + read_ext_stat(curr, flags); + else if (stats_at == SYSFS) { + /* + * Well, if we have a sysfs system, then the while loop above didn't + * collect anything about disks. In 2.5.45 and beyond, disk stats are + * gone from /proc/stat. + */ + get_sysfs_disks(curr); + } + /* Compute total number of I/O done */ comm_stats[curr].dk_drive_sum = 0; for (i = 0; i < part_nr; i++) - comm_stats[curr].dk_drive_sum += disk_stats[curr][i].dk_drive; -} - - -/* - * Read extended stats from /proc/partitions file - */ -void read_ext_stat(int curr, int flags) -{ - FILE *partfp; - int i; - char line[1024]; - struct disk_stats part; - struct disk_hdr_stats part_hdr; - - /* Open partitions file */ - if ((partfp = fopen(PARTITIONS, "r")) == NULL) { - perror("fopen"); - exit(2); - } - - while (fgets(line, 1024, partfp) != NULL) { - - if (sscanf(line, "%*d %*d %*d %63s %d %d %d %d %d %d %d %d %*d %d %d", - part_hdr.name, /* No need to read major and minor numbers */ - &part.rd_ios, &part.rd_merges, &part.rd_sectors, &part.rd_ticks, - &part.wr_ios, &part.wr_merges, &part.wr_sectors, &part.wr_ticks, - &part.ticks, &part.aveq) == 11) { - - /* - * We have just read a line from /proc/partitions containing stats - * for a partition (ie this is not a fake line: title, etc.). - * Moreover, we now know that the kernel has the patch applied. 
- */ - - /* Look for partition in data table */ - for (i = 0; i < part_nr; i++) { - if (!strcmp(disk_hdr_stats[i].name, part_hdr.name)) { - /* Partition found */ - disk_hdr_stats[i].active = 1; - disk_stats[curr][i] = part; - break; - } - } - - if ((i == part_nr) && DISPLAY_EXTENDED_ALL(flags) - && (part_nr < MAX_PART) - && part.ticks) { - /* Allocate new partition */ - disk_stats[curr][part_nr] = part; - disk_hdr_stats[part_nr].active = 1; - strcpy(disk_hdr_stats[part_nr++].name, part_hdr.name); - } - } - } - - /* Close file */ - fclose(partfp); + comm_stats[curr].dk_drive_sum += disk_stats[CURR + i].dk_drive; } @@ -288,11 +805,12 @@ * Notes about the formula used to display stats as: * (x(t2) - x(t1)) / (t2 - t1) = XX.YY: * We have the identity: a = (a / b) * b + a % b (a and b are integers). - * Apply this with a = x(t2) - x(t1) (values about which stats are to be displayed) - * and b = t2 - t1 (elapsed time in seconds). + * Apply this with a = x(t2) - x(t1) (values about which stats are to be + * displayed) and b = t2 - t1 (elapsed time in seconds). * Since uptime is given in jiffies, it is always divided by HZ to get seconds. 
* The integer part XX is: a / b - * The decimal part YY is: ((a % b) * HZ) / b (multiplied by HZ since we want YY and not 0.YY) + * The decimal part YY is: ((a % b) * HZ) / b (multiplied by HZ since we + * want YY and not 0.YY) */ int write_stat(int curr, int flags, struct tm *loc_time) { @@ -349,37 +867,51 @@ if (disk_hdr_stats[disk_index].active) { - current.rd_ios = disk_stats[curr][disk_index].rd_ios - - disk_stats[!curr][disk_index].rd_ios; - current.wr_ios = disk_stats[curr][disk_index].wr_ios - - disk_stats[!curr][disk_index].wr_ios; - current.rd_ticks = disk_stats[curr][disk_index].rd_ticks - - disk_stats[!curr][disk_index].rd_ticks; - current.wr_ticks = disk_stats[curr][disk_index].wr_ticks - - disk_stats[!curr][disk_index].wr_ticks; - current.rd_merges = disk_stats[curr][disk_index].rd_merges - - disk_stats[!curr][disk_index].rd_merges; - current.wr_merges = disk_stats[curr][disk_index].wr_merges - - disk_stats[!curr][disk_index].wr_merges; - current.rd_sectors = disk_stats[curr][disk_index].rd_sectors - - disk_stats[!curr][disk_index].rd_sectors; - current.wr_sectors = disk_stats[curr][disk_index].wr_sectors - - disk_stats[!curr][disk_index].wr_sectors; - current.ticks = disk_stats[curr][disk_index].ticks - - disk_stats[!curr][disk_index].ticks; - current.aveq = disk_stats[curr][disk_index].aveq - - disk_stats[!curr][disk_index].aveq; - + current.rd_ios = + disk_stats[CURR + disk_index].rd_ios - + disk_stats[PREV + disk_index].rd_ios; + current.wr_ios = + disk_stats[CURR + disk_index].wr_ios - + disk_stats[PREV + disk_index].wr_ios; + current.rd_ticks = + disk_stats[CURR + disk_index].rd_ticks - + disk_stats[PREV + disk_index].rd_ticks; + current.wr_ticks = + disk_stats[CURR + disk_index].wr_ticks - + disk_stats[PREV + disk_index].wr_ticks; + current.rd_merges = + disk_stats[CURR + disk_index].rd_merges - + disk_stats[PREV + disk_index].rd_merges; + current.wr_merges = + disk_stats[CURR + disk_index].wr_merges - + disk_stats[PREV + disk_index].wr_merges; 
+ current.rd_sectors = + disk_stats[CURR + disk_index].rd_sectors - + disk_stats[PREV + disk_index].rd_sectors; + current.wr_sectors = + disk_stats[CURR + disk_index].wr_sectors - + disk_stats[PREV + disk_index].wr_sectors; + current.ticks = + disk_stats[CURR + disk_index].ticks - + disk_stats[PREV + disk_index].ticks; + current.aveq = + disk_stats[CURR + disk_index].aveq - + disk_stats[PREV + disk_index].aveq; + nr_ios = current.rd_ios + current.wr_ios; tput = nr_ios * HZ / itv; util = ((double) current.ticks) / itv; svctm = tput ? util / tput : 0.0; /* - * kernel gives ticks already in milliseconds for all platforms - * => no need for further scaling. + * kernel gives ticks already in milliseconds for all + * platforms -> no need for further scaling. */ - await = nr_ios ? (current.rd_ticks + current.wr_ticks) / nr_ios : 0.0; - arqsz = nr_ios ? (current.rd_sectors + current.wr_sectors) / nr_ios : 0.0; + await = nr_ios ? + (current.rd_ticks + current.wr_ticks) / nr_ios : + 0.0; + arqsz = nr_ios ? + (current.rd_sectors + current.wr_sectors) / nr_ios : + 0.0; printf("/dev/%-5s", disk_hdr_stats[disk_index].name); if (strlen(disk_hdr_stats[disk_index].name) > 5) @@ -419,20 +951,40 @@ for (disk_index = 0; disk_index < part_nr; disk_index++) { - printf("%-13s", disk_hdr_stats[disk_index].name); - if (strlen(disk_hdr_stats[disk_index].name) > 13) - printf("\n "); + if (!disk_hdr_stats[disk_index].active) + continue; + + /* + * If the name in disk_hdr_stats has been reset to hdisk*, + * it means we're getting our stats from a 2.2 kernel (see + * read_stat()) and we only know there are four disks, but + * not their major/minor numbers. Otherwise, we're on a + * later kernel and we know both their real name (.name) + * and their major/minor numbers (.major, .minor). We rewrite + * the name to be dev%d-%d to keep with historical output. 
+ */ + if (strncmp("hdisk", disk_hdr_stats[disk_index].name, 5)) { + + char name[64]; + + sprintf(name, "dev%d-%d", disk_hdr_stats[disk_index].major, + disk_hdr_stats[disk_index].minor); + printf("%-13s", name); + if (strlen(name) > 13) + printf("\n "); + } else + printf("%-13s", disk_hdr_stats[disk_index].name); printf(" %8.2f %12.2f %12.2f %10u %10u\n", - S_VALUE(disk_stats[!curr][disk_index].dk_drive, - disk_stats[curr][disk_index].dk_drive, itv), - S_VALUE(disk_stats[!curr][disk_index].dk_drive_rblk, - disk_stats[curr][disk_index].dk_drive_rblk, itv) / fct, - S_VALUE(disk_stats[!curr][disk_index].dk_drive_wblk, - disk_stats[curr][disk_index].dk_drive_wblk, itv) / fct, - (disk_stats[curr][disk_index].dk_drive_rblk - - disk_stats[!curr][disk_index].dk_drive_rblk) / fct, - (disk_stats[curr][disk_index].dk_drive_wblk - - disk_stats[!curr][disk_index].dk_drive_wblk) / fct); + S_VALUE(disk_stats[PREV + disk_index].dk_drive, + disk_stats[CURR + disk_index].dk_drive, itv), + S_VALUE(disk_stats[PREV + disk_index].dk_drive_rblk, + disk_stats[CURR + disk_index].dk_drive_rblk, itv) / fct, + S_VALUE(disk_stats[PREV + disk_index].dk_drive_wblk, + disk_stats[CURR + disk_index].dk_drive_wblk, itv) / fct, + (disk_stats[CURR + disk_index].dk_drive_rblk - + disk_stats[PREV + disk_index].dk_drive_rblk) / fct, + (disk_stats[CURR + disk_index].dk_drive_wblk - + disk_stats[PREV + disk_index].dk_drive_wblk) / fct); } } printf("\n"); @@ -457,9 +1009,6 @@ init_nls(); #endif - /* Init stat counters */ - init_stats(); - /* How many processors on this machine ? 
*/ get_nb_proc_used(&proc_used, ~0); @@ -467,15 +1016,21 @@ while (opt < argc) { if (!strcmp(argv[opt], "-x")) { - flags |= D_EXTENDED + D_EXTENDED_ALL; /* Extended statistics */ - /* Get device names */ - while (argv[++opt] && strncmp(argv[opt], "-", 1) - && !isdigit(argv[opt][0])) { - flags &= ~D_EXTENDED_ALL; - if (part_nr < MAX_PART) - strncpy(disk_hdr_stats[part_nr++].name, - device_name(argv[opt]), MAX_NAME_LEN - 1); - } + + int n = 0; + + /* + * figure out how many, if any, devices are specified + * and pass them into init_stats() + */ + while (argv[opt+(++n)] && !isdigit(argv[opt+n][0]) && + argv[opt+n][0] != '-') + ; + + flags |= D_EXTENDED; /* Extended statistics */ + + init_stats(&argv[opt+1], n-1, flags); + opt += n; } else if (!strncmp(argv[opt], "-", 1)) { @@ -524,6 +1079,9 @@ } } + if (!DISPLAY_EXTENDED(flags)) + init_stats(0,0,flags); + get_localtime(&loc_time); /* Get system name, release number and hostname */ @@ -538,8 +1096,6 @@ do { /* Read kernel statistics */ read_stat(curr, flags); - if (DISPLAY_EXTENDED(flags)) - read_ext_stat(curr, flags); /* Save time */ get_localtime(&loc_time); diff -ru sysstat-4.1.2/iostat.h sysstat-4.1.2-2/iostat.h --- sysstat-4.1.2/iostat.h Thu Jan 23 09:32:47 2003 +++ sysstat-4.1.2-2/iostat.h Wed Jan 29 15:59:07 2003 @@ -8,9 +8,6 @@ #include "common.h" -/* Maximum number of partitions (minimum is 4) */ -#define MAX_PART 256 - #define MAX_NAME_LEN 68 /* Files */ @@ -80,5 +77,15 @@ #define DISK_STATS_SIZE (sizeof(int) * 13) +/* + * We dynamically allocate as many disk_stats and disk_stat_hdr structs as + * we need when we start up, but it's possible more disks and/or partitions + * may appear while we're running. To make it easier to accommodate this, + * we'll allocate an extra MAX_DISK_PAD structures. If we find more than + * MAX_DISK_PAD extra disks/partitions while running, we'll have to + * reallocate and repopulate whole arrays of structures (an expensive and + * mildly complex operation that we hope to avoid.) 
+ */ +#define MAX_DISK_PAD 5 #endif /* _IOSTAT_H */ diff -ru sysstat-4.1.2/sadc.c sysstat-4.1.2-2/sadc.c --- sysstat-4.1.2/sadc.c Thu Jan 23 09:32:47 2003 +++ sysstat-4.1.2-2/sadc.c Fri Mar 7 17:57:50 2003 @@ -375,6 +375,212 @@ } +#ifndef MAXPATHLEN +#define MAXPATHLEN 1024 +#endif +/* + * sysfs_mntpt() - discover if a sysfs file system is mounted on + * this machine. Return the mountpoint (malloc'ed, at least MAXPATHLEN + * bytes because the calling routine may append to it to generate + * pathnames; the caller owns it), or NULL if none was found. + */ +char *sysfs_mntpt(void) +{ + FILE *mntfp; + char *pathname, *buffer, fsname[64] /* pathname is as big as buffer; fsname is bounded by %63s */; + + pathname = malloc(2 * MAXPATHLEN); + if (!pathname) + return NULL; + buffer = malloc(2 * MAXPATHLEN); + if (!buffer) { + free(pathname); + return NULL; + } + mntfp = fopen(MOUNTS, "r"); + if (!mntfp) { + free(pathname); + free(buffer); + return NULL; + } + + while (fgets(buffer, 2 * MAXPATHLEN, mntfp)) { + if (sscanf(buffer, "%*s %s %63s %*s", pathname, fsname) == 2) { + if (strcmp(fsname, "sysfs") == 0) { + fclose(mntfp); + free(buffer); + return pathname; + } + } + } + fclose(mntfp); + free(buffer); + free(pathname); + return NULL; +} + + +char *sysfs_path; +/* + * check_sysfs_tree() - Simply count up how many disks we seem able to + * report on, probably so we can dynamically size some storage. + * + * Returns the number of disks found in a sysfs tree. Returns 0 + * if this machine does not have a valid sysfs tree. + */ +int check_sysfs_tree(void) +{ + struct dirent *this_dev; + DIR *devices; + char *endofdev; + struct stat st; + int dsk = 0; + + if ((sysfs_path = sysfs_mntpt()) == NULL) { + return 0; + } + + /* + * We expect to find block devices in the block directory beneath + * the mount point + */ + strcat(sysfs_path, "/block"); + endofdev = &sysfs_path[strlen(sysfs_path)]; + devices = opendir(sysfs_path); + if (devices == NULL) { + /* + * well, that was a nice try, but we should probably note this.
+ */ + fprintf(stderr, "sadc: could not open sysfs block root %s (%s)\n", + sysfs_path, sys_errlist[errno]); + return 0; + } + + while ((this_dev = readdir(devices))) { + if (strcmp(this_dev->d_name, ".") == 0 || + strcmp(this_dev->d_name, "..") == 0) + continue; + + /* + * for each device we find here, we want to look at the + * stat entry. If we are interested in partitions, then + * we need to look for entries labeled in the + * device directory. + */ + strcat(endofdev,"/"); + strcat(endofdev,this_dev->d_name); + strcat(endofdev,"/stat"); + if (stat(sysfs_path, &st) == 0) { + /* + * then this device has stats + */ + dsk++; + } + *endofdev = '\0'; + } + closedir(devices); + return dsk; +} + +/* + * get_sysfs_stats() - Traverse the sysfs tree, storing disk statistics we + * find along the way. + * + * We could conceivably keep these descriptors open between re-reads, + * but unlike files such as /proc/partitions, doing an lseek back to the + * beginning of a sysfs file and re-reading it doesn't give you new + * statistics. (If I recall, it gives you nothing, not even the original + * data back.) So we HAVE to actually close and open each file. Might be + * limiting, anyway, to have to keep open hundreds of fd's on large + * disk farms. + */ +void get_sysfs_stats(void) +{ + struct dirent *this_dev; + DIR *devices; + char *endofdev; + FILE *stat_fp; + struct disk_stats *this_disk; + unsigned int d_major, d_minor, dsk, items, rio, wio, rblk, wblk; + + endofdev = &sysfs_path[strlen(sysfs_path)]; + devices = opendir(sysfs_path); + if (devices == NULL) { + /* + * Odd that it would disappear after initially being found. 
+ */ + fprintf(stderr, "sadc: could not open sysfs block root %s (%s)\n", + sysfs_path, sys_errlist[errno]); + return; + } + + dsk = 0; + file_stats.dk_drive = file_stats.dk_drive_rio = + file_stats.dk_drive_rblk = file_stats.dk_drive_wio = + file_stats.dk_drive_wblk = 0; + while ((this_dev = readdir(devices))) { + if (strcmp(this_dev->d_name, ".") == 0 || + strcmp(this_dev->d_name, "..") == 0) + continue; + + /* + * for each device we find here, we want to look at the + * dev entry first, then the stat entry. Failure to find either + * is cause for us to skip the device. + */ + strcat(endofdev,"/"); + strcat(endofdev,this_dev->d_name); + strcat(endofdev,"/dev"); + items = 0; + if ((stat_fp = fopen(sysfs_path, "r")) >= 0) { + items = fscanf(stat_fp, "%2x%2x", &d_major, &d_minor); + fclose(stat_fp); + } + *endofdev = '\0'; + if (items != 2) { + /* + * something's wrong with this sysfs; skip this device + */ + continue; + } + + strcat(endofdev,"/"); + strcat(endofdev,this_dev->d_name); + strcat(endofdev,"/stat"); + items = 0; + if ((stat_fp = fopen(sysfs_path, "r")) >= 0) { + items = fscanf(stat_fp, "%u %*u %*u %u %u %*u %*u %u", + &rio, &rblk, &wio, &wblk); + fclose(stat_fp); + } + *endofdev = '\0'; + if (items != 4) { + /* + * something's wrong with this sysfs; skip this device + */ + continue; + } + + file_stats.dk_drive += rio + wio; + file_stats.dk_drive_rio += rio; + file_stats.dk_drive_rblk += rblk; + file_stats.dk_drive_wio += wio; + file_stats.dk_drive_wblk += wblk; + + /* + * Now add this info to the disk_stat structures + */ + if (dsk < disk_used) { + this_disk = st_disk + dsk; + this_disk->major = d_major; + this_disk->index = d_minor; + this_disk->dk_drive = rio + wio; + this_disk->dk_drive_rwblk = rblk + wblk; + dsk++; + } + } + closedir(devices); +} /* * Find number of disks that are in /proc/stat file @@ -403,6 +609,13 @@ /* Close /proc/stat file */ fclose(statfp); + if (!dsk) { + /* + * perhaps there's a sysfs file system (2.5.45 and beyond) + */ + 
dsk = check_sysfs_tree(); + } + if (dsk) *nr_disks = dsk + NR_DISK_PREALLOC; else @@ -510,6 +723,10 @@ file_hdr.sa_irqcpu = irqcpu_used; file_hdr.sa_iface = iface_used; file_hdr.sa_nr_disk = disk_used; + fprintf(stderr," sa_proc=%d sa_actflag=%x sa_serial=%d sa_nr_disk=%d\n", + file_hdr.sa_proc, file_hdr.sa_actflag, file_hdr.sa_serial, + file_hdr.sa_nr_disk); + *file_stats_size = FILE_STATS_SIZE; @@ -890,6 +1107,10 @@ /* Close stat file */ fclose(statfp); + + if (sysfs_path) { + get_sysfs_stats(); + } }