//
// Orcallator.se, a log-generating performance monitor.
//
// This program logs many different system quantities to a log file
// for later processing.
//
// Author: Blair Zajac.
//
// Portions copied from percollator.se written by Adrian Cockcroft.
//
// Version 1.28:   Oct 2, 2001   No changes, bump version number to 1.28.
// Version 1.28b7: Sep 29, 2001  Change the output log filename format from
//     percol-%Y-%m-%d to percol-%Y-%m-%d-XXX, where XXX is a number starting
//     at 0 that is incremented any time the number of output columns changes
//     or the type of data stored in a column changes.  This is in addition
//     to the creation of a new log filename when a new day starts.  Whenever
//     the program needs to create a new log file for any reason, it will
//     search for the smallest XXX so that there are no log files named
//     percol-%Y-%m-%d-XXX{,.Z,.gz,.bz2}.  If the COMPRESSOR environment
//     variable is set and any uncompressed files are found while looking for
//     the smallest XXX, they are compressed with the COMPRESSOR command.
// Version 1.28b6: Sep 28, 2001  Instead of outputting the number of CPUs
//     only when WATCH_MUTEX is defined, output it when either WATCH_CPU or
//     WATCH_MUTEX is defined.  Only declare and update tmp_mutex if
//     WATCH_MUTEX is defined.
// Version 1.28b5: Sep 28, 2001  Add three parameters that vmstat outputs:
//     #runque, vmstat's `r' column, which is the number of processes in the
//     run queue waiting to run on a CPU; #waiting, vmstat's `b' column,
//     which is the number of processes blocked for resources (I/O, paging);
//     and #swpque, vmstat's `w' column, the number of processes runnable but
//     swapped out.  Increase MAX_COLUMNS from 512 to 2048.  Check
//     [wr]lentime to see if an EMC disk is using a fake disk for control.
//     EMC disks have a fake disk which commands are run over to configure
//     the disk array or to get stats from; they are not real data transfers.
//     They can cause 1000 MB/sec writes to appear in the stats.  I still get
//     them, but not as often, with this bit of code in.  If the I/O which
//     occurred in the last five minutes is not greater than 1/100 sec then
//     it is not a valid measurement anyway.  What happens is that we can
//     have a small I/O, say 1024 bytes, in 1/100 sec = 1024*100 bytes/sec.
//     I am thinking of making it wlentime+rlentime > 2 since I am still
//     getting fake write spikes.  Make sure to define HAVE_EMC_DISK_CONTROL
//     to enable this check.  Patch contributed by Damon Atkins.
// Version 1.28b4: Mar 27, 2001  Recode measure_disk() to access the RAWDISK
//     interface to sys_kstat device information, allowing the activity on
//     Sun's A1000 and Clariion RAID controller drives to be seen.
//     Apparently the pseudo drivers do not update the kstat interface.  This
//     also inverts the fix provided by version 1.23 to avoid over-counting
//     md devices, by suppressing stats from slices and metadevices and
//     instead reporting on full devices such as c0t0d0 or sd0.  Note: this
//     may have introduced an interaction with the live_rules.se class
//     monitoring of drive performance.  Exclude floppy disks and tape drives
//     from RAWDISK.  Add wio% to measure wait time, since the idle
//     calculation is wrong without it.  Prevent filesystems mounted under
//     /snapshots from being seen.  Patch contributed by Alan LeGrand.
// Version 1.27:   Mar 27, 2001  Print the portion of time running in idle
//     mode with some process waiting for block I/O as wio% and otherwise
//     completely idle time as idle%.
// Version 1.26:   Feb 5, 2001   Make sure to check the return from stat()
//     on the web server access log in case the file is missing.  Use
//     fstat() instead of stat() when a file descriptor is available.
// Version 1.25:   Mar 30, 2000  Fix a typo where nil was misspelled as nik.
// Version 1.24:   Mar 25, 2000  When orcallator.se was running on a system
//     with an older version of SE, the p_vmstat.scan variable is an integer
//     and the sprintf to %8.3f fails, resulting in a perceived scan rate of
//     0 pages per second.  Now always add 0.0 to p_vmstat.scan to get a
//     double.
// Version 1.23:   Feb 25, 2000  When orcallator.se was running on a system
//     with DiskSuite, the same physical disk was listed multiple times when
//     it appeared in several metadevices.  The solution to the problem is
//     not to build the c0t0d0 name but to use the long disk name provided
//     by the long_name string.  Patch contributed by Paul Haldane.
// Version 1.22:   Jan 14, 2000  Include code to record NFS v2 and v3 server
//     statistics.  The new statistics are: nfss_calls, the number of NFS
//     calls to the NFS server; nfss_bad, the number of bad NFS calls per
//     second; and v{2,3}{reads,writes}, which are nfss_calls broken down
//     into NFS version 2 and NFS version 3 calls.  The sum of
//     v{2,3}{reads,writes} will be less than nfss_calls, as the other types
//     of NFS calls, such as getattr and lookup, are not included.
//     Contributed by Paul Haldane.  This code is enabled by the standard
//     -DWATCH_OS or individually by -DWATCH_NFS_SERVER.  The define
//     -DWATCH_NFS has been superseded by -DWATCH_NFS_CLIENT, but to keep
//     backwards compatibility, -DWATCH_NFS_CLIENT will be defined if
//     -DWATCH_NFS is defined.
// Version 1.21:   Jan 12, 2000  Prevent core dumps on extremely long access
//     log lines.
// Version 1.20:   Oct 20, 1999  Update my email address.
// Version 1.19:   Oct 13, 1999  Prevent a division by zero in calculating
//     the mean_disk_busy if the number of disks on the system is 0.
// Version 1.18:   Oct 12, 1999  Rename disk_runp.c?t?d? to disk_runp_c?t?d?
//     to remove the .'s.
// Version 1.17:   Oct 8, 1999   Do not record mount point statistics for
//     locally mounted /cdrom partitions.
// Version 1.16:   Oct 7, 1999   To keep backwards compatibility, define
//     WATCH_WEB if WATCH_HTTPD is defined.  If the COMPRESSOR environment
//     variable is defined, then when a new log file is opened for a new
//     day, the just-closed log file is compressed using the COMPRESSOR
//     command in the following manner:
//         system(sprintf("%s %s &", COMPRESSOR, log_file))
//     COMPRESSOR should be set to something like "gzip -9", "compress", or
//     "bzip2 -9".
// Version 1.15:   Oct 5, 1999   kvm$mpid is an int, not a long.  This
//     caused problems on Solaris 7 hosts running a 64-bit kernel.
// Version 1.14:   Oct 1, 1999   Rename disk.c?t?d? column names to
//     disk_runp.c?t?d? to better reflect the data being recorded and to
//     allow for more per-disk information later.
// Version 1.13:   Sep 24, 1999  Fix a bug in the disk_mean calculation
//     where it was being divided by the wrong disk_count.  Now it should be
//     much larger and in scale with disk_peak.  When WATCH_DISK is defined,
//     now print each disk's run percent.  Add a new define WATCH_MOUNTS,
//     which reports each local mount point's disk space and inode capacity,
//     usage, available for non-root users and percent used.  This comes
//     from Duncan Lawie tyger@hoopoes.com.
//     Add some smarts so that if the number of interfaces, physical disks,
//     or mounted partitions changes, then a new header is printed.  This
//     will prevent column name and data mixups when the system
//     configuration changes.
// Version 1.12:   Sep 14, 1999  Add the page scan rate as scanrate in
//     measure_cpu.
// Version 1.11:   Aug 13, 1999  Add the number of CPUs as ncpus.  Move
//     measure_disk and measure_ram sooner in the list of subsystems to
//     handle.  Increase the number of characters for each network interface
//     from four to five.  Add new disk reads, writes, Kbytes read, and
//     Kbytes written per second.  Add the number of Kbytes of free memory
//     as freememK.
// Version 1.10:   Jul 28, 1999  Measure the process spawn rate if WATCH_CPU
//     is defined and the user is root.
// Version 1.9:    Jun 2, 1999   If WATCH_YAHOO is defined, then process the
//     access log as a Yahoo! style access log.  Restructure the code to
//     handle different web server access log formats.
// Version 1.8:    Jun 1, 1999   If the environment variable WEB_SERVER is
//     defined, use its value as the name of the process to count for the
//     number of web servers on the system.  If WEB_SERVER is not defined,
//     then count the number of httpd's.
// Version 1.7:    Mar 25, 1999  Simplify and speed up count_proc by 20%.
// Version 1.6:    Feb 23, 1999  Print pvm.user_time and system_time
//     correctly.
// Version 1.5:    Feb 23, 1999  Always write a header to a new file.
// Version 1.4:    Feb 19, 1999  Handle missing HTTP/1.x in access log.
// Version 1.3:    Feb 18, 1999  On busy machines httpops5 will be enlarged.
// Version 1.2:    Feb 18, 1999  Output data on integral multiples of
//     interval.
// Version 1.1:    Feb 18, 1999  Integrate Squid log processing from SE 3.1.
// Version 1.0:    Sep 9, 1998   Initial version.
//

// The default sampling interval in seconds.
#define SAMPLE_INTERVAL 300

// The maximum number of columns of data.
#define MAX_COLUMNS 2048

// Define the different parts of the system you want to examine.
#ifdef WATCH_OS
#define WATCH_CPU        1
#define WATCH_MUTEX      1
#define WATCH_NET        1
#define WATCH_TCP        1
#define WATCH_NFS_CLIENT 1
#define WATCH_NFS_SERVER 1
#define WATCH_MOUNTS     1
#define USE_RAWDISK      1
#define WATCH_DISK       1
#define WATCH_DNLC       1
#define WATCH_INODE      1
#define WATCH_RAM        1
#define WATCH_PAGES      1
#endif

// Keep backwards compatibility with WATCH_HTTPD.
#ifdef WATCH_HTTPD
#define WATCH_WEB 1
#endif

// Keep backwards compatibility with WATCH_NFS.
#ifdef WATCH_NFS
#ifndef WATCH_NFS_CLIENT
#define WATCH_NFS_CLIENT 1
#endif
#endif

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef WATCH_MOUNTS
#include
#include
#endif

#if WATCH_CPU || WATCH_WEB
#include

#ifdef WATCH_CPU
// This is the maximum pid on Solaris hosts.
#define DEFAULT_MAXPID 30000
#include
#endif

#ifdef WATCH_WEB
// Define this macro which returns the size index for a file of a
// particular size.  This saves the overhead of a function call.
#define WWW_SIZE_INDEX(size, size_index)     \
  if (size < 1024) {                         \
    size_index = 0;      /* Under 1KB */     \
  } else {                                   \
    if (size < 10240) {                      \
      size_index = 1;    /* Under 10K */     \
    } else {                                 \
      if (size < 102400) {                   \
        size_index = 2;  /* Under 100KB */   \
      } else {                               \
        if (size < 1048576) {                \
          size_index = 3;  /* Under 1MB */   \
        } else {                             \
          size_index = 4;  /* Over 1MB */    \
        }                                    \
      }                                      \
    }                                        \
  }                                          \
  dwnld_size[size_index]++;
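// For example, a 512-byte reply falls into bin 0 (under 1 KB), a
// 50,000-byte reply into bin 2 (under 100 KB), and a 2 MB reply into
// bin 4 (1 MB and over), so dwnld_size[] accumulates a histogram of
// replies by size.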
// Handle the reply code from the server.
#define WWW_REPLY_CODE(word)                             \
  if (word == "304") {                                   \
    httpop_condgets++;                                   \
  }                                                      \
  else {                                                 \
    first_byte = word;                                   \
    if (first_byte[0] == '4' || first_byte[0] == '5') {  \
      httpop_errors++;                                   \
    }                                                    \
  }                                                      \

// Handle the method of the object served.  This define only works
// with non-proxy servers.
#define WWW_METHOD1(word)     \
  switch (word) {             \
    case "get":               \
    case "GET":               \
      httpop_gets++;          \
      break;                  \
    case "post":              \
    case "POST":              \
      httpop_posts++;         \
      break;                  \
    case "head":              \
    case "HEAD":              \
      ishead = 1;             \
      httpop_condgets++;      \
      break;

#ifdef WATCH_SQUID
#define WWW_METHOD2           \
    case "icp_query":         \
    case "ICP_QUERY":         \
      squid_icp_queries++;    \
      break;
#else
#define WWW_METHOD2
#endif

#define WWW_METHOD_END        \
    default:                  \
      break;                  \
  }

#define WWW_METHOD(word) WWW_METHOD1(word) WWW_METHOD2 WWW_METHOD_END
#endif
#endif

// Put all rules here so they can be accessed by the handle functions.
lr_cpu_t lr_cpu$cpu;
lr_cpu_t tmp_lrcpu;
lr_mutex_t lr_mutex$m;
#ifdef WATCH_MUTEX
lr_mutex_t tmp_mutex;
#endif
lr_net_t lr_net$nr;
lr_net_t tmp_nr;
lr_tcp_t lr_tcp$tcp;
lr_tcp_t tmp_lrtcp;
#ifdef WATCH_TCP
tcp tcp$tcp;
tcp tmp_tcp;
#endif
lr_rpcclient_t lr_rpcclient$r;
lr_rpcclient_t tmp_lrpcc;
lr_disk_t lr_disk$dr;
lr_disk_t tmp_dr;
lr_dnlc_t lr_dnlc$dnlc;
lr_dnlc_t tmp_lrdnlc;
lr_inode_t lr_inode$inode;
lr_inode_t tmp_lrinode;
lr_ram_t lr_ram$ram;
lr_ram_t tmp_lrram;
#ifdef WATCH_PAGES
ks_system_pages kstat$pages;
ks_system_pages tmp_kstat_pages;
#endif
lr_swapspace_t lr_swapspace$s;
lr_swapspace_t tmp_lrswap;
lr_kmem_t lr_kmem$kmem;
lr_kmem_t tmp_lrkmem;
ks_system_misc kstat$misc;
ks_system_misc tmp_kstat_misc;

// Put application globals here.
string nodename;                    // Name of this machine.
string program_name;                // Name of this program.
int    hz;                          // Clock tick rate.
int    page_size;                   // Page size in bytes.
long   boot_time;                   // Boot time of the system.
long   interval = SAMPLE_INTERVAL;  // Sampling interval.

#ifdef WATCH_CPU
int    can_read_kernel = 0;         // If the kernel can be read.
int    kvm$mpid;                    // The last created PID.

// These variables store the mpid before and after the standard interval.
int mpid_previous;
int mpid_current;
ulonglong mpid_then;
ulonglong mpid_now;

// These variables store the mpid before and after 5 second intervals.
int mpid5_previous;
int mpid5_current;
ulonglong mpid5_then;
ulonglong mpid5_now;
double mpid5_rate;
#endif

#ifdef WATCH_MOUNTS
mnttab_t mnt$mnt;
mnttab_t tmp_mnt;
#endif

#ifdef WATCH_NFS_SERVER
ks_nfs_server kstat$nfs;
ks_nfs_server tmp_nfs;
ks_rfs_proc_v2 kstat$rfsproccnt_v2;
ks_rfs_proc_v2 tmp_rfsproccnt_v2;
ks_rfs_proc_v3 kstat$rfsproccnt_v3;
ks_rfs_proc_v3 tmp_rfsproccnt_v3;
#endif

// Variables for handling the httpd access log.
#ifdef WATCH_WEB
string www_search_url       = getenv("SEARCHURL");
string www_server_proc_name = getenv("WEB_SERVER");
string www_log_filename     = getenv("WEB_LOG");
string www_gateway          = getenv("GATEWAY");
ulong  www_fd;
uint   www_gatelen;
stat_t www_stat[1];
ulong  www_ino;
long   www_size;
double www_interval;        // Hi-res interval time.
ulonglong www_then;
ulonglong www_now;
double www5_interval;       // Actual hi-res 5 second interval.
ulonglong www5_then;
ulonglong www5_now;
double httpops;
double httpops5;
double gateops;
double dtmp;
long   httpop_gets;
long   httpop_condgets;     // HEAD or code = 304 conditional get no data.
long   httpop_posts;
long   httpop_cgi_bins;
long   httpop_searches;
long   httpop_errors;
long   dwnld_size[5];       // [0] < 1K, [1] < 10K, [2] < 100K, [3] < 1M, [4] >= 1M
long   dwnld_totalz;        // Total size counted from log.
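// The SEARCHURL, WEB_SERVER, WEB_LOG and GATEWAY settings above come from
// the environment.  A hypothetical invocation (names and paths are examples
// only, not defaults) might look like:
//   WEB_LOG=/var/httpd/logs/access_log WEB_SERVER=httpd \
//   SEARCHURL=search.cgi GATEWAY=gateway.cgi se orcallator.se
// If WEB_LOG is not set, measure_web() skips access-log processing.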
#if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO
// If we're watching a Yahoo log, then take the transfer time to be the
// processing time.
double www_dwnld_time_sum;         // Transfer time.
double www_dwnld_time_by_size[5];  // Mean transfer time by size bin.
#endif
#if WATCH_PROXY || WATCH_SQUID
long prxy_squid_indirect;          // # hits that go via PROXY,SOCKS,parent
long prxy_squid_cache_hits;        // # hits returned from cache.
#endif
#ifdef WATCH_PROXY
long prxy_cache_writes;            // Number of writes and updates to cache.
long prxy_uncacheable;             // Number of explicitly uncacheable httpops.
                                   // Any extra is errors or incomplete ops.
#endif
#ifdef WATCH_SQUID
long squid_cache_misses;
long squid_icp_requests;
long squid_icp_queries;
long squid_client_http;
#endif
#endif

// RAWDISK BEGIN
#ifdef USE_RAWDISK
#include

// This extension accesses the sys_kstat.se interface to the kstat IO
// queues to extract info on drives not available in the kstat.se
// kstat$disk interface.  Global data shared between function calls.
struct RawDisk {
  // Exposed interface (matches kstat)
  char long_name[16];
  char short_name[8];
  double reads;
  double kreads;
  double writes;
  double kwrites;
  double avg_wait;
  double avg_serv;
  double service;
  double wait_percent;
  double run_percent;
  // Hidden internal registers to track sys_kstats counters.
  int _number;             // kstat disk #
  ulonglong _nread;        // Number of bytes read
  ulonglong _nwritten;     // Number of bytes written
  uint _reads;             // Number of read operations
  uint _writes;            // Number of write operations
  longlong _wtime;         // Cumulative wait (pre-service) time
  longlong _wlentime;      // Cumulative wait length*time product
  longlong _wlastupdate;   // Last time wait queue changed
  longlong _rtime;         // Cumulative run (service) time
  longlong _rlentime;      // Cumulative run length*time product
  longlong _rlastupdate;   // Last time run queue changed
  uint _wcnt;              // Count of elements in wait state
  uint _rcnt;              // Count of elements in run state
};

// Define globals for tracking raw disk data.
#define MAX_RAWDISKS 512
RawDisk RAW_disk[MAX_RAWDISKS];
int RAW_disk_map = 0;
int RAW_disk_count = 0;
double RAW_disk_lastupdate;

// Function to scan kstat and map short device names to long device names.
raw_disk_map()
{
  int i;
  int j = 0;
  char long_name[16];
  char short_name[16];

  // SE for 2.6 & 2.7 appears to have a bug where MAX_DISK is too large
  // when an A1000 raid controller is present.  It's not really SE's
  // fault, as iostat had to be patched for the same bug.  The bug appears
  // to be tied to a failure to update the kstat interface diskinfo uses
  // to map short names to long names.  Since raw_disk_update has already
  // identified how many physical devices are present and they are listed
  // first, we limit ourselves to mapping the first RAW_disk_count device
  // names.
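  // For example, a drive whose kstat short name is sd0 would be reported
  // under a long controller/target/disk name such as c0t0d0.  (Illustrative
  // names only; the actual mapping comes from the kstat data scanned
  // below.)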
  for (i=0; i _nread) {
        _nread = RAW_disk[rdisk]._nread;
      }
      _reads = kio.reads;
      if (RAW_disk[rdisk]._reads > _reads) {
        _reads = RAW_disk[rdisk]._reads;
      }
      _rlentime = kio.rlentime;
      _rtime = kio.rtime;
      _rlastupdate = kio.wlastupdate;
      _rcnt = kio.rcnt;
      _nwritten = kio.nwritten;
      if (RAW_disk[rdisk]._nwritten > _nwritten) {
        _nwritten = RAW_disk[rdisk]._nwritten;
      }
      _writes = kio.writes;
      if (RAW_disk[rdisk]._writes > _writes) {
        _writes = RAW_disk[rdisk]._writes;
      }
      _wlentime = kio.wlentime;
      _wtime = kio.wtime;
      _wlastupdate = kio.wlastupdate;
      _wcnt = kio.wcnt;

      elapsed_etime = (_wlastupdate - RAW_disk[rdisk]._wlastupdate);
      if (elapsed_etime > 0) {
        hz_etime = elapsed_etime / nanosecond;
        big_etime = 1024.0 * hz_etime;
      } else {
        elapsed_etime = nanosecond;
        hz_etime = 1.0;
        big_etime = 1024.0;
      }
      RAW_disk[rdisk].reads   = (_reads    - RAW_disk[rdisk]._reads)    / hz_etime;
      RAW_disk[rdisk].kreads  = (_nread    - RAW_disk[rdisk]._nread)    / big_etime;
      RAW_disk[rdisk].writes  = (_writes   - RAW_disk[rdisk]._writes)   / hz_etime;
      RAW_disk[rdisk].kwrites = (_nwritten - RAW_disk[rdisk]._nwritten) / big_etime;
      read_writes = elapsed_etime *
        (RAW_disk[rdisk].reads + RAW_disk[rdisk].writes) / 1024.0;
      if (read_writes > 0) {
        RAW_disk[rdisk].avg_wait =
          (_wlentime - RAW_disk[rdisk]._wlentime) / read_writes;
        RAW_disk[rdisk].avg_serv =
          (_rlentime - RAW_disk[rdisk]._rlentime) / read_writes;
        RAW_disk[rdisk].service =
          RAW_disk[rdisk].avg_wait + RAW_disk[rdisk].avg_serv;
      } else {
        RAW_disk[rdisk].avg_wait = 0.0;
        RAW_disk[rdisk].avg_serv = 0.0;
        RAW_disk[rdisk].service  = 0.0;
      }

      // Update the counters.
      RAW_disk[rdisk].run_percent =
        100.0 * (_rtime - RAW_disk[rdisk]._rtime) / elapsed_etime;
      RAW_disk[rdisk].wait_percent =
        100.0 * (_wtime - RAW_disk[rdisk]._wtime) / elapsed_etime;
      RAW_disk[rdisk]._writes      = _writes;
      RAW_disk[rdisk]._nwritten    = _nwritten;
      RAW_disk[rdisk]._wlastupdate = _wlastupdate;
      RAW_disk[rdisk]._wlentime    = _wlentime;
      RAW_disk[rdisk]._wtime       = _wtime;
      RAW_disk[rdisk]._wcnt        = _wcnt;
      RAW_disk[rdisk]._reads       = _reads;
      RAW_disk[rdisk]._nread       = _nread;
      RAW_disk[rdisk]._rlastupdate = _rlastupdate;
      RAW_disk[rdisk]._rlentime    = _rlentime;
      RAW_disk[rdisk]._rtime       = _rtime;
      RAW_disk[rdisk]._rcnt        = _rcnt;
      }
    }
   }
  }
  kstat_close(kc);

  // Map long device names for any drives that we just discovered.
  if (RAW_disk_map == 1) {
    raw_disk_map();
  }
}
#endif
// RAWDISK END

// Variables for handling output.
string compress = getenv("COMPRESSOR");    // How to compress logs.
ulong  ofile;                              // File pointer to the logging file.
string col_comment[MAX_COLUMNS];           // Comments for each column.
string col_data[MAX_COLUMNS];              // Data for each column.
string col_previous_comment[MAX_COLUMNS];  // Previous comments.
int    current_column = 0;                 // The current column.
int    previous_number_columns = -1;       // Number of columns printed last.
int    print_header = 1;                   // Flag to flush header.

// This is a list of the extensions the compress programs add to the
// compressed filename.
#define NUMBER_COMPRESS_SUFFIXES 3
string compression_suffixes[NUMBER_COMPRESS_SUFFIXES] = {".Z", ".gz", ".bz2"};

// Add one column of comments and data to the buffers.
put_output(string comment, string data)
{
  if (current_column >= MAX_COLUMNS) {
    fprintf(stderr, "%s: too many columns (%d).  Increase MAX_COLUMNS.\n",
            program_name, current_column);
    exit(1);
  }
  col_comment[current_column] = comment;
  col_data[current_column]    = data;
  current_column++;
}
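// put_output() collects one header/data pair per column.  For example,
// put_output(" usr%", sprintf("%5.1f", pvm.user_time + 0.0)) queues a
// five-character header and a matching five-character data cell; keeping
// the two strings the same width is what keeps the header line and the
// data lines aligned in the log.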
// Send the stored columns of information to the output.
print_columns(string data[])
{
  int i;

  for (i=0; i= mpid5_previous) {
      mpid5_diff = mpid5_current - mpid5_previous;
    } else {
      mpid5_diff = mpid5_current + DEFAULT_MAXPID - mpid5_previous;
    }
    rate = mpid5_diff/mpid5_interval;
    if (rate > mpid5_rate) {
      mpid5_rate = rate;
    }
    mpid5_previous = mpid5_current;

    // Now take these results to measure the long interval rate.
    // Because the mpid may flip over DEFAULT_MAXPID more than once
    // in the long interval time span, use the difference between
    // the previous and current mpid over a 5 second interval to
    // calculate the long interval difference.
    mpid_current += mpid5_diff;
    mpid_now = mpid5_now;
  } else {
    sleep(sleep_till - now);
  }
#else
    sleep(sleep_till - now);
#endif
    now = time(0);
  }
}

#ifdef WATCH_CPU
measure_cpu()
{
  p_vmstat pvm;
  double mpid_interval;
  double mpid_rate;

  pvm = vmglobal_total();

  // In SE 3.0 and below user_time and system_time are int and in SE
  // 3.1 and above they are double, so cast everything to double using
  // + 0.0.
  put_output(" usr%", sprintf("%5.1f", pvm.user_time + 0.0));
  put_output(" sys%", sprintf("%5.1f", pvm.system_time + 0.0));
  put_output(" wio%", sprintf("%5.1f", pvm.wait_time + 0.0));
  put_output("idle%", sprintf("%5.1f", pvm.idle_time + 0.0));
  put_output(" 1runq", sprintf("%6.2f", tmp_kstat_misc.avenrun_1min/256.0));
  put_output(" 5runq", sprintf("%6.2f", tmp_kstat_misc.avenrun_5min/256.0));
  put_output("15runq", sprintf("%6.2f", tmp_kstat_misc.avenrun_15min/256.0));
  put_output("#proc", sprintf("%5lu", tmp_kstat_misc.nproc));
  put_output(" #runque", sprintf("%8.2f", pvm.runque + 0.0));
  put_output("#waiting", sprintf("%8.2f", pvm.waiting + 0.0));
  put_output(" #swpque", sprintf("%8.2f", pvm.swpque + 0.0));
  put_output("scanrate", sprintf("%8.3f", pvm.scan + 0.0));

  // Calculate the rate of new process spawning.
  if (can_read_kernel != 0) {
    mpid_interval = (mpid_now - mpid_then) * 0.000000001;
    mpid_rate = (mpid_current - mpid_previous) / mpid_interval;

    put_output("#proc/s", sprintf("%7.3f", mpid_rate));
    put_output("#proc/p5s", sprintf("%9.4f", mpid5_rate));

    // Reset counters.
    mpid_then = mpid_now;
    mpid_previous = mpid_current;
    mpid5_rate = 0;
  }
}
#endif

#ifdef WATCH_MUTEX
measure_mutex()
{
  put_output(" smtx", sprintf("%5d", tmp_mutex.smtx));
  put_output("smtx/cpu", sprintf("%8d", tmp_mutex.smtx/GLOBAL_pvm_ncpus));
}
#endif

#ifdef WATCH_NET
measure_net()
{
  int previous_count = -1;
  int current_count;
  int i;

  current_count = 0;
  for (i=0; i
    // Check [wr]lentime to see if an EMC is using a fake disk for control.
    // EMC disks have a fake disk which commands are run over to configure
    // the disk array or to get stats from; they are not real data
    // transfers.  They cause 1000 MB/sec writes to appear in the stats.
    // I still get them, but not as often, with this bit of code in.  If
    // the I/O which occurred in the last five minutes is not greater than
    // 1/100 sec then it is not a valid measurement anyway.  What happens
    // is that we can have a small I/O, say 1024 bytes, in 1/100 sec =
    // 1024*100 bytes/sec.  I am thinking of making it
    // wlentime+rlentime > 2 since I am still getting fake write spikes.
#ifdef HAVE_EMC_DISK_CONTROL
    if ((pioGLOB_old_wlentime[i] + pioGLOB_old_rlentime[i]) > 1) {
#endif
      total_reads    += RAW_disk[i].reads;
      total_writes   += RAW_disk[i].writes;
      total_readk    += RAW_disk[i].kreads;
      total_writek   += RAW_disk[i].kwrites;
      mean_disk_busy += RAW_disk[i].run_percent;
      if (RAW_disk[i].run_percent > peak_disk_busy) {
        peak_disk_busy = RAW_disk[i].run_percent;
      }
#ifdef HAVE_EMC_DISK_CONTROL
    }
#endif
  }
#else
  for (i=0; i
    // Check [wr]lentime to see if an EMC is using a fake disk for control.
    // EMC disks have a fake disk which commands are run over to configure
    // the disk array or to get stats from; they are not real data
    // transfers.  They cause 1000 MB/sec writes to appear in the stats.
    // I still get them, but not as often, with this bit of code in.  If
    // the I/O which occurred in the last five minutes is not greater than
    // 1/100 sec then it is not a valid measurement anyway.  What happens
    // is that we can have a small I/O, say 1024 bytes, in 1/100 sec =
    // 1024*100 bytes/sec.  I am thinking of making it
    // wlentime+rlentime > 2 since I am still getting fake write spikes.
#ifdef HAVE_EMC_DISK_CONTROL
    if ((pioGLOB_old_wlentime[i] + pioGLOB_old_rlentime[i]) > 1) {
#endif
      total_reads    += GLOBAL_disk[i].reads;
      total_writes   += GLOBAL_disk[i].writes;
      total_readk    += GLOBAL_disk[i].kreads;
      total_writek   += GLOBAL_disk[i].kwrites;
      mean_disk_busy += GLOBAL_disk[i].run_percent;
      if (GLOBAL_disk[i].run_percent > peak_disk_busy) {
        peak_disk_busy = GLOBAL_disk[i].run_percent;
      }
#ifdef HAVE_EMC_DISK_CONTROL
    }
#endif
  }
#endif

  if (disk_count != 0) {
    mean_disk_busy = mean_disk_busy/disk_count;
  }
  put_output("disk_peak", sprintf("%9.3f", peak_disk_busy));
  put_output("disk_mean", sprintf("%9.3f", mean_disk_busy));
  put_output("disk_rd/s", sprintf("%9.1f", total_reads));
  put_output("disk_wr/s", sprintf("%9.1f", total_writes));
  put_output("disk_rK/s", sprintf("%9.1f", total_readk));
  put_output("disk_wK/s", sprintf("%9.1f", total_writek));

  // If the number of disks has changed, say due to an add_drv, then print
  // new headers.
  if (previous_count != disk_count) {
    print_header = 1;
    previous_count = disk_count;
  }
}
#endif

#ifdef WATCH_DNLC
measure_dnlc()
{
  put_output("dnlc_ref/s", sprintf("%10.3f", tmp_lrdnlc.refrate));
  put_output("dnlc_hit%", sprintf("%9.3f", tmp_lrdnlc.hitrate));
}
#endif

#ifdef WATCH_INODE
measure_inode()
{
  put_output("inod_ref/s", sprintf("%10.3f", tmp_lrinode.refrate));
  put_output("inod_hit%", sprintf("%9.3f", tmp_lrinode.hitrate));
  put_output("inod_stl/s", sprintf("%10.3f", tmp_lrinode.iprate));
}
#endif

#ifdef WATCH_RAM
measure_ram()
{
  put_output("swap_avail", sprintf("%10ld", GLOBAL_pvm[0].swap_avail));
  put_output("page_rstim", sprintf("%10d", tmp_lrram.restime));
  put_output(" freememK", sprintf("%10d", GLOBAL_pvm[0].freemem));
  put_output("free_pages", sprintf("%10d", (GLOBAL_pvm[0].freemem*1024)/page_size));
}
#endif

#ifdef WATCH_PAGES
measure_pages()
{
  put_output("pp_kernel", sprintf("%9lu", tmp_kstat_pages.pp_kernel));
  put_output("pagesfree", sprintf("%9lu", tmp_kstat_pages.pagesfree));
  put_output("pageslock", sprintf("%9lu", tmp_kstat_pages.pageslocked));
  put_output("pagesio", sprintf("%7lu", tmp_kstat_pages.pagesio));
  put_output("pagestotl", sprintf("%9lu", tmp_kstat_pages.pagestotal));
}
#endif

#ifdef WATCH_WEB
/*
 * Breakdown access log format.
 */
accesslog(string buf)
{
  int z;
  int size_index;
  int ishead;
  string word;
  char first_byte[1];
#if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO
  double xf;
#ifdef WATCH_SQUID
  string logtag;
  string request;
#endif
#ifdef WATCH_YAHOO
  string arg;
  ulong ptr;
  ulong tmp;
  ulong ulong_xf;
#endif
#endif

  ishead = 0;

#ifdef WATCH_YAHOO
  /*
   * Make sure that the input line has at least 32 bytes of data plus a new
   * line, for a total length of 33.
   */
  if (strlen(buf) < 33) {
    return;
  }
  word = strtok(buf, "\05");
#else
  word = strtok(buf, " ");
#endif
  if (word == nil) {
    return;
  }
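  /*
   * The parsing below is selected at compile time: WATCH_SQUID expects
   * Squid's native access.log layout, WATCH_YAHOO expects the
   * control-E-delimited Yahoo format described further down, and the
   * default branch handles common log format lines (plus the Netscape
   * proxy extended fields when WATCH_PROXY is defined).
   */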
#ifdef WATCH_SQUID
  /*
   * Word contains unix time in seconds.milliseconds.
   */
  word = strtok(nil, " ");
  if (word == nil) {
    return;
  }
  xf = atof(word)/1000.0;
  www_dwnld_time_sum += xf;
#ifdef DINKY
  printf("time: %s %f total %f\n", word, xf, xfer_sum);
#endif

  word = strtok(nil, " ");      /* Client IP address. */

  logtag = strtok(nil, "/");    /* Log tag. */
  if (logtag == nil) {
    return;
  }
  if (logtag =~ "TCP") {
    squid_client_http++;
  }
  if (logtag =~ "UDP") {
    squid_icp_requests++;
  }
  if (logtag =~ "HIT") {
    prxy_squid_cache_hits++;
  }
  if (logtag =~ "MISS") {
    squid_cache_misses++;
  }

  word = strtok(nil, " ");      /* Reply code. */
  if (word == nil) {
    return;
  }
  WWW_REPLY_CODE(word)

  word = strtok(nil, " ");      /* Size sent to client. */
  if (word == nil) {
    return;
  }
  z = atoi(word);
  WWW_SIZE_INDEX(z, size_index)
  www_dwnld_time_by_size[size_index] += xf;

  request = strtok(nil, " ");   /* Request method. */
  if (request == nil) {
    return;
  }
  WWW_METHOD(request)

  /* Do not add the size if it is a HEAD. */
  if (ishead == 0) {
    dwnld_totalz += z;
  }

  word = strtok(nil, " ");      /* URL. */
  if (word == nil) {
    return;
  }
  if (word =~ "cgi-bin") {
    httpop_cgi_bins++;
  }
  if (word =~ www_search_url) {
    httpop_searches++;
  }

  strtok(nil, " ");             /* Optional user identity. */

  word = strtok(nil, "/");      /* Hierarchy. */
  if (word == nil) {
    return;
  }
  if (word =~ "DIRECT") {
    prxy_squid_indirect++;
  }

#if 0
  word = strtok(nil, " ");      /* Hostname. */
  if (word == nil) {
    return;
  }
  word = strtok(nil, " ");      /* Content-type. */
  if (word == nil) {
    return;
  }
#endif

#elif WATCH_YAHOO
  /*
   * Yahoo log format.  Fields in square brackets will only appear in the
   * log file if the data actually exists (i.e. you will never see a null
   * Referrer field).  Further, fields labeled here with "(CONFIG)" will
   * only appear if they are enabled via the YahooLogOptions configuration
   * directive.
   *
   *   IP Address (8 hex digits)
   *   Timestamp (time_t as 8 hex digits)
   *   Processing Time (in microseconds, as 8 hex digits)
   *   Bytes Sent (8 hex digits)
   *   URL
   *   [^Er referrer] (CONFIG)
   *   [^Em method] (CONFIG)
   *   [^Es status_code]
   *   ^Ed signature
   *   \n
   */

  /*
   * Ignore the IP address and timestamp.  Get the processing time, the
   * number of bytes sent and the URL.  For each portion of the line, split
   * it up into separate pieces.
   */
  if (sscanf(word, "%8lx%8lx%8x%8x", &tmp, &tmp, &ulong_xf, &z) != 4) {
    return;
  }
  xf = ulong_xf/1000000.0;
  WWW_SIZE_INDEX(z, size_index)
  www_dwnld_time_sum += xf;
  www_dwnld_time_by_size[size_index] += xf;

  if (word =~ "cgi-bin") {
    httpop_cgi_bins++;
  }
  if (word =~ www_search_url) {
    httpop_searches++;
  }

  for (;;) {
    word = strtok(nil, "\05");
    if (word == nil) {
      break;
    }
    first_byte = word;
    ptr = &word + 1;
    arg = ((string) ptr);
    ptr = 0;
    switch (first_byte[0]) {
      case 'm':
        WWW_METHOD(arg)
        ptr = 1;
        break;
      case 's':
        WWW_REPLY_CODE(arg)
        break;
      default:
        break;
    }
  }

  /* If no method was seen, then assume it was a GET. */
  if (ptr == 0) {
    httpop_gets++;
  }

  /* Do not add the size if it is a HEAD. */
  if (ishead == 0) {
    dwnld_totalz += z;
  }

#else
  /* common or Netscape proxy formats */
  strtok(nil, " ");             /* -. */
  strtok(nil, " ");             /* -. */
  strtok(nil, " [");            /* date. */
  strtok(nil, " ");             /* zone]. */
  word = strtok(nil, " \"");    /* GET or POST. */
  if (word == nil) {
    return;
  }
  WWW_METHOD(word)

  word = strtok(nil, " ");      /* URL. */
  if (word == nil) {
    return;
  }
  if (word =~ "cgi-bin") {
    httpop_cgi_bins++;
  }
  if (word =~ www_search_url) {
    httpop_searches++;
  }
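  /*
   * For reference, a common log format line looks roughly like this
   * (hypothetical values):
   *   192.0.2.1 - - [01/Jan/2001:00:00:00 -0700] "GET /index.html HTTP/1.0" 200 1043
   * so the tokens consumed above are the client address, two identity
   * fields, the timestamp, the request method and the URL.
   */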
  /*
   * Sometimes HTTP/1.x is not listed in the access log.  Skip it
   * if it does exist.  Load the error/success code.
   */
  word = strtok(nil, " ");
  if (word == nil) {
    return;
  }
  if (word =~ "HTTP" || word =~ "http") {
    word = strtok(nil, " ");
    if (word == nil) {
      return;
    }
  }
  WWW_REPLY_CODE(word)

  word = strtok(nil, " ");      /* Bytes transferred. */
  if (word == nil) {
    return;
  }
  z = atoi(word);

  /* Do not add the size if it is a HEAD. */
  if (ishead == 0) {
    dwnld_totalz += z;
  }
  WWW_SIZE_INDEX(z, size_index)

#ifdef WATCH_PROXY
  strtok(nil, " ");             /* Status from server. */
  strtok(nil, " ");             /* Length from server. */
  strtok(nil, " ");             /* Length from client POST. */
  strtok(nil, " ");             /* Length POSTed to remote. */
  strtok(nil, " ");             /* Client header request. */
  strtok(nil, " ");             /* Proxy header response. */
  strtok(nil, " ");             /* Proxy header request. */
  strtok(nil, " ");             /* Server header response. */
  strtok(nil, " ");             /* Transfer total seconds. */
  word = strtok(nil, " ");      /* Route. */
  if (word == nil) {
    return;
  }
  /* - DIRECT PROXY(host.domain:port) SOCKS. */
  if (strncmp(word, "PROXY", 5) == 0 || strncmp(word, "SOCKS", 5) == 0) {
    prxy_squid_indirect++;
  }

  strtok(nil, " ");             /* Client finish status. */
  strtok(nil, " ");             /* Server finish status. */
  word = strtok(nil, " ");      /* Cache finish status. */
  if (word == nil) {
    return;
  }
  /*
   * ERROR HOST-NOT-AVAILABLE = error or incomplete op
   * WRITTEN REFRESHED CL-MISMATCH(content length mismatch) = cache_writes
   * NO-CHECK UP-TO-DATE = cache_hits
   * DO-NOT-CACHE NON-CACHEABLE = uncacheable
   */
  switch (word) {
    case "WRITTEN":
    case "REFRESHED":
    case "CL-MISMATCH":
      prxy_cache_writes++;
      break;
    case "NO-CHECK":
    case "UP-TO-DATE":
      prxy_squid_cache_hits++;
      break;
    case "DO-NOT-CACHE":
    case "NON-CACHEABLE":
      prxy_uncacheable++;
      break;
    default:
      break;
  }

  word = strtok(nil, " [");     /* [transfer total time x.xxx. */
  if (word == nil) {
    return;
  }
  xf = atof(word);
  www_dwnld_time_sum += xf;
  www_dwnld_time_by_size[size_index] += xf;
#endif
#endif
}

measure_web(long sleep_till)
{
  double lastops = 0.0;
  char buf[BUFSIZ];
  int i;
  long now;

  httpops = 0.0;
  httpops5 = 0.0;
  gateops = 0.0;
  httpop_gets = 0;
  httpop_condgets = 0;
  httpop_posts = 0;
  httpop_cgi_bins = 0;
  httpop_errors = 0;
  httpop_searches = 0;
  for (i=0; i<5; ++i) {
    dwnld_size[i] = 0;
#if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO
    www_dwnld_time_by_size[i] = 0.0;
#endif
  }
  dwnld_totalz = 0;
#if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO
  www_dwnld_time_sum = 0.0;
#endif
#if WATCH_PROXY || WATCH_SQUID
  prxy_squid_indirect = 0;
  prxy_squid_cache_hits = 0;
#ifdef WATCH_PROXY
  prxy_cache_writes = 0;
  prxy_uncacheable = 0;
#else
  squid_cache_misses = 0;
  squid_icp_requests = 0;
  squid_icp_queries = 0;
  squid_client_http = 0;
#endif
#endif

  if (www_log_filename != nil) {
    now = time(0);
    while (now < sleep_till) {
#ifdef WATCH_CPU
      sleep_till_and_count_new_processes(now + 5);
#else
      sleep(5);
#endif
      now = time(0);
      if (www_fd != 0) {
        buf[BUFSIZ-1] = 127;
        while (fgets(buf, BUFSIZ, www_fd) != nil) {
          httpops += 1.0;
          if (www_gatelen > 0) {
            if (strncmp(buf, www_gateway, www_gatelen) == 0) {
              gateops += 1.0;
            }
          }
          accesslog(buf);
          /*
           * If the line is longer than the buffer, then ignore the rest
           * of the line.
           */
          while (buf[BUFSIZ-1] == 0 && buf[BUFSIZ-2] != '\n') {
            buf[BUFSIZ-1] = 127;
            if (fgets(buf, BUFSIZ, www_fd) == nil) {
              break;
            }
          }
        }
      }

      /*
       * See if the file has been switched or truncated.
       */
      if (stat(www_log_filename, www_stat) == 0) {
        if (www_ino != www_stat[0].st_ino || www_size > www_stat[0].st_size) {
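          /*
           * A different inode means the log was rotated and a smaller size
           * means it was truncated; either way the descriptor we hold no
           * longer refers to the data being written, so reopen the file
           * below and read it again from the beginning.
           */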
          /*
           * Close the old log file.
           */
          if (www_fd != 0) {
            fclose(www_fd);
          }

          /*
           * The log file has changed, open the new one.
           */
          www_fd = fopen(www_log_filename, "r");
          if (www_fd != 0) {
            fstat(www_fd, www_stat);
            www_ino = www_stat[0].st_ino;
            buf[BUFSIZ-1] = 127;
            while (fgets(buf, BUFSIZ, www_fd) != nil) {
              httpops += 1.0;
              if (www_gatelen > 0) {
                if (strncmp(buf, www_gateway, www_gatelen) == 0) {
                  gateops += 1.0;
                }
              }
              accesslog(buf);
              /*
               * If the line is longer than the buffer, then ignore
               * the rest of the line.
               */
              while (buf[BUFSIZ-1] == 0 && buf[BUFSIZ-2] != '\n') {
                buf[BUFSIZ-1] = 127;
                if (fgets(buf, BUFSIZ, www_fd) == nil) {
                  break;
                }
              }
            }
          }
        }
        /* Remember size for next time. */
        www_size = www_stat[0].st_size;
      }

      www5_now = gethrtime();
      www5_interval = (www5_now - www5_then) * 0.000000001;
      www5_then = www5_now;
      dtmp = (httpops - lastops)/www5_interval;
      if (dtmp > httpops5) {
        httpops5 = dtmp;
      }
      lastops = httpops;
    }
  } else {
    sleep_till_and_count_new_processes(sleep_till);
    www5_now = gethrtime();
  }

  www_now = www5_now;
  www_interval = (www_now - www_then) * 0.000000001;
  www_then = www_now;

  /*
   * Use dtmp to get percentages.
   */
  if (httpops == 0.0) {
    dtmp = 0.0;
  } else {
    dtmp = 100.0 / httpops;
  }

#if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO
  for (i=0; i<5; ++i) {
    if (dwnld_size[i] == 0) {
      www_dwnld_time_by_size[i] = 0.0;
    } else {
      www_dwnld_time_by_size[i] = www_dwnld_time_by_size[i]/dwnld_size[i];
    }
  }
#endif
}

int count_proc(string name)
{
  int count;
  prpsinfo_t p;

  count = 0;
  for (p=first_proc(); p.pr_pid != -1; p=next_proc()) {
    if (p.pr_fname =~ name) {
      count++;
    }
  }
  return count;
}

put_httpd()
{
  put_output("#httpds", sprintf("%7d", count_proc(www_server_proc_name)));
  put_output("httpop/s", sprintf("%8.2f", httpops/www_interval));
  put_output("http/p5s", sprintf("%8.2f", httpops5));
  put_output("cndget/s", sprintf("%8.2f", httpop_condgets/www_interval));
  put_output("search/s", sprintf("%8.3f", httpop_searches/www_interval));
  put_output(" cgi/s", sprintf("%8.3f", httpop_cgi_bins/www_interval));
  put_output(" htErr/s", sprintf("%8.3f", httpop_errors/www_interval));
  put_output(" httpb/s", sprintf("%8.0f", dwnld_totalz/www_interval));
  put_output(" %to1KB", sprintf("%8.2f", dtmp*dwnld_size[0]));
  put_output(" %to10KB", sprintf("%8.2f", dtmp*dwnld_size[1]));
  put_output("%to100KB", sprintf("%8.2f", dtmp*dwnld_size[2]));
  put_output(" %to1MB", sprintf("%8.2f", dtmp*dwnld_size[3]));
  put_output("%over1MB", sprintf("%8.2f", dtmp*dwnld_size[4]));
  put_output(www_gateway, sprintf("%8.2f", gateops/www_interval));
#if WATCH_PROXY || WATCH_SQUID
  put_output(" %indir", sprintf("%8.2f", dtmp * prxy_squid_indirect));
  put_output("%cch_hit", sprintf("%8.2f", dtmp * prxy_squid_cache_hits));
#ifdef WATCH_PROXY
  put_output("%cch_wrt", sprintf("%8.2f", dtmp * prxy_cache_writes));
  put_output("%cch_unc", sprintf("%8.2f", dtmp * prxy_uncacheable));
#else
  put_output("%cch_mis", sprintf("%8.2f", dtmp * squid_cache_misses));
  put_output("%cch_req", sprintf("%8.2f", dtmp * squid_icp_requests));
  put_output("%cch_qry", sprintf("%8.2f", dtmp * squid_icp_queries));
#endif
  put_output(" xfr_t", sprintf("%8.2f", 0.01 * dtmp * www_dwnld_time_sum));
  put_output(" xfr1_t", sprintf("%8.2f", www_dwnld_time_by_size[0]));
  put_output(" xfr10_t", sprintf("%8.2f", www_dwnld_time_by_size[1]));
  put_output("xfr100_t", sprintf("%8.2f", www_dwnld_time_by_size[2]));
  put_output(" xfr1M_t", sprintf("%8.2f", www_dwnld_time_by_size[3]));
  put_output("xfro1M_t", sprintf("%8.2f", www_dwnld_time_by_size[4]));
#elif WATCH_YAHOO
  put_output(" wprc_t", sprintf("%9.5f", 0.01 * dtmp * www_dwnld_time_sum));
  put_output(" wprc1_t", sprintf("%9.5f", www_dwnld_time_by_size[0]));
sprintf("%9.5f", www_dwnld_time_by_size[1])); put_output("wprc100_t", sprintf("%9.5f", www_dwnld_time_by_size[2])); put_output(" wprc1M_t", sprintf("%9.5f", www_dwnld_time_by_size[3])); put_output("wprco1M_t", sprintf("%9.5f", www_dwnld_time_by_size[4])); #endif } #endif