//
// Orcallator.se, a log generating performance monitor.
//
// This program logs many different system quantities to a log file
// for later processing.
//
// Author: Blair Zajac.
//
// Portions copied from percollator.se written by Adrian Cockroft.
//
// Version 1.28b3: Mar 27, 2001
//   Recoded measure_disk() to access the RAWDISK interface to sys_kstat
//   device information to allow the activity on Sun's A1000 and Clariion
//   Raid controller drives to be seen.  Apparently the pseudo drivers do
//   not update the kstat interface.  It also inverts the fix provided by
//   version 1.23 to avoid over-counting md devices, by suppressing stats
//   from slices and metadevices and instead reporting on full devices
//   such as c0t0d0 or sd0.  Note: this may have introduced an interaction
//   with the live_rules.se class monitoring of drive performance.
//   Exclude floppy disks and tape drives from RAWDISK.  Added wio% to
//   measure wait time since the idle calculation is wrong without this.
//   Prevent filesystems mounted under /snapshots from being seen.  Patch
//   contributed by Alan LeGrand.
// Version 1.27: Mar 27, 2001
//   Print the portion of time running in idle mode with some process
//   waiting for block I/O as wio% and otherwise completely idle time as
//   idle%.
// Version 1.26: Feb 5, 2001
//   Make sure to check the return from stat() on the web server access
//   log in case the file is missing.  Use fstat() instead of stat() when
//   a file descriptor is available.
// Version 1.25: Mar 30, 2000
//   Fix a typo where nil was misspelled as nik.
// Version 1.24: Mar 25, 2000
//   When orcallator.se was running on a system with an older version of
//   SE, the p_vmstat.scan variable is an integer and the sprintf to %8.3f
//   fails, resulting in a perceived scan rate of 0 pages per second.  Now
//   always add 0.0 to p_vmstat.scan to get a double.
// Version 1.23: Feb 25, 2000
//   When orcallator.se was running on a system with DiskSuite, the same
//   physical disk was listed multiple times when it appeared in several
//   metadevices.  The solution to the problem is not to build the c0t0d0
//   name but to use the long disk name provided by the long_name string.
//   Patch contributed by Paul Haldane.
// Version 1.22: Jan 14, 2000
//   Include code to record NFS v2 and v3 server statistics.  The new
//   statistics are: nfss_calls, the number of NFS calls to the NFS
//   server; nfss_bad, the number of bad NFS calls per second; and
//   v{2,3}{reads,writes}, which are nfss_calls broken down into NFS
//   version 2 and NFS version 3 calls.  The sum of v{2,3}{reads,writes}
//   will be less than nfss_calls as the other types of NFS calls, such as
//   getattr and lookup, are not included.  Contributed by Paul Haldane.
//   This code is enabled by the standard -DWATCH_OS or individually by
//   -DWATCH_NFS_SERVER.  The define -DWATCH_NFS has been superseded by
//   -DWATCH_NFS_CLIENT, but to keep backwards compatibility,
//   -DWATCH_NFS_CLIENT will be defined if -DWATCH_NFS is defined.
// Version 1.21: Jan 12, 2000
//   Prevent core dumps on extremely long access log lines.
// Version 1.20: Oct 20, 1999
//   Update my email address.
// Version 1.19: Oct 13, 1999
//   Prevent a division by zero in calculating the mean_disk_busy if the
//   number of disks on the system is 0.
// Version 1.18: Oct 12, 1999
//   Rename disk_runp.c?t?d? to disk_runp_c?t?d? to remove the .'s.
// Version 1.17: Oct 8, 1999
//   Do not record mount point statistics for locally mounted /cdrom
//   partitions.
// Version 1.16: Oct 7, 1999
//   To keep backwards compatibility, define WATCH_WEB if WATCH_HTTPD is
//   defined.  If the COMPRESSOR environmental variable is defined, then
//   when a new log file is opened for a new day, the just closed log file
//   is compressed using the COMPRESSOR command in the following manner:
//     system(sprintf("%s %s &", COMPRESSOR, log_file))
//   COMPRESSOR should be set to something like "gzip -9", "compress", or
//   "bzip2 -9".
// Version 1.15: Oct 5, 1999
//   kvm$mpid is an int, not a long.  This caused problems on Solaris 7
//   hosts running a 64 bit kernel.
// Version 1.14: Oct 1, 1999
//   Rename disk.c?t?d? column names to disk_runp.c?t?d? to better reflect
//   the data being recorded and to allow for more per disk information
//   later.
// Version 1.13: Sep 24, 1999
//   Fix a bug in the disk_mean calculation where it was being divided by
//   the wrong disk_count; now it should be much larger and in scale with
//   disk_peak.  When WATCH_DISK is defined, now print each disk's run
//   percent.  Add a new define WATCH_MOUNTS, which reports each local
//   mount point's disk space and inode capacity, usage, space available
//   for non-root users, and percent used.  This comes from Duncan Lawie
//   tyger@hoopoes.com.  Add some smarts so that if the number of
//   interfaces, physical disks, or mounted partitions changes, then a new
//   header is printed.  This will prevent column name and data mixups
//   when the system configuration changes.
// Version 1.12: Sep 14, 1999
//   Add the page scan rate as scanrate in measure_cpu.
// Version 1.11: Aug 13, 1999
//   Add the number of CPUs as ncpus.  Move measure_disk and measure_ram
//   sooner in the list of subsystems to handle.  Increase the number of
//   characters for each network interface from four to five.  Add new
//   disk reads, writes, Kbytes read, and Kbytes written per second.  Add
//   the amount of free memory as freememK.
// Version 1.10: Jul 28, 1999
//   Measure the process spawn rate if WATCH_CPU is defined and the user
//   is root.
// Version 1.9: Jun 2, 1999
//   If WATCH_YAHOO is defined, then process the access log as a Yahoo!
//   style access log.  Restructure the code to handle different web
//   server access log formats.
// Version 1.8: Jun 1, 1999
//   If the environmental variable WEB_SERVER is defined, use its value as
//   the name of the process to count for the number of web servers on the
//   system.  If WEB_SERVER is not defined, then count the number of
//   httpd's.
// Version 1.7: Mar 25, 1999
//   Simplify and speed up count_proc by 20%.
// Version 1.6: Feb 23, 1999
//   Print pvm.user_time and system_time correctly.
// Version 1.5: Feb 23, 1999
//   Always write a header to a new file.
// Version 1.4: Feb 19, 1999
//   Handle a missing HTTP/1.x in the access log.
// Version 1.3: Feb 18, 1999
//   On busy machines httpops5 will be enlarged.
// Version 1.2: Feb 18, 1999
//   Output data on integral multiples of the interval.
// Version 1.1: Feb 18, 1999
//   Integrate Squid log processing from SE 3.1.
// Version 1.0: Sep 9, 1998
//   Initial version.
//

// The default sampling interval in seconds.
#define SAMPLE_INTERVAL 300

// The maximum number of columns of data.
#define MAX_COLUMNS 512

// Define the different parts of the system you want to examine.
#ifdef WATCH_OS #define WATCH_CPU 1 #define WATCH_MUTEX 1 #define WATCH_NET 1 #define WATCH_TCP 1 #define WATCH_NFS_CLIENT 1 #define WATCH_NFS_SERVER 1 #define WATCH_MOUNTS 1 #define USE_RAWDISK 1 #define WATCH_DISK 1 #define WATCH_DNLC 1 #define WATCH_INODE 1 #define WATCH_RAM 1 #define WATCH_PAGES 1 #endif // Keep backwards compatibility with WATCH_HTTPD. #ifdef WATCH_HTTPD #define WATCH_WEB 1 #endif // Keep backwards compatibility with WATCH_NFS. #ifdef WATCH_NFS #ifndef WATCH_NFS_CLIENT #define WATCH_NFS_CLIENT 1 #endif #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef WATCH_MOUNTS #include #include #endif #if WATCH_CPU || WATCH_WEB #include #ifdef WATCH_CPU // This is the maximum pid on Solaris hosts. #define DEFAULT_MAXPID 30000 #include #endif #ifdef WATCH_WEB #include // Define this macro which returns the size index for a file of a // particular size. This saves the overhead of a function call. #define WWW_SIZE_INDEX(size, size_index) \ if (size < 1024) { \ size_index=0; /* Under 1KB */ \ } else { \ if (size < 10240) { \ size_index=1; /* Under 10K */ \ } else { \ if (size < 102400) { \ size_index=2; /* Under 100KB */ \ } else { \ if (size < 1048576) { \ size_index=3; /* Under 1MB */ \ } else { \ size_index=4; /* Over 1MB */ \ } \ } \ } \ } \ dwnld_size[size_index]++; // Handle the reply code from the server. #define WWW_REPLY_CODE(word) \ if (word == "304") { \ httpop_condgets++; \ } \ else { \ first_byte = word; \ if (first_byte[0] == '4' || first_byte[0] == '5') { \ httpop_errors++; \ } \ } \ // Handle the method of the object served. This define only works // with non-proxy servers. #define WWW_METHOD1(word) \ switch (word) { \ case "get": \ case "GET": \ httpop_gets++; \ break; \ case "post": \ case "POST": \ httpop_posts++; \ break; \ case "head": \ case "HEAD": \ ishead = 1; \ httpop_condgets++; \ break; #ifdef WATCH_SQUID #define WWW_METHOD2 \ case "icp_query": \ case "ICP_QUERY": \ squid_icp_queries++; \ break; #else #define WWW_METHOD2 #endif #define WWW_METHOD_END \ default: \ break; \ } #define WWW_METHOD(word) WWW_METHOD1(word) WWW_METHOD2 WWW_METHOD_END #endif #endif // Put all rules here so they can be accessed by the handle functions. lr_cpu_t lr_cpu$cpu; lr_cpu_t tmp_lrcpu; lr_mutex_t lr_mutex$m; lr_mutex_t tmp_mutex; lr_net_t lr_net$nr; lr_net_t tmp_nr; lr_tcp_t lr_tcp$tcp; lr_tcp_t tmp_lrtcp; #ifdef WATCH_TCP tcp tcp$tcp; tcp tmp_tcp; #endif lr_rpcclient_t lr_rpcclient$r; lr_rpcclient_t tmp_lrpcc; lr_disk_t lr_disk$dr; lr_disk_t tmp_dr; lr_dnlc_t lr_dnlc$dnlc; lr_dnlc_t tmp_lrdnlc; lr_inode_t lr_inode$inode; lr_inode_t tmp_lrinode; lr_ram_t lr_ram$ram; lr_ram_t tmp_lrram; #ifdef WATCH_PAGES ks_system_pages kstat$pages; ks_system_pages tmp_kstat_pages; #endif lr_swapspace_t lr_swapspace$s; lr_swapspace_t tmp_lrswap; lr_kmem_t lr_kmem$kmem; lr_kmem_t tmp_lrkmem; ks_system_misc kstat$misc; ks_system_misc tmp_kstat_misc; // Put application globals here. string nodename; // Name of this machine. string program_name; // Name of this program. int hz; // Clock tick rate. int page_size; // Page size in bytes. long boot_time; // Boot time of the system. long interval = SAMPLE_INTERVAL; // Sampling interval. #ifdef WATCH_CPU int can_read_kernel = 0; // If the kernel can be read. int kvm$mpid; // The last created PID. // These variables store the mpid before and after the standard interval. 
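// The spawn rate is derived by sampling the kernel's last-assigned pid
// (kvm$mpid) at two points in time and dividing the difference by the
// elapsed time, allowing for the counter wrapping around at
// DEFAULT_MAXPID.  The following standalone C sketch (not part of this
// script; the function and argument names are invented for the
// illustration) shows the same calculation that the measurement code
// below performs.

/* Rate of new processes given two samples of the last-assigned pid. */
static double sample_spawn_rate(int pid_prev, int pid_now,
                                double elapsed_seconds, int maxpid)
{
    int diff;

    if (pid_now >= pid_prev)
        diff = pid_now - pid_prev;
    else
        diff = pid_now + maxpid - pid_prev;   /* pid counter wrapped */
    return diff / elapsed_seconds;
}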
int mpid_previous;
int mpid_current;
ulonglong mpid_then;
ulonglong mpid_now;

// These variables store the mpid before and after 5 second intervals.
int mpid5_previous;
int mpid5_current;
ulonglong mpid5_then;
ulonglong mpid5_now;
double mpid5_rate;
#endif

#ifdef WATCH_MOUNTS
mnttab_t mnt$mnt;
mnttab_t tmp_mnt;
#endif

#ifdef WATCH_NFS_SERVER
ks_nfs_server kstat$nfs;
ks_nfs_server tmp_nfs;
ks_rfs_proc_v2 kstat$rfsproccnt_v2;
ks_rfs_proc_v2 tmp_rfsproccnt_v2;
ks_rfs_proc_v3 kstat$rfsproccnt_v3;
ks_rfs_proc_v3 tmp_rfsproccnt_v3;
#endif

// Variables for handling the httpd access log.
#ifdef WATCH_WEB
string www_search_url = getenv("SEARCHURL");
string www_server_proc_name = getenv("WEB_SERVER");
string www_log_filename = getenv("WEB_LOG");
string www_gateway = getenv("GATEWAY");
ulong www_fd;
uint www_gatelen;
stat_t www_stat[1];
ulong www_ino;
long www_size;
double www_interval;      // Hi-res interval time.
ulonglong www_then;
ulonglong www_now;
double www5_interval;     // Actual hi-res 5 second interval.
ulonglong www5_then;
ulonglong www5_now;
double httpops;
double httpops5;
double gateops;
double dtmp;
long httpop_gets;
long httpop_condgets;     // HEAD or code = 304 conditional get no data.
long httpop_posts;
long httpop_cgi_bins;
long httpop_searches;
long httpop_errors;
long dwnld_size[5];       // [0] < 1K, [1] < 10K, [2] < 100K, [3] < 1M, [4] >= 1M
long dwnld_totalz;        // Total size counted from log.
#if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO
// If we're watching a Yahoo log, then take the transfer time to be the
// processing time.
double www_dwnld_time_sum;          // Transfer time.
double www_dwnld_time_by_size[5];   // Mean transfer time by size bin.
#endif
#if WATCH_PROXY || WATCH_SQUID
long prxy_squid_indirect;       // # hits that go via PROXY,SOCKS,parent
long prxy_squid_cache_hits;     // # hits returned from cache.
#endif
#ifdef WATCH_PROXY
long prxy_cache_writes;         // Number of writes and updates to cache.
long prxy_uncacheable;          // Number of explicitly uncacheable httpops.
// Any extra is errors or incomplete ops.
#endif
#ifdef WATCH_SQUID
long squid_cache_misses;
long squid_icp_requests;
long squid_icp_queries;
long squid_client_http;
#endif
#endif

// RAWDISK BEGIN
#ifdef USE_RAWDISK
#include
// This extension accesses the sys_kstat.se interface to the kstat IO
// queues to extract info on drives not available in the kstat.se
// kstat$disk interface.  Global data shared between function calls.
struct RawDisk {
  // Exposed interface (matches kstat)
  char long_name[16];
  char short_name[8];
  double reads;
  double kreads;
  double writes;
  double kwrites;
  double avg_wait;
  double avg_serv;
  double service;
  double wait_percent;
  double run_percent;
  // Hidden internal registers to track sys_kstats counters.
  int _number;            // kstat disk #
  ulonglong _nread;       // Number of bytes read
  ulonglong _nwritten;    // Number of bytes written
  uint _reads;            // Number of read operations
  uint _writes;           // Number of write operations
  longlong _wtime;        // Cumulative wait (pre-service) time
  longlong _wlentime;     // Cumulative wait length*time product
  longlong _wlastupdate;  // Last time wait queue changed
  longlong _rtime;        // Cumulative run (service) time
  longlong _rlentime;     // Cumulative run length*time product
  longlong _rlastupdate;  // Last time run queue changed
  uint _wcnt;             // Count of elements in wait state
  uint _rcnt;             // Count of elements in run state
};

// Define global for tracking raw disk data.
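// For reference, the hidden fields above shadow the cumulative counters of a
// kernel I/O kstat.  The standalone C sketch below (an illustration of the
// kstat(3KSTAT) library, not code from this SE script; link with -lkstat on
// Solaris) shows how the same counters are read directly.  From two
// snapshots of these counters, rates follow by differencing: for example
// busy% is 100 * delta(rtime) / delta(elapsed time), both in nanoseconds,
// and the average time per I/O follows from delta(rlentime) over the number
// of I/Os completed in the interval.

#include <stdio.h>
#include <string.h>
#include <kstat.h>

int dump_disk_io_kstats(void)
{
    kstat_ctl_t *kc = kstat_open();
    kstat_t *ksp;
    kstat_io_t kio;

    if (kc == NULL)
        return -1;
    for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
        /* Physical disks show up as I/O kstats in class "disk". */
        if (ksp->ks_type != KSTAT_TYPE_IO || strcmp(ksp->ks_class, "disk") != 0)
            continue;
        if (kstat_read(kc, ksp, &kio) == -1)
            continue;
        printf("%-8s reads=%u writes=%u nread=%llu nwritten=%llu rtime=%lld\n",
               ksp->ks_name, kio.reads, kio.writes,
               (unsigned long long)kio.nread,
               (unsigned long long)kio.nwritten,
               (long long)kio.rtime);
    }
    return kstat_close(kc);
}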
#define MAX_RAWDISKS 512 RawDisk RAW_disk[MAX_RAWDISKS]; int RAW_disk_map=0; int RAW_disk_count=0; double RAW_disk_lastupdate; // Function to scan kstat and map short device names to long device names. raw_disk_map() { int i; int j=0; char long_name[16]; char short_name[16]; // SE for 2.6 & 2.7 appears to have a bug where MAX_DISK is too // large when an A1000 raid controller is present. It's not really // SE as iostat had to be patched for the same bug. The bug appears // to be tied to a failure to update the kstat interface diskinfo // uses to map short names to long names. Since raw_disk_update has // already identified how many physical devices are present and they // are listed first we limit our self to mapping the first // RAW_disk_count device name. for (i=0; i _nread) { _nread = RAW_disk[rdisk]._nread; } _reads = kio.reads; if (RAW_disk[rdisk]._reads > _reads) { _reads = RAW_disk[rdisk]._reads; } _rlentime = kio.rlentime; _rtime = kio.rtime; _rlastupdate = kio.wlastupdate; _rcnt = kio.rcnt; _nwritten = kio.nwritten; if (RAW_disk[rdisk]._nwritten > _nwritten) { _nwritten = RAW_disk[rdisk]._nwritten; } _writes = kio.writes; if (RAW_disk[rdisk]._writes > _writes) { _writes = RAW_disk[rdisk]._nwritten; } _wlentime = kio.wlentime; _wtime = kio.wtime; _wlastupdate = kio.wlastupdate; _wcnt = kio.wcnt; elapsed_etime = (_wlastupdate - RAW_disk[rdisk]._wlastupdate); if (elapsed_etime > 0) { hz_etime = elapsed_etime / nanosecond; big_etime = 1024.0 * hz_etime; } else { elapsed_etime = nanosecond; hz_etime = 1.0; big_etime = 1024.0; } RAW_disk[rdisk].reads =(_reads-RAW_disk[rdisk]._reads) /hz_etime; RAW_disk[rdisk].kreads =(_nread-RAW_disk[rdisk]._nread) /big_etime; RAW_disk[rdisk].writes =(_writes-RAW_disk[rdisk]._writes)/hz_etime; RAW_disk[rdisk].kwrites=(_nwritten-RAW_disk[rdisk]._nwritten) / big_etime; read_writes = elapsed_etime * (RAW_disk[rdisk].reads + RAW_disk[rdisk].writes) / 1024.0; if (read_writes > 0) { RAW_disk[rdisk].avg_wait = (_wlentime - RAW_disk[rdisk]._wlentime) / read_writes; RAW_disk[rdisk].avg_serv = (_rlentime - RAW_disk[rdisk]._rlentime) / read_writes; RAW_disk[rdisk].service = RAW_disk[rdisk].avg_wait + RAW_disk[rdisk].avg_serv; } else { RAW_disk[rdisk].avg_wait = 0.0; RAW_disk[rdisk].avg_serv = 0.0; RAW_disk[rdisk].service = 0.0; } // Update the counters. RAW_disk[rdisk].run_percent = 100.0 * (_rtime - RAW_disk[rdisk]._rtime) / elapsed_etime; RAW_disk[rdisk].wait_percent = 100.0 * (_wtime - RAW_disk[rdisk]._wtime) / elapsed_etime; RAW_disk[rdisk]._writes = _writes; RAW_disk[rdisk]._nwritten = _nwritten; RAW_disk[rdisk]._wlastupdate = _wlastupdate; RAW_disk[rdisk]._wlentime = _wlentime; RAW_disk[rdisk]._wtime = _wtime; RAW_disk[rdisk]._wcnt = _wcnt; RAW_disk[rdisk]._reads = _reads; RAW_disk[rdisk]._nread = _nread; RAW_disk[rdisk]._rlastupdate = _rlastupdate; RAW_disk[rdisk]._rlentime = _rlentime; RAW_disk[rdisk]._rtime = _rtime; RAW_disk[rdisk]._rcnt = _rcnt; } } } } kstat_close(kc); // Map long device names for any drives that we just discovered. if (RAW_disk_map == 1) { raw_disk_map(); } } #endif // RAWDISK_END // Variables for handling output. string compress = getenv("COMPRESSOR"); // How to compress logs. ulong ofile; // File pointer to the logging file. string col_comment[MAX_COLUMNS]; // Comments for each column. string col_data[MAX_COLUMNS]; // Data for each column. int current_column = 0; // The current column. int print_header = 1; // Flag to flush header. // Send the stored columns of information to the output. 
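// The output scheme below buffers one header string and one value string per
// metric and emits them a row at a time; the header row is re-emitted
// whenever the column layout changes (print_header is set).  A minimal
// standalone C sketch of the same accumulate-then-flush idea, with invented
// names, is:

#include <stdio.h>

#define SKETCH_MAX_COLS 512

static char *sk_header[SKETCH_MAX_COLS];
static char *sk_value[SKETCH_MAX_COLS];
static int   sk_count;
static int   sk_need_header = 1;

static void sk_add_column(char *header, char *value)
{
    if (sk_count >= SKETCH_MAX_COLS)
        return;                       /* the script itself reports an error and exits */
    sk_header[sk_count] = header;
    sk_value[sk_count] = value;
    sk_count++;
}

static void sk_flush(FILE *out)
{
    int i;

    if (sk_need_header) {             /* one header row per layout change */
        for (i = 0; i < sk_count; i++)
            fprintf(out, "%s ", sk_header[i]);
        fputc('\n', out);
        sk_need_header = 0;
    }
    for (i = 0; i < sk_count; i++)    /* then the data row */
        fprintf(out, "%s ", sk_value[i]);
    fputc('\n', out);
    fflush(out);
    sk_count = 0;                     /* start the next sample's row */
}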
print_columns(string data[]) { int i; for (i=0; i= MAX_COLUMNS) { fprintf(stderr, "%s: too many columns (%d). Increase MAX_COLUMNS.\n", program_name, current_column); exit(1); } col_comment[current_column] = comment; col_data[current_column] = data; ++current_column; } flush_output() { if (print_header != 0) { print_columns(col_comment); print_header = 0; } print_columns(col_data); current_column = 0; } // Sets ofile to the output file pointer. Creates or appends to the // log file if OUTDIR is set, otherwise sets the file pointer to STDOUT. // It start a new log file each day. It compresses the previous days // log file if the environmental variable COMPRESSOR is set. checkoutput(tm_t now) { string outdir = getenv("OUTDIR"); string outname; tm_t then; char tm_buf[32]; if (outdir == nil) { // No output dir so use stdout. if (ofile == 0) { // First time, so print header and set ofile. ofile = stdout; print_header = 1; } return; } // Maintain daily output logfiles in OUTDIR. if (now.tm_yday != then.tm_yday) { // First time or day has changed, start new logfile. if (ofile != 0) { // Close and optionally compress the existing output file. fclose(ofile); if (compress != nil) { system(sprintf(compress, outname)); } } strftime(tm_buf, sizeof(tm_buf), "%Y-%m-%d", now); outname = sprintf("%s/percol-%s", outdir, tm_buf); // Open for append either way. ofile = fopen(outname, "a"); if (ofile == 0) { perror("can't open output logfile"); exit(1); } // Always write header. print_header = 1; then = now; } } int main(int argc, string argv[]) { utsname_t u[1]; long now; long sleep_till; // Time to sleep to. tm_t tm_now; // Get the nodename of the machine. uname(u); nodename = u[0].nodename; program_name = argv[0]; // Handle the command line arguments. switch (argc) { case 1: break; case 2: interval = atoi(argv[1]); break; default: fprintf(stderr, "usage: se [-Defines] %s [interval]\n", program_name); fprintf(stderr, "The default interval is %d seconds.\n", SAMPLE_INTERVAL); fprintf(stderr, "%s uses the following environmental variables:\n", program_name); fprintf(stderr, " OUTDIR directory to write log files, output to stdout if not set\n"); fprintf(stderr, " WEB_SERVER string to search for number of web servers, i.e. httpd\n"); fprintf(stderr, " WEB_LOG location of web server access log, i.e. /httpd/logs/access\n"); fprintf(stderr, " GATEWAY special address to monitor, i.e. some.where.com\n"); fprintf(stderr, " SEARCHURL match for search scripts, default is search.cgi\n"); fprintf(stderr, " COMPRESSOR compress previous days logs with this command, i.e. 
\"gzip -9\"\n"); fprintf(stderr, "Add these defines to enable monitoring of specific subsystems:\n"); fprintf(stderr, " -DWATCH_WEB watch web server access logs\n"); fprintf(stderr, " -DWATCH_PROXY use WEB_LOG as a NCSA style proxy log\n"); fprintf(stderr, " -DWATCH_SQUID use WEB_LOG as a Squid log\n"); fprintf(stderr, " -DWATCH_OS includes all of the below:\n"); fprintf(stderr, " -DWATCH_CPU watch the cpu load, run queue, etc\n"); fprintf(stderr, " -DWATCH_MUTEX watch the number of mutex spins\n"); fprintf(stderr, " -DWATCH_NET watch all Ethernet interfaces\n"); fprintf(stderr, " -DWATCH_TCP watch all the TCP/IP stack\n"); fprintf(stderr, " -DWATCH_NFS_CLIENT watch NFS client requests\n"); fprintf(stderr, " -DWATCH_NFS_SERVER watch NFS server requests\n"); fprintf(stderr, " -DWATCH_MOUNTS watch usage of mount points\n"); fprintf(stderr, " -DWATCH_DISK watch disk read/write usage\n"); fprintf(stderr, " -DWATCH_DNLC watch the directory name lookup cache\n"); fprintf(stderr, " -DWATCH_INODE watch the inode cache\n"); fprintf(stderr, " -DWATCH_RAM watch memory usage\n"); fprintf(stderr, " -DWATCH_PAGES watch where pages are allocated\n"); exit(1); break; } // Initialize the various structures. initialize(); // Run forever. If WATCH_WEB is defined, then have measure_web() // do the sleeping while it is watching the access log file until the // next update time for the whole operating system. Also, collect the // data from the access log file before printing any output. for (;;) { // Calculate the next time to sleep to that is an integer multiple of // the interval time. Make sure that at least half of the interval // passes before waking up. now = time(0); sleep_till = (now/interval)*interval; while (sleep_till < now + interval*0.5) { sleep_till += interval; } #ifdef WATCH_WEB measure_web(sleep_till); #else sleep_till_and_count_new_proceses(sleep_till); #endif // Get the current time. now = time(0); tm_now = localtime(&now); measure_os(now, tm_now); #ifdef WATCH_WEB put_httpd(); #endif // Get a file descriptor to write to. Maintains daily output files. checkoutput(tm_now); // Print the output. flush_output(); } return 0; } initialize() { #ifdef WATCH_CPU int i; #endif // Get the command to compress the log files. if (compress == nil || compress == "") { compress = nil; } else { compress = sprintf("%s %%s &", compress); } #ifdef WATCH_CPU // Initialize the process spawning rate measurement variables. // Determine if the kernel can be read to measure the last pid. i = open("/dev/kmem", O_RDONLY); if (i != -1) { close(i); can_read_kernel = 1; mpid_previous = kvm$mpid; mpid_then = gethrtime(); mpid_current = mpid_previous; mpid5_then = mpid_then; mpid5_previous = mpid_previous; mpid5_current = mpid_previous; mpid5_rate = 0; } #endif #ifdef WATCH_WEB // Initialize those variables that were not set with environmental // variables. if (www_search_url == nil || www_search_url == "") { www_search_url = "search.cgi"; } if (www_server_proc_name == nil || www_server_proc_name == "") { www_server_proc_name = "httpd"; } if (www_gateway == nil || www_gateway == "" ) { www_gateway = "NoGatway"; www_gatelen = 0; } else { www_gatelen = strlen(www_gateway); } // Initialize the web server watching variables. Move the file pointer // to the end of the web access log and note the current time. 
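// The access log is followed rather than re-read: the inode and size are
// recorded, the descriptor is positioned at the end of the file, and on each
// pass any newly appended lines are consumed.  If the inode changes (the log
// was rotated) or the size shrinks (the log was truncated), the file is
// reopened.  A standalone C sketch of that reopen test, with invented helper
// names, is:

#include <stdio.h>
#include <sys/stat.h>

static FILE  *log_fp;
static ino_t  log_ino;
static off_t  log_size;

static void reopen_log_if_rotated(const char *path)
{
    struct stat st;

    if (stat(path, &st) != 0)
        return;                             /* log temporarily missing */
    if (log_fp == NULL || st.st_ino != log_ino || st.st_size < log_size) {
        if (log_fp != NULL)
            fclose(log_fp);
        log_fp = fopen(path, "r");          /* new file: read from the start */
        if (log_fp != NULL)
            log_ino = st.st_ino;
    }
    log_size = st.st_size;
}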
  if (www_log_filename != nil) {
    www_fd = fopen(www_log_filename, "r");
    www_ino = 0;
    www_size = 0;
    if (www_fd != 0) {
      if (fstat(www_fd, www_stat) == 0) {
        www_ino = www_stat[0].st_ino;
        www_size = www_stat[0].st_size;
      }
      // Move to the end of the file.
      fseek(www_fd, 0, 2);
    }
  }
  www_then = gethrtime();
  www5_then = www_then;
#endif

  // Sleep to give the disks a chance to update.
  sleep(DISK_UPDATE_RATE);

  // Get the clock tick rate.
  hz = sysconf(_SC_CLK_TCK);

  // Get the page size.
  page_size = sysconf(_SC_PAGESIZE);

  // Calculate the system boot time.
  boot_time = time(0) - (kstat$misc.clk_intr / hz);

  // Perform the first measurement of the system.
  _measure_os();
}

// Measure the system statistics all at once.
_measure_os()
{
  tmp_lrcpu = lr_cpu$cpu;
  tmp_mutex = lr_mutex$m;
  tmp_nr = lr_net$nr;
  tmp_lrtcp = lr_tcp$tcp;
#ifdef WATCH_TCP
  tmp_tcp = tcp$tcp;
#endif
  tmp_lrpcc = lr_rpcclient$r;
  tmp_dr = lr_disk$dr;
  tmp_lrdnlc = lr_dnlc$dnlc;
  tmp_lrinode = lr_inode$inode;
  tmp_lrram = lr_ram$ram;
#ifdef WATCH_PAGES
  tmp_kstat_pages = kstat$pages;
#endif
  tmp_lrswap = lr_swapspace$s;
  tmp_lrkmem = lr_kmem$kmem;
  tmp_kstat_misc = kstat$misc;
#ifdef WATCH_NFS_SERVER
  tmp_nfs = kstat$nfs;
  tmp_rfsproccnt_v2 = kstat$rfsproccnt_v2;
  tmp_rfsproccnt_v3 = kstat$rfsproccnt_v3;
#endif
#ifdef USE_RAWDISK
  raw_disk_update();
#endif
}

measure_os(long now, tm_t tm_now)
{
  // Measure the system now.
  _measure_os();

  // Take care of miscellaneous measurements.
  measure_misc(now, tm_now);

  // Take care of cpu.
#ifdef WATCH_CPU
  measure_cpu();
#endif

  // Take care of mutexes.
#ifdef WATCH_MUTEX
  measure_mutex();
#endif

  // Take care of mount points.
#ifdef WATCH_MOUNTS
  measure_mounts();
#endif

  // Take care of the disks.
#ifdef WATCH_DISK
  measure_disk();
#endif

  // Take care of ram.
#ifdef WATCH_RAM
  measure_ram();
#endif

  // Take care of the network.
#ifdef WATCH_NET
  measure_net();
#endif

  // Take care of TCP/IP.
#ifdef WATCH_TCP
  measure_tcp();
#endif

  // Take care of NFS client statistics.
#ifdef WATCH_NFS_CLIENT
  measure_nfs_client();
#endif

  // Take care of NFS server statistics.
#ifdef WATCH_NFS_SERVER
  measure_nfs_server();
#endif

  // Take care of DNLC.
#ifdef WATCH_DNLC
  measure_dnlc();
#endif

  // Take care of the inode cache.
#ifdef WATCH_INODE
  measure_inode();
#endif

  // Take care of page allocations.
#ifdef WATCH_PAGES
  measure_pages();
#endif
}

/*
 * State as a character
 */
char state_char(int state)
{
  switch(state) {
    case ST_WHITE: return 'w';  /* OK states are lower case. */
    case ST_BLUE:  return 'b';
    case ST_GREEN: return 'g';
    case ST_AMBER: return 'A';  /* Bad states are upper case to stand out. */
    case ST_RED:   return 'R';
    case ST_BLACK: return 'B';
    default:       return 'I';  /* Invalid state.
*/ } } measure_misc(long now, tm_t tm_now) { long uptime; char states[12]; char tm_buf[16]; uptime = now - boot_time; states = "wwwwwwwwwww"; strftime(tm_buf, sizeof(tm_buf), "%T", tm_now); states[0] = state_char(lr_disk$dr.state); states[1] = state_char(lr_net$nr.state); states[2] = state_char(lr_rpcclient$r.state); states[3] = state_char(lr_swapspace$s.state); states[4] = state_char(lr_ram$ram.state); states[5] = state_char(lr_kmem$kmem.state); states[6] = state_char(lr_cpu$cpu.state); states[7] = state_char(lr_mutex$m.state); states[8] = state_char(lr_dnlc$dnlc.state); states[9] = state_char(lr_inode$inode.state); states[10]= state_char(lr_tcp$tcp.state); put_output(" timestamp", sprintf("%10d", now)); put_output("locltime", tm_buf); put_output("DNnsrkcmdit", states); put_output(" uptime", sprintf("%8d", uptime)); } sleep_till_and_count_new_proceses(long sleep_till) { long now; #ifdef WATCH_CPU long sleep_till1; int mpid5_diff; double mpid5_interval; double rate; #endif now = time(0); while (now < sleep_till) { #ifdef WATCH_CPU if (can_read_kernel != 0) { // Sleep at least 5 seconds to make a measurement. sleep_till1 = now + 5; while (now < sleep_till1) { sleep(sleep_till1 - now); now = time(0); } // Measure the 5 second process creation rate. mpid5_current = kvm$mpid; mpid5_now = gethrtime(); mpid5_interval = (mpid5_now - mpid5_then) * 0.000000001; mpid5_then = mpid5_now; if (mpid5_current >= mpid5_previous) { mpid5_diff = mpid5_current - mpid5_previous; } else { mpid5_diff = mpid5_current + DEFAULT_MAXPID - mpid5_previous; } rate = mpid5_diff/mpid5_interval; if (rate > mpid5_rate) { mpid5_rate = rate; } mpid5_previous = mpid5_current; // Now take these results to measure the long interval rate. // Because the mpid may flip over DEFAULT_MAXPID more than once // in the long interval time span, use the difference between // the previous and current mpid over a 5 second interval to // calculate the long interval difference. mpid_current += mpid5_diff; mpid_now = mpid5_now; } else { sleep(sleep_till - now); } #else sleep(sleep_till - now); #endif now = time(0); } } #ifdef WATCH_CPU measure_cpu() { p_vmstat pvm; double mpid_interval; double mpid_rate; pvm = vmglobal_total(); // In SE 3.0 user_time and system_time are int and in SE 3.1 they are // double, so cast everything to double using + 0.0. put_output(" usr%", sprintf("%5.1f", pvm.user_time + 0.0)); put_output(" sys%", sprintf("%5.1f", pvm.system_time + 0.0)); put_output(" wio%", sprintf("%5.1f", pvm.wait_time + 0.0)); put_output("idle%", sprintf("%5.1f", pvm.idle_time + 0.0)); put_output(" 1runq", sprintf("%6.2f", tmp_kstat_misc.avenrun_1min/256.0)); put_output(" 5runq", sprintf("%6.2f", tmp_kstat_misc.avenrun_5min/256.0)); put_output("15runq", sprintf("%6.2f", tmp_kstat_misc.avenrun_15min/256.0)); put_output("#proc", sprintf("%5lu", tmp_kstat_misc.nproc)); put_output("scanrate", sprintf("%8.3f", pvm.scan + 0.0)); // Calculate the rate of new process spawning. if (can_read_kernel != 0) { mpid_interval = (mpid_now - mpid_then) * 0.000000001; mpid_rate = (mpid_current - mpid_previous) / mpid_interval; put_output("#proc/s", sprintf("%7.3f", mpid_rate)); put_output("#proc/p5s", sprintf("%9.4f", mpid5_rate)); // Reset counters. 
mpid_then = mpid_now; mpid_previous = mpid_current; mpid5_rate = 0; } } #endif #ifdef WATCH_MUTEX measure_mutex() { put_output(" smtx", sprintf("%5d", tmp_mutex.smtx)); put_output("smtx/cpu", sprintf("%8d", tmp_mutex.smtx/tmp_mutex.ncpus)); put_output("ncpus", sprintf("%5d", tmp_mutex.ncpus)); } #endif #ifdef WATCH_NET measure_net() { int previous_count = -1; int current_count; int i; current_count = 0; for (i=0; i peak_disk_busy) { peak_disk_busy = RAW_disk[i].run_percent; } } #else for (i=0; i peak_disk_busy) { peak_disk_busy = GLOBAL_disk[i].run_percent; } } #endif if (disk_count != 0) { mean_disk_busy = mean_disk_busy/disk_count; } put_output("disk_peak", sprintf("%9.3f", peak_disk_busy)); put_output("disk_mean", sprintf("%9.3f", mean_disk_busy)); put_output("disk_rd/s", sprintf("%9.1f", total_reads)); put_output("disk_wr/s", sprintf("%9.1f", total_writes)); put_output("disk_rK/s", sprintf("%9.1f", total_readk)); put_output("disk_wK/s", sprintf("%9.1f", total_writek)); // If the number of disks has changed, say due to a add_drv, then print // new headers. if (previous_count != disk_count) { print_header = 1; previous_count = disk_count; } } #endif #ifdef WATCH_DNLC measure_dnlc() { put_output("dnlc_ref/s", sprintf("%10.3f", tmp_lrdnlc.refrate)); put_output("dnlc_hit%", sprintf("%9.3f", tmp_lrdnlc.hitrate)); } #endif #ifdef WATCH_INODE measure_inode() { put_output("inod_ref/s", sprintf("%10.3f", tmp_lrinode.refrate)); put_output("inod_hit%", sprintf("%9.3f", tmp_lrinode.hitrate)); put_output("inod_stl/s", sprintf("%10.3f", tmp_lrinode.iprate)); } #endif #ifdef WATCH_RAM measure_ram() { put_output("swap_avail", sprintf("%10ld", GLOBAL_pvm[0].swap_avail)); put_output("page_rstim", sprintf("%10d", tmp_lrram.restime)); put_output(" freememK", sprintf("%10d", GLOBAL_pvm[0].freemem)); put_output("free_pages", sprintf("%10d", (GLOBAL_pvm[0].freemem*1024)/page_size)); } #endif #ifdef WATCH_PAGES measure_pages() { put_output("pp_kernel", sprintf("%9lu", tmp_kstat_pages.pp_kernel)); put_output("pagesfree", sprintf("%9lu", tmp_kstat_pages.pagesfree)); put_output("pageslock", sprintf("%9lu", tmp_kstat_pages.pageslocked)); put_output("pagesio", sprintf("%7lu", tmp_kstat_pages.pagesio)); put_output("pagestotl", sprintf("%9lu", tmp_kstat_pages.pagestotal)); } #endif #ifdef WATCH_WEB /* * Breakdown access log format. */ accesslog(string buf) { int z; int size_index; int ishead; string word; char first_byte[1]; #if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO double xf; #ifdef WATCH_SQUID string logtag; string request; #endif #ifdef WATCH_YAHOO string arg; ulong ptr; ulong tmp; ulong ulong_xf; #endif #endif ishead = 0; #ifdef WATCH_YAHOO /* * Make sure that the input line has at least 32 bytes of data plus a new * line, for a total length of 33. */ if (strlen(buf) < 33) { return; } word = strtok(buf, "\05"); #else word = strtok(buf, " "); #endif if (word == nil) { return; } #ifdef WATCH_SQUID /* * Word contains unix time in seconds.milliseconds. */ word = strtok(nil, " "); if (word == nil) { return; } xf = atof(word)/1000.0; www_dwnld_time_sum += xf; #ifdef DINKY printf("time: %s %f total %f\n", word, xf, xfer_sum); #endif word = strtok(nil, " "); /* Client IP address. */ logtag = strtok(nil, "/"); /* Log tag. */ if (logtag == nil) { return; } if (logtag =~ "TCP") { squid_client_http++; } if (logtag =~ "UDP") { squid_icp_requests++; } if (logtag =~ "HIT") { prxy_squid_cache_hits++; } if (logtag =~ "MISS") { squid_cache_misses++; } word = strtok(nil, " "); /* Reply code. 
*/ if (word == nil) { return; } WWW_REPLY_CODE(word) word = strtok(nil, " "); /* Size sent to client. */ if (word == nil) { return; } z = atoi(word); WWW_SIZE_INDEX(z, size_index) www_dwnld_time_by_size[size_index] += xf; request = strtok(nil, " "); /* Request method. */ if (request == nil) { return; } WWW_METHOD(request) /* Do not add the size if it is a HEAD. */ if (ishead == 0) { dwnld_totalz += z; } word = strtok(nil, " "); /* URL. */ if (word == nil) { return; } if (word =~ "cgi-bin") { httpop_cgi_bins++; } if (word =~ www_search_url) { httpop_searches++; } strtok(nil, " "); /* Optional user identity. */ word = strtok(nil, "/"); /* Hierarchy. */ if (word == nil) { return; } if (word =~ "DIRECT") { prxy_squid_indirect++; } #if 0 word = strtok(nil, " "); /* Hostname. */ if (word == nil) { return; } word = strtok(nil, " "); /* Content-type. */ if (word == nil) { return; } #endif #elif WATCH_YAHOO /* * Yahoo log format. Fields in square brackets will only appear in the * log file if the data actually exists (ie. you will never see a null * Referrer field). Further, fields labelled here with "(CONFIG)" will * only appear if they are enabled via the YahooLogOptions configuration * directive. * * IP Address (8 hex digits) * Timestamp (time_t as 8 hex digits) * Processing Time (in microseconds, as 8 hex digits) * Bytes Sent (8 hex digits) * URL * [^Er referrer] (CONFIG) * [^Em method] (CONFIG) * [^Es status_code] * ^Ed signature * \n */ /* * Ignore the IP address and timestamp. Get the processing time, the * number of bytes sent and the URL. For each portion of the line, split * it up into separate pieces. */ if (sscanf(word, "%8lx%8lx%8x%8x", &tmp, &tmp, &ulong_xf, &z) != 4) { return; } xf = ulong_xf/1000000.0; WWW_SIZE_INDEX(z, size_index) www_dwnld_time_sum += xf; www_dwnld_time_by_size[size_index] += xf; if (word =~ "cgi-bin") { httpop_cgi_bins++; } if (word =~ www_search_url) { httpop_searches++; } for (;;) { word = strtok(nil, "\05"); if (word == nil) { break; } first_byte = word; ptr = &word + 1; arg = ((string) ptr); ptr = 0; switch (first_byte[0]) { case 'm': WWW_METHOD(arg) ptr = 1; break; case 's': WWW_REPLY_CODE(arg) break; default: break; } } /* If no method was seen, then assume it was a GET. */ if (ptr == 0) { httpop_gets++; } /* Do not add the size if it is a HEAD. */ if (ishead == 0) { dwnld_totalz += z; } #else /* common or netscape proxy formats */ strtok(nil, " "); /* -. */ strtok(nil, " "); /* -. */ strtok(nil, " ["); /* date. */ strtok(nil, " "); /* zone]. */ strtok(nil, " \""); /* GET or POST. */ if (word == nil) { return; } WWW_METHOD(word) word = strtok(nil, " "); /* URL. */ if (word == nil) { return; } if (word =~ "cgi-bin") { httpop_cgi_bins++; } if (word =~ www_search_url) { httpop_searches++; } /* * Sometimes HTTP/1.x is not listed in the access log. Skip it * if it does exist. Load the error/success code. */ word = strtok(nil, " "); if (word == nil) { return; } if (word =~ "HTTP" || word =~ "http") { word = strtok(nil, " "); if (word == nil) { return; } } WWW_REPLY_CODE(word) word = strtok(nil, " "); /* Bytes transferred. */ if (word == nil) { return; } z = atoi(word); /* Do not add the size if it is a HEAD. */ if (ishead == 0) { dwnld_totalz += z; } WWW_SIZE_INDEX(z, size_index) #ifdef WATCH_PROXY strtok(nil, " "); /* Status from server. */ strtok(nil, " "); /* Length from server. */ strtok(nil, " "); /* Length from client POST. */ strtok(nil, " "); /* Length POSTed to remote. */ strtok(nil, " "); /* Client header request. 
*/ strtok(nil, " "); /* Proxy header response. */ strtok(nil, " "); /* Proxy header request. */ strtok(nil, " "); /* Server header response. */ strtok(nil, " "); /* Transfer total seconds. */ word = strtok(nil, " "); /* Route. */ if (word == nil) { return; } /* - DIRECT PROXY(host.domain:port) SOCKS. */ if (strncmp(word, "PROXY", 5) == 0 || strncmp(word, "SOCKS", 5) == 0) { prxy_squid_indirect++; } strtok(nil, " "); /* Client finish status. */ strtok(nil, " "); /* Server finish status. */ word = strtok(nil, " "); /* Cache finish status. */ if (word == nil) { return; } /* * ERROR HOST-NOT-AVAILABLE = error or incomplete op * WRITTEN REFRESHED CL-MISMATCH(content length mismatch) = cache_writes * NO-CHECK UP-TO-DATE = cache_hits * DO-NOT-CACHE NON-CACHEABLE = uncacheable */ switch(word) { case "WRITTEN": case "REFRESHED": case "CL-MISMATCH": prxy_cache_writes++; break; case "NO-CHECK": case "UP-TO-DATE": prxy_squid_cache_hits++; break; case "DO-NOT-CACHE": case "NON-CACHEABLE": prxy_uncacheable++; break; default: break; } word = strtok(nil, " ["); /* [transfer total time x.xxx. */ if (word == nil) { return; } xf = atof(word); www_dwnld_time_sum += xf; www_dwnld_time_by_size[size_index] += xf; #endif #endif } measure_web(long sleep_till) { double lastops = 0.0; char buf[BUFSIZ]; int i; long now; httpops = 0.0; httpops5 = 0.0; gateops = 0.0; httpop_gets = 0; httpop_condgets = 0; httpop_posts = 0; httpop_cgi_bins = 0; httpop_errors = 0; httpop_searches = 0; for (i=0; i<5; i++) { dwnld_size[i] = 0; #if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO www_dwnld_time_by_size[i] = 0.0; #endif } dwnld_totalz = 0; #if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO www_dwnld_time_sum = 0.0; #endif #if WATCH_PROXY || WATCH_SQUID prxy_squid_indirect = 0; prxy_squid_cache_hits = 0; #ifdef WATCH_PROXY prxy_cache_writes = 0; prxy_uncacheable = 0; #else squid_cache_misses = 0; squid_icp_requests = 0; squid_icp_queries = 0; squid_client_http = 0; #endif #endif if (www_log_filename != nil) { now = time(0); while (now < sleep_till) { #ifdef WATCH_CPU sleep_till_and_count_new_proceses(now + 5); #else sleep(5); #endif now = time(0); if (www_fd != 0) { buf[BUFSIZ-1] = 127; while (fgets(buf, BUFSIZ, www_fd) != nil) { httpops += 1.0; if (www_gatelen > 0) { if (strncmp(buf, www_gateway, www_gatelen) == 0) { gateops += 1.0; } } accesslog(buf); /* * If the line is longer than the buffer, then ignore the rest * of the line. */ while (buf[BUFSIZ-1] == 0 && buf[BUFSIZ-2] != '\n') { buf[BUFSIZ-1] = 127; if (fgets(buf, BUFSIZ, www_fd) == nil) { break; } } } } /* * See if the file has been switched or truncated. */ if (stat(www_log_filename, www_stat) == 0) { if (www_ino != www_stat[0].st_ino || www_size > www_stat[0].st_size) { /* * Close the old log file. */ if (www_fd != 0) { fclose(www_fd); } /* * The log file has changed, open the new one. */ www_fd = fopen(www_log_filename, "r"); if (www_fd != 0) { fstat(www_fd, www_stat); www_ino = www_stat[0].st_ino; buf[BUFSIZ-1] = 127; while(fgets(buf, BUFSIZ, www_fd) != nil) { httpops += 1.0; if (www_gatelen > 0) { if (strncmp(buf, www_gateway, www_gatelen) == 0) { gateops += 1.0; } } accesslog(buf); /* * If the line is longer than the buffer, then ignore * the rest of the line. */ while (buf[BUFSIZ-1] == 0 && buf[BUFSIZ-2] != '\n') { buf[BUFSIZ-1] = 127; if (fgets(buf, BUFSIZ, www_fd) == nil) { break; } } } } } /* Remember size for next time. 
*/ www_size = www_stat[0].st_size; } www5_now = gethrtime(); www5_interval = (www5_now - www5_then) * 0.000000001; www5_then = www5_now; dtmp = (httpops - lastops)/www5_interval; if (dtmp > httpops5) { httpops5 = dtmp; } lastops = httpops; } } else { sleep_till_and_count_new_proceses(sleep_till); www5_now = gethrtime(); } www_now = www5_now; www_interval = (www_now - www_then) * 0.000000001; www_then = www_now; /* * Use dtmp to get percentages. */ if (httpops == 0.0) { dtmp = 0.0; } else { dtmp = 100.0 / httpops; } #if WATCH_PROXY || WATCH_SQUID || WATCH_YAHOO for (i=0; i<5; i++) { if (dwnld_size[i] == 0) { www_dwnld_time_by_size[i] = 0.0; } else { www_dwnld_time_by_size[i] = www_dwnld_time_by_size[i]/dwnld_size[i]; } } #endif } int count_proc(string name) { int count; prpsinfo_t p; count = 0; for (p=first_proc(); p.pr_pid != -1; p=next_proc()) { if (p.pr_fname =~ name) { count++; } } return count; } put_httpd() { put_output("#httpds", sprintf("%7d", count_proc(www_server_proc_name))); put_output("httpop/s", sprintf("%8.2f", httpops/www_interval)); put_output("http/p5s", sprintf("%8.2f", httpops5)); put_output("cndget/s", sprintf("%8.2f", httpop_condgets/www_interval)); put_output("search/s", sprintf("%8.3f", httpop_searches/www_interval)); put_output(" cgi/s", sprintf("%8.3f", httpop_cgi_bins/www_interval)); put_output(" htErr/s", sprintf("%8.3f", httpop_errors/www_interval)); put_output(" httpb/s", sprintf("%8.0f", dwnld_totalz/www_interval)); put_output(" %to1KB", sprintf("%8.2f", dtmp*dwnld_size[0])); put_output(" %to10KB", sprintf("%8.2f", dtmp*dwnld_size[1])); put_output("%to100KB", sprintf("%8.2f", dtmp*dwnld_size[2])); put_output(" %to1MB", sprintf("%8.2f", dtmp*dwnld_size[3])); put_output("%over1MB", sprintf("%8.2f", dtmp*dwnld_size[4])); put_output(www_gateway, sprintf("%8.2f", gateops/www_interval)); #if WATCH_PROXY || WATCH_SQUID put_output(" %indir", sprintf("%8.2f", dtmp * prxy_squid_indirect)); put_output("%cch_hit", sprintf("%8.2f", dtmp * prxy_squid_cache_hits)); #ifdef WATCH_PROXY put_output("%cch_wrt", sprintf("%8.2f", dtmp * prxy_cache_writes)); put_output("%cch_unc", sprintf("%8.2f", dtmp * prxy_uncacheable)); #else put_output("%cch_mis", sprintf("%8.2f", dtmp * squid_cache_misses)); put_output("%cch_req", sprintf("%8.2f", dtmp * squid_icp_requests)); put_output("%cch_qry", sprintf("%8.2f", dtmp * squid_icp_queries)); #endif put_output(" xfr_t", sprintf("%8.2f", 0.01 * dtmp * www_dwnld_time_sum)); put_output(" xfr1_t", sprintf("%8.2f", www_dwnld_time_by_size[0])); put_output(" xfr10_t", sprintf("%8.2f", www_dwnld_time_by_size[1])); put_output("xfr100_t", sprintf("%8.2f", www_dwnld_time_by_size[2])); put_output(" xfr1M_t", sprintf("%8.2f", www_dwnld_time_by_size[3])); put_output("xfro1M_t", sprintf("%8.2f", www_dwnld_time_by_size[4])); #elif WATCH_YAHOO put_output(" wprc_t", sprintf("%9.5f", 0.01 * dtmp * www_dwnld_time_sum)); put_output(" wprc1_t", sprintf("%9.5f", www_dwnld_time_by_size[0])); put_output(" wprc10_t", sprintf("%9.5f", www_dwnld_time_by_size[1])); put_output("wprc100_t", sprintf("%9.5f", www_dwnld_time_by_size[2])); put_output(" wprc1M_t", sprintf("%9.5f", www_dwnld_time_by_size[3])); put_output("wprco1M_t", sprintf("%9.5f", www_dwnld_time_by_size[4])); #endif } #endif
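// For reference, the default (non-proxy, non-Squid, non-Yahoo) branch of
// accesslog() above performs a plain Common Log Format parse: host, ident,
// authuser, [date zone], "method URL HTTP/1.x", status, bytes, where the
// HTTP/1.x token is sometimes absent.  The standalone C sketch below
// (invented struct and function names, not part of this script) shows the
// same tokenization using strtok_r; the status and size handling is
// simplified relative to the WWW_REPLY_CODE and WWW_SIZE_INDEX macros.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct clf_hit {
    char method[8];
    char url[1024];
    int  status;
    long bytes;
};

/* Parse one Common Log Format line in place; returns 0 on success. */
static int parse_clf(char *line, struct clf_hit *hit)
{
    char *save;
    char *word;

    if (strtok_r(line, " ", &save) == NULL)   /* client host */
        return -1;
    strtok_r(NULL, " ", &save);               /* ident, usually "-" */
    strtok_r(NULL, " ", &save);               /* authuser, usually "-" */
    strtok_r(NULL, " [", &save);              /* date */
    strtok_r(NULL, " ", &save);               /* zone] */
    word = strtok_r(NULL, " \"", &save);      /* request method, e.g. GET */
    if (word == NULL)
        return -1;
    snprintf(hit->method, sizeof(hit->method), "%s", word);
    word = strtok_r(NULL, " ", &save);        /* URL */
    if (word == NULL)
        return -1;
    snprintf(hit->url, sizeof(hit->url), "%s", word);
    word = strtok_r(NULL, " ", &save);        /* "HTTP/1.x" if present */
    if (word == NULL)
        return -1;
    if (strncmp(word, "HTTP", 4) == 0 || strncmp(word, "http", 4) == 0) {
        word = strtok_r(NULL, " ", &save);    /* status code */
        if (word == NULL)
            return -1;
    }
    hit->status = atoi(word);
    word = strtok_r(NULL, " \n", &save);      /* bytes transferred */
    if (word == NULL)
        return -1;
    hit->bytes = atol(word);
    return 0;
}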