diff -r aa909246edb3 configure.in --- a/configure.in Mon Aug 18 14:32:19 2008 +0200 +++ b/configure.in Thu Jan 13 20:58:57 2011 +0900 @@ -3064,6 +3064,7 @@ crm/admin/crm_sh \ crm/admin/crm_utils.py \ crm/admin/crm_commands.py \ + crm/admin/hb_monitor/Makefile \ include/crm/Makefile \ include/crm/common/Makefile \ include/crm/pengine/Makefile \ diff -r aa909246edb3 crm/admin/Makefile.am --- a/crm/admin/Makefile.am Mon Aug 18 14:32:19 2008 +0200 +++ b/crm/admin/Makefile.am Thu Jan 13 20:58:57 2011 +0900 @@ -17,6 +17,9 @@ # MAINTAINERCLEANFILES = Makefile.in +SUBDIRS = hb_monitor +DIST_SUBDIRS = hb_monitor + INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ diff -r aa909246edb3 crm/admin/hb_monitor/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crm/admin/hb_monitor/Makefile.am Thu Jan 13 20:58:57 2011 +0900 @@ -0,0 +1,58 @@ +# +# Copyright (C) 2004 Andrew Beekhof +# Copyright (C) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# +MAINTAINERCLEANFILES = Makefile.in + +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ + -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ + -I$(top_builddir) -I$(top_srcdir) + +halibdir = $(libdir)/@HB_PKG@ +hasbindir = $(sbindir) +LIBRT = @LIBRT@ +AM_CFLAGS = @CFLAGS@ $(CRM_DEBUG_FLAGS) + +COMMONLIBS = \ + $(top_builddir)/lib/clplumbing/libplumb.la \ + $(top_builddir)/lib/crm/common/libcrmcommon.la \ + $(top_builddir)/lib/crm/cib/libcib.la \ + $(top_builddir)/lib/hbclient/libhbclient.la \ + $(GLIBLIB) \ + $(CURSESLIBS) \ + $(LIBRT) + + +hasbin_PROGRAMS = hb_monitor + +## SOURCES + +#noinst_HEADERS = config.h control.h crmd.h +noinst_HEADERS = + +hb_monitor_SOURCES = hb_monitor.c +hb_monitor_LDADD = $(COMMONLIBS) \ + $(top_builddir)/lib/lrm/liblrm.la \ + $(top_builddir)/lib/crm/pengine/libpe_status.la + +clean-generic: + rm -f *.log *.debug *.xml *~ + +install-exec-local: + +uninstall-local: diff -r aa909246edb3 crm/admin/hb_monitor/hb_monitor.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crm/admin/hb_monitor/hb_monitor.c Thu Jan 13 20:58:57 2011 +0900 @@ -0,0 +1,2256 @@ + +/* + * Copyright (C) 2004 Andrew Beekhof + * Copyright (C) 2008 NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#ifdef HAVE_GETOPT_H +# include +#endif +#include + + +/* GMainLoop *mainloop = NULL; */ +const char *crm_system_name = "hb_monitor"; +#define OPTARGS "V?i:nrh:cdp:s1wX:eoft" + + +void usage(const char *cmd, int exit_status); +void blank_screen(void); +int print_status(crm_data_t *cib); +void print_warn(const char *descr); +int print_simple_status(crm_data_t *cib); +/* #define printw_at(line, fmt...) move(line, 0); printw(fmt); line++ */ +void wait_for_refresh(int offset, const char *prefix, int msec); +int print_html_status(crm_data_t *cib, const char *filename, gboolean web_cgi); +void make_daemon(gboolean daemonize, const char *pidfile); +gboolean mon_timer_popped(gpointer data); +void mon_update(const HA_Message*, int, int, crm_data_t*,void*); +void clean_up(int rc); + +/* EXPANDED_ */ +void print_expand_failcount(gpointer key, gpointer value, gpointer user_data); +void print_expand_attrs_status(gpointer key, gpointer value, gpointer user_data); +void print_expand_if_status(node_t *node, const char *pre_text, long options, void *print_data, pe_working_set_t *pdata_set); +static int init_heartbeat(void); +static void LinkStatus(const char *node, const char *lnk, const char *status, void *private); +static gboolean ifstatus_dispatch(IPC_Channel* ipc, gpointer user_data); +static void ifstatus_dispatch_destroy(gpointer user_data); +static int init_iftable(void); +static int walk_iftable(GListPtr); +static void free_iftable(void); +static void free_storage(void); +/* _EXPANDED */ + +char *xml_file = NULL; 
+char *as_html_file = NULL;
+char *pid_file = NULL;
+gboolean as_console = FALSE;
+gboolean simple_status = FALSE;
+gboolean group_by_node = FALSE;
+gboolean inactive_resources = FALSE;
+gboolean web_cgi = FALSE;
+int interval = 15000; /* refresh period in milliseconds (set from crm_get_msec() in main) */
+gboolean daemonize = FALSE;
+GMainLoop* mainloop = NULL;
+guint timer_id = 0; /* id of the pending refresh timer, 0 when none was ever armed */
+cib_t *cib_conn = NULL;
+int failed_connections = 0;
+gboolean one_shot = FALSE;
+gboolean has_warnings = FALSE; /* set by mon_warn() once a warning was printed */
+gboolean print_failcount = FALSE;
+gboolean print_operations = FALSE;
+gboolean print_timing = FALSE;
+
+int Noption_count = 0;
+
+/*
+ * Non-mainloop signal handler.
+ * Installed with signal() for SIGTERM and SIGINT in main();
+ * it only calls clean_up(LSB_EXIT_OK). The signal number is ignored.
+ */
+static void
+mon_shutdown_wrapper(int nsig)
+{
+	clean_up(LSB_EXIT_OK);
+}
+
+/* EXPANDED_ */
+static gboolean expanded = FALSE; /* TRUE when the 'e' (--expand) option was given */
+static IPC_Channel *chan = NULL; /* IPC channel obtained from hb via llc_ops->ipcchan() */
+static ll_cluster_t *hb = NULL; /* Heartbeat client handle */
+const char *self_uname = NULL;
+gboolean use_umcast = FALSE;
+/* Print data table: the user_data handed to the g_hash_table_foreach() printers. */
+struct pdata_s
+{
+	char *pre_text;
+	long options;
+	void *print_data;
+	node_t *node;
+};
+/* I/F status information table. */
+struct ifinfo_s {
+	char *name;
+	char *node;
+	char *status;
+};
+static GPtrArray *gIFTable = NULL; /* ifinfo_s* */
+/*
+ * This flag becomes TRUE when the client receives a message
+ * notifying that some I/F status has changed,
+ * and becomes FALSE once gIFTable has been updated.
+ */
+static gboolean need_update_iftable = FALSE;
+int node_num = -1; /* node number in the cluster. */
+#define FAIL_COUNT_ATTR "fail-count-"
+/* Print some message together with the status information. */
+#define PRINT_MSG 1
+#if PRINT_MSG
+/* prototype declarations. */
+static int parse_config_file(void);
+static int get_pingnode_num(const char *bp);
+static void get_client_options(const char *bp);
+static void set_clinfo_table(void);
+static void set_clattr_table(gpointer data, gpointer user_data);
+static void free_clinfo_table(void);
+static void get_optvalue(const char *optp, const char *optname, const char* default_value, char *optvalue);
+static gboolean need_parse_cf = FALSE; /* TRUE while the config file still has to be (re)parsed */
+/* Client attribute table used to print messages. */
+struct client_attr_s
+{
+	char *option_strs;
+	char *attr_name;
+	char *correct_value;
+};
+/* Client information table used to print messages. */
+struct client_info_s
+{
+	const char *client_name;
+	GList *attrlist;
+	const char *normal_msg;
+	const char *err_msg;
+};
+/*
+ * Fixed values used to print messages.
+ * The 2nd item, the pointer to the list of client_attr_s, must be NULL here,
+ * because the attribute list is not fixed.
+ * One client may be linked with not only one but two or more attributes.
+ * If you add a client to this structure, you must also add the matching
+ * processing to set_clattr_table(). Please see that function's comment for
+ * details.
+ * This struct is possibly unnecessary, but it is useful to collect the
+ * clients' information in one place.
+ */
+struct client_info_s print_msg_clist[] = {
+	{"pingd", NULL, "Link is working", "Link is failure !!"},
+	{"diskd", NULL, "Disk is working", "Disk is failure !!"}
+};
+static int ping_node_num = 0;
+#define PINGD_INDEX 0
+#define PINGD_DEF_ATTR "pingd"
+#define PINGD_DEF_MULTIPLE "1"
+#define PINGD_DEF_PINGTARGET "0.0.0.0"
+#define DISKD_INDEX 1
+#define DISKD_DEF_ATTR "diskd"
+#define DISKD_CORRECT_VALUE "normal"
+#endif /* PRINT_MSG */
+/* _EXPANDED */
+
+/*
+ * Mainloop signal handler.
+ * Registered through G_main_add_SignalHandler() for SIGTERM and SIGINT
+ * in main(). Calls clean_up(-1), then quits the main loop when it is
+ * running and otherwise exits with LSB_EXIT_OK. Always returns FALSE.
+ */
+static gboolean
+mon_shutdown(int nsig, gpointer unused)
+{
+	clean_up(-1);
+	if (mainloop && g_main_is_running(mainloop)) {
+		g_main_quit(mainloop);
+
+	} else {
+		exit(LSB_EXIT_OK);
+	}
+	return FALSE;
+}
+
+#if CURSES_ENABLED
+# define print_as(fmt...) 
if(as_console) { \ + printw(fmt); \ + clrtoeol(); \ + refresh(); \ + } else { \ + fprintf(stdout, fmt); \ + } +#else +# define print_as(fmt...) fprintf(stdout, fmt); +#endif + +int +main(int argc, char **argv) +{ + int argerr = 0; + int flag; + +#ifdef HAVE_GETOPT_H + int option_index = 0; + static struct option long_options[] = { + /* Top-level Options */ + {"verbose", 0, 0, 'V'}, + {"help", 0, 0, '?'}, + {"interval", 1, 0, 'i'}, + {"group-by-node", 0, 0, 'n'}, + {"inactive", 0, 0, 'r'}, + {"failcounts", 0, 0, 'f'}, + {"operations", 0, 0, 'o'}, + {"timing-details", 0, 0, 't'}, + {"as-html", 1, 0, 'h'}, + {"web-cgi", 0, 0, 'w'}, + {"simple-status", 0, 0, 's'}, + {"as-console", 0, 0, 'c'}, + {"one-shot", 0, 0, '1'}, + {"daemonize", 0, 0, 'd'}, + {"pid-file", 0, 0, 'p'}, + {"xml-file", 1, 0, 'X'}, + {"expand", 0, 0, 'e'}, + + {0, 0, 0, 0} + }; +#endif + pid_file = crm_strdup("/tmp/hb_monitor.pid"); + crm_system_name = basename(argv[0]); + crm_log_init(crm_system_name, LOG_ERR-1, FALSE, FALSE, 0, NULL); + + if (strcmp(crm_system_name, "hb_monitor.cgi")==0) { + web_cgi = TRUE; + one_shot = TRUE; + } + + while (1) { +#ifdef HAVE_GETOPT_H + flag = getopt_long(argc, argv, OPTARGS, + long_options, &option_index); +#else + flag = getopt(argc, argv, OPTARGS); +#endif + if (flag == -1) + break; + + switch(flag) { + case 'V': + cl_log_enable_stderr(TRUE); + alter_debug(DEBUG_INC); + break; + case 'i': + interval = crm_get_msec(optarg); + break; + case 'n': + group_by_node = TRUE; + break; + case 'r': + inactive_resources = TRUE; + break; + case 'd': + daemonize = TRUE; + break; + case 't': + print_timing = TRUE; + print_operations = TRUE; + break; + case 'o': + print_operations = TRUE; + break; + case 'f': + print_failcount = TRUE; + break; + case 'p': + crm_free(pid_file); + pid_file = crm_strdup(optarg); + break; + case 'X': + xml_file = crm_strdup(optarg); + one_shot = TRUE; + break; + case 'h': + as_html_file = crm_strdup(optarg); + break; + case 'w': + web_cgi = TRUE; + 
one_shot = TRUE; + break; + case 'c': +#if CURSES_ENABLED + as_console = TRUE; +#else + printf("You need to have curses available at compile time to enable console mode\n"); + argerr++; +#endif + break; + case 's': + simple_status = TRUE; + one_shot = TRUE; + break; + case '1': + one_shot = TRUE; + break; + case '?': + usage(crm_system_name, LSB_EXIT_OK); + break; + case 'e': + expanded = TRUE; + break; + default: + printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); + ++argerr; + break; + } + } + + if (optind < argc) { + printf("non-option ARGV-elements: "); + while (optind < argc) + printf("%s ", argv[optind++]); + printf("\n"); + } + if (argerr) { + usage(crm_system_name, LSB_EXIT_GENERIC); + } + + /* Set signal callback function. */ + signal(SIGTERM, mon_shutdown_wrapper); + signal(SIGINT, mon_shutdown_wrapper); + + if(as_html_file == NULL && !web_cgi && !simple_status) { +#if CURSES_ENABLED + as_console = TRUE; +#else + printf("Defaulting to one-shot mode\n"); + printf("You need to have curses available at compile time to enable console mode\n"); + one_shot = TRUE; +#endif + } + + if(daemonize) { + as_console = FALSE; + } + + if(one_shot) { + daemonize = FALSE; + as_console = FALSE; + } + + if(daemonize && as_html_file == NULL) { + usage(crm_system_name, LSB_EXIT_GENERIC); + } +#if PRINT_MSG + if (expanded && simple_status == FALSE) { + need_parse_cf = TRUE; + } +#endif /* PRINT_MSG */ + + make_daemon(daemonize, pid_file); + +#if CURSES_ENABLED + if(as_console) { + initscr(); + cbreak(); + noecho(); + } +#endif + + crm_info("Starting %s", crm_system_name); + mainloop = g_main_new(FALSE); + + if(one_shot == FALSE) { + timer_id = Gmain_timeout_add( + interval, mon_timer_popped, NULL); + + } else if(xml_file != NULL) { + FILE *xml_strm = fopen(xml_file, "r"); + crm_data_t *cib_object = NULL; + if(strstr(xml_file, ".bz2") != NULL) { + cib_object = file2xml(xml_strm, TRUE); + } else { + cib_object = file2xml(xml_strm, FALSE); + } + if(xml_strm 
!= NULL) { + fclose(xml_strm); + } + one_shot = TRUE; +#if PRINT_MSG + if (need_parse_cf && parse_config_file() != 0) { + crm_err("Unable to parse config file. Cannot print status message for Instance attributes"); + free_clinfo_table(); + } + need_parse_cf = FALSE; +#endif /* PRINT_MSG */ + mon_update(NULL, 0, cib_ok, cib_object, NULL); + } + + mon_timer_popped(NULL); + + G_main_add_SignalHandler( + G_PRIORITY_HIGH, SIGTERM, mon_shutdown, NULL, NULL); + G_main_add_SignalHandler( + G_PRIORITY_HIGH, SIGINT, mon_shutdown, NULL, NULL); + g_main_run(mainloop); + g_main_destroy(mainloop); + return_to_orig_privs(); + + crm_info("Exiting %s", crm_system_name); + +#if CURSES_ENABLED + if(as_console) { + echo(); + nocbreak(); + endwin(); + } +#endif + return 0; +} + +gboolean +mon_timer_popped(gpointer data) +{ + int rc = cib_ok; + int options = cib_scope_local; + + if(timer_id > 0) { + Gmain_timeout_remove(timer_id); + } + + if(as_console) { +#if CURSES_ENABLED + move(0, 0); + printw("Updating...\n"); + clrtoeol(); + refresh(); +#endif + + } else { + crm_notice("Updating..."); + } + + if(cib_conn == NULL) { + crm_debug_4("Creating CIB connection"); + cib_conn = cib_new(); + } + + CRM_DEV_ASSERT(cib_conn != NULL); + if(crm_assert_failed) { + return FALSE; + + } else if(cib_conn->state != cib_connected_query){ + crm_debug_4("Connecting to the CIB"); +#if CURSES_ENABLED + if(as_console) { + printw("Signing on...\n"); + clrtoeol(); + refresh(); + } +#endif + if(cib_ok == cib_conn->cmds->signon( + cib_conn, crm_system_name, cib_query)) { + failed_connections = 0; + + } else if (simple_status || one_shot) { + fprintf(stdout, "Critical: Unable to connect to the CIB\n"); + clean_up(LSB_EXIT_GENERIC); + + } else { + failed_connections++; + CRM_DEV_ASSERT(cib_conn->cmds->signoff(cib_conn) == cib_ok); + wait_for_refresh(0, "Not connected: ", 2*interval); + return FALSE; + } +#if CURSES_ENABLED + if(as_console) { + printw("Querying...\n"); + clrtoeol(); + refresh(); + } +#endif + } + 
if(as_console) { blank_screen(); }
+
+	if (expanded && simple_status == FALSE) {
+		/* create I/F status information table. */
+		if (gIFTable == NULL) {
+			if (init_iftable() != 0) {
+				if (one_shot) {
+					fprintf(stderr,
+						"Critical: Unable to create I/F status information table\n");
+					free_storage();
+					exit(2);
+				}
+				wait_for_refresh(0,
+					"Not created I/F status information table: ", 2*interval);
+				return FALSE;
+			}
+			need_update_iftable = TRUE;
+		}
+		/*
+		 * Connect to Heartbeat for getting I/F status information.
+		 */
+		if (hb != NULL) {
+			chan = hb->llc_ops->ipcchan(hb);
+		}
+		if (hb == NULL || chan == NULL || chan->ch_status == IPC_DISCONNECT) {
+#if CURSES_ENABLED
+			if(as_console) {
+				printw("Signing on to Heartbeat...\n");
+				clrtoeol();
+				refresh();
+			}
+#endif
+			/* signon to Heartbeat. */
+			if (init_heartbeat() != 0) {
+				if (one_shot) {
+					fprintf(stderr,
+						"Critical: Unable to connect to Heartbeat\n");
+					free_storage();
+					exit(2);
+				}
+				if (hb != NULL) {
+					CRM_DEV_ASSERT(hb->llc_ops->signoff(hb, TRUE) == HA_OK);
+				}
+				wait_for_refresh(0, "Not connected to Heartbeat: ", 2*interval);
+				return FALSE;
+			}
+			if(as_console) { blank_screen(); }
+			if (one_shot == FALSE) {
+				/*
+				 * Set the callback function that is notified when an
+				 * I/F status changes; walking the I/F table on every
+				 * timer pop would take too much time.
+				 */
+				if (hb->llc_ops->set_ifstatus_callback(hb, LinkStatus, NULL)
+					!= HA_OK) {
+					CRM_DEV_ASSERT(hb->llc_ops->signoff(hb, TRUE) == HA_OK);
+					wait_for_refresh(0,
+						"Not set I/F status callback: ", 2*interval);
+					return FALSE;
+				}
+				chan = hb->llc_ops->ipcchan(hb);
+				G_main_add_IPC_Channel(G_PRIORITY_HIGH, chan, FALSE,
+					ifstatus_dispatch, NULL, ifstatus_dispatch_destroy);
+			} else {
+				/*
+				 * One-shot mode registers no callback, but the drain
+				 * loop below still inspects the channel, so fetch it
+				 * after the fresh sign-on instead of using a stale or
+				 * NULL pointer.
+				 * NOTE(review): init_heartbeat() may already set chan;
+				 * re-reading it is harmless either way - confirm.
+				 */
+				chan = hb->llc_ops->ipcchan(hb);
+			}
+			need_update_iftable = TRUE;
+#if PRINT_MSG
+			need_parse_cf = TRUE;
+#endif /* PRINT_MSG */
+		}
+		/* Drain and discard every message that is already queued. */
+		while (hb->llc_ops->msgready(hb)) {
+			struct ha_msg *msg;
+			/* chan may legitimately be NULL here; never dereference it blindly */
+			if (chan != NULL && chan->ch_status == IPC_DISCONNECT) {
+				return FALSE;
+			}
+			msg = hb->llc_ops->readmsg(hb, 0);
+			if (msg == NULL) {
+				continue;
+			}
+
+			ha_msg_del(msg);
+			msg = NULL;
+		}
+	}
+#if PRINT_MSG
+	if (need_parse_cf && parse_config_file() != 0) {
+		if (one_shot) {
+			/* one-shot: report and continue without the status messages */
+			crm_err("Unable to parse config file. Cannot print status message for Instance attributes");
+			free_clinfo_table();
+		}
+		else {
+			wait_for_refresh(0, "Not parsed config file: ", 2*interval);
+			return FALSE;
+		}
+	}
+	need_parse_cf = FALSE;
+#endif /* PRINT_MSG */
+
+	/* Asynchronous query: the reply is handled by mon_update(). */
+	rc = cib_conn->cmds->query(cib_conn, NULL, NULL, options);
+	add_cib_op_callback(rc, FALSE, NULL, mon_update);
+	return FALSE;
+}
+
+/*
+ * CIB query callback (also called directly from main() for --xml-file).
+ *
+ * msg, call_id, user_data: not used by this function.
+ * rc:     result of the query; cib_ok selects the rendering path.
+ * output: query result, expected to be (or to contain) the XML_TAG_CIB
+ *         element.
+ *
+ * On success the CIB is rendered in the selected format; in one-shot mode
+ * the program is then terminated through clean_up(). On failure the
+ * error is reported and, unless the program exits, the connections are
+ * signed off. In every non-exiting case the next refresh is scheduled.
+ */
+void
+mon_update(const HA_Message *msg, int call_id, int rc,
+	   crm_data_t *output, void*user_data)
+{
+	const char *prefix = NULL;
+	if(rc == cib_ok) {
+		crm_data_t *cib = NULL;
+#if CRM_DEPRECATED_SINCE_2_0_4
+		if( safe_str_eq(crm_element_name(output), XML_TAG_CIB) ) {
+			cib = output;
+		} else {
+			cib = find_xml_node(output,XML_TAG_CIB,TRUE);
+		}
+#else
+		cib = output;
+		CRM_DEV_ASSERT(safe_str_eq(crm_element_name(cib), XML_TAG_CIB));
+#endif
+		if(as_html_file || web_cgi) {
+			if (print_html_status(cib, as_html_file, web_cgi) != 0) {
+				fprintf(stderr, "Critical: Unable to output html file\n");
+				clean_up(LSB_EXIT_GENERIC);
+			}
+		} else if (simple_status) {
+			print_simple_status(cib);
+			if (has_warnings) {
+				clean_up(LSB_EXIT_GENERIC);
+			}
+		} else {
+			print_status(cib);
+		}
+		
if(one_shot) { + clean_up(LSB_EXIT_OK); + } + + + } else if(simple_status) { + fprintf(stderr, "Critical: query failed: %s\n", cib_error2string(rc)); + clean_up(LSB_EXIT_GENERIC); + + } else if(one_shot) { + fprintf(stderr, "Query failed: %s\n", cib_error2string(rc)); + clean_up(LSB_EXIT_OK); + + } else { + CRM_DEV_ASSERT(cib_conn->cmds->signoff(cib_conn) == cib_ok); + if (hb != NULL) { + CRM_DEV_ASSERT(hb->llc_ops->signoff(hb, TRUE) == HA_OK); + } + print_as("Query failed: %s", cib_error2string(rc)); + prefix = "Query failed! "; + + } + wait_for_refresh(0, prefix, interval); +} + +void +wait_for_refresh(int offset, const char *prefix, int msec) +{ + int lpc = msec / 1000; + struct timespec sleept = {1 , 0}; + + if(as_console == FALSE) { + timer_id = Gmain_timeout_add(msec, mon_timer_popped, NULL); + return; + } + + crm_notice("%sRefresh in %ds...", prefix?prefix:"", lpc); + while(lpc > 0) { +#if CURSES_ENABLED + move(0, 0); +/* printw("%sRefresh in \033[01;32m%ds\033[00m...", prefix?prefix:"", lpc); */ + printw("%sRefresh in %ds...\n", prefix?prefix:"", lpc); + clrtoeol(); + refresh(); +#endif + lpc--; + if(lpc == 0) { + timer_id = Gmain_timeout_add( + 1000, mon_timer_popped, NULL); + } else { + if (nanosleep(&sleept, NULL) != 0) { + return; + } + } + } +} + +#define mon_warn(fmt...) 
do { \ + if (!has_warnings) { \ + print_as("Warning:"); \ + } else { \ + print_as(","); \ + } \ + print_as(fmt); \ + has_warnings = TRUE; \ + } while(0) + +static int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set) +{ + int fail_count = 0; + resource_t *failed = rsc; + char *fail_attr = crm_concat("fail-count", rsc->id, '-'); + const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); + + *last_failure = 0; + if(is_not_set(rsc->flags, pe_rsc_unique)) { + failed = uber_parent(rsc); + } + + if(value != NULL) { + fail_count = char2score(value); + crm_info("%s has failed %d times on %s", + rsc->id, fail_count, node->details->uname); + } + crm_free(fail_attr); + return fail_count; +} + +static void get_ping_score(node_t *node, pe_working_set_t *data_set) +{ + const char *attr = "pingd"; + const char *value = NULL; + value = g_hash_table_lookup(node->details->attrs, attr); + + if(value != NULL) { + print_as(" %s=%s", attr, value); + } +} + +static void print_date(time_t time) +{ + int lpc = 0; + char date_str[26]; + asctime_r(localtime(&time), date_str); + for(; lpc < 26; lpc++) { + if(date_str[lpc] == '\n') { + date_str[lpc] = 0; + } + } + print_as("'%s'", date_str); +} + +static void print_rsc_summary(pe_working_set_t *data_set, node_t *node, resource_t *rsc, gboolean all) +{ + gboolean printed = FALSE; + time_t last_failure = 0; + int failcount = get_failcount(node, rsc, (int*)&last_failure, data_set); + + if(all || failcount || last_failure > 0) { + printed = TRUE; + print_as(" %s: ", rsc->id); + } + + if(failcount > 0) { + printed = TRUE; + print_as(" fail-count=%d", failcount); + } + + if(last_failure > 0) { + printed = TRUE; + print_as(" last-failure="); + print_date(last_failure); + } + + if(printed) { + print_as("\n"); + } +} + + +static void print_rsc_history(pe_working_set_t *data_set, node_t *node, crm_data_t *rsc_entry) +{ + GListPtr op_list = NULL; + gboolean print_name = TRUE; + GListPtr 
sorted_op_list = NULL; + const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); + resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); + + xml_child_iter_filter( + rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, + op_list = g_list_append(op_list, rsc_op); + ); + + sorted_op_list = g_list_sort(op_list, sort_op_by_callid); + + slist_iter(xml_op, crm_data_t, sorted_op_list, lpc, + const char *value = NULL; + const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); + const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); + int rc = crm_parse_int(op_rc, "0"); + + if(safe_str_eq(task, CRMD_ACTION_STATUS) + && safe_str_eq(interval, "0")) { + task = "probe"; + } + + if(rc == 7 && safe_str_eq(task, "probe")) { + continue; + + } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { + continue; + } + + if(print_name) { + print_name = FALSE; + print_rsc_summary(data_set, node, rsc, TRUE); + } + + print_as(" + %s:", task); + if(safe_str_neq(interval, "0")) { + print_as(" interval=%sms", interval); + } + + if(print_timing) { + int int_value; + value = crm_element_value(xml_op, "last_rc_change"); + if(value) { + int_value = crm_parse_int(value, NULL); + print_as(" last-rc-change="); + print_date(int_value); + } + + value = crm_element_value(xml_op, "last_run"); + if(value) { + int_value = crm_parse_int(value, NULL); + print_as(" last-run="); + print_date(int_value); + } + + value = crm_element_value(xml_op, "exec_time"); + if(value) { + int_value = crm_parse_int(value, NULL); + print_as(" exec-time="); + print_date(int_value); + } + + value = crm_element_value(xml_op, "queue_time"); + if(value) { + int_value = crm_parse_int(value, NULL); + print_as(" queue-time="); + print_date(int_value); + } + } + + print_as(" rc=%s (%s)\n", op_rc, execra_code2string(rc)); + + ); + + /* no need to free the contents */ + g_list_free(sorted_op_list); +} + +static void 
print_node_summary(pe_working_set_t *data_set, gboolean operations) +{ + crm_data_t *lrm_rsc = NULL; + crm_data_t *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); + + if(operations) { + print_as("\nOperations:\n"); + } else { + print_as("\nFailcount summary:\n"); + } + + xml_child_iter_filter( + cib_status, node_state, XML_CIB_TAG_STATE, + node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); + print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); + get_ping_score(node, data_set); + print_as("\n"); + + lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); + lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); + + xml_child_iter_filter( + lrm_rsc, rsc_entry, XML_LRM_TAG_RESOURCE, + + if(operations) { + print_rsc_history(data_set, node, rsc_entry); + + } else { + const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); + resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); + print_rsc_summary(data_set, node, rsc, FALSE); + } + ); + ); +} + +int +print_simple_status(crm_data_t *cib) +{ + node_t *dc = NULL; + int nodes_online = 0; + int nodes_standby = 0; + pe_working_set_t data_set; + + set_working_set_defaults(&data_set); + data_set.input = cib; + cluster_status(&data_set); + + dc = data_set.dc_node; + + if(dc == NULL) { + mon_warn("No DC "); + } + + slist_iter(node, node_t, data_set.nodes, lpc2, + if(node->details->standby && node->details->online) { + nodes_standby++; + } else if(node->details->online) { + nodes_online++; + } else { + mon_warn("offline node: %s", node->details->uname); + } + ); + + if (!has_warnings) { + print_as("Ok: %d nodes online", nodes_online); + if (nodes_standby > 0) { + print_as(", %d standby nodes", nodes_standby); + } + print_as(", %d resources configured", + g_list_length(data_set.resources)); + } + + print_as("\n"); + data_set.input = NULL; + cleanup_calculations(&data_set); + return 0; +} + +int +print_status(crm_data_t *cib) +{ + node_t *dc = NULL; + 
static int updates = 0; + pe_working_set_t data_set; + char *since_epoch = NULL; + time_t a_time = time(NULL); + int configured_resources = 0; + int print_opts = pe_print_ncurses; + if(as_console) { + blank_screen(); + } else { + print_opts = pe_print_printf; + } + + updates++; + set_working_set_defaults(&data_set); + data_set.input = cib; + cluster_status(&data_set); + + dc = data_set.dc_node; + + print_as("\n\n============\n"); + + if(a_time == (time_t)-1) { + cl_perror("set_node_tstamp(): Invalid time returned"); + return 1; + } + + since_epoch = ctime(&a_time); + if(since_epoch != NULL) { + print_as("Last updated: %s", since_epoch); + } + + if(dc == NULL) { + print_as("Current DC: NONE\n"); + } else { + print_as("Current DC: %s (%s)\n", + dc->details->uname, dc->details->id); + } + + slist_iter(rsc, resource_t, data_set.resources, lpc, + if(is_not_set(rsc->flags, pe_rsc_orphan)) { + configured_resources++; + } + ); + + print_as("%d Nodes configured.\n", g_list_length(data_set.nodes)); + print_as("%d Resources configured.\n", configured_resources); + print_as("============\n\n"); + + slist_iter(node, node_t, data_set.nodes, lpc2, + const char *node_mode = "OFFLINE"; + if(node->details->standby && node->details->online) { + node_mode = "standby"; + + } else if(node->details->standby) { + node_mode = "OFFLINE (standby)"; + + } else if(node->details->online) { + node_mode = "online"; + } + + print_as("Node: %s (%s): %s\n", + node->details->uname, node->details->id, + node_mode); + if(group_by_node) { + slist_iter(rsc, resource_t, + node->details->running_rsc, lpc2, + rsc->fns->print( + rsc, "\t", print_opts|pe_print_rsconly, stdout); + ); + if (expanded) { + struct pdata_s pdata; + char pre_text[] = "\t"; + + /* + * this check is included in each print functions too, but + * to avoid print titles in the case of standby && offline. 
+ */ + if (!node->details->online) { + continue; + } + + /* set print data */ + pdata.pre_text = pre_text; + pdata.options = print_opts|pe_print_rsconly; + pdata.print_data = stdout; + pdata.node = node; + + /* Fail-count (at every node) */ + print_as("\t=== FAIL-COUNT ===\n"); + g_hash_table_foreach(node->details->attrs, + print_expand_failcount, &pdata); + + /* Instance attributes (at every node) */ + print_as("\t=== INSTANCE-ATTRIBUTES ===\n"); + g_hash_table_foreach(node->details->attrs, + print_expand_attrs_status, &pdata); + + /* I/F status (at every node) */ + if (xml_file == NULL) { + print_as("\t=== INTERCONNECT-LAN ===\n"); + print_expand_if_status(node, pre_text, + print_opts|pe_print_rsconly, stdout, &data_set); + } + } + } + ); + + if(group_by_node == FALSE && inactive_resources) { + print_as("\nFull list of resources:\n"); + + } else if(inactive_resources) { + print_as("\nInactive resources:\n"); + } + + if(group_by_node == FALSE || inactive_resources) { + print_as("\n"); + slist_iter(rsc, resource_t, data_set.resources, lpc2, + gboolean is_active = rsc->fns->active(rsc, TRUE); + gboolean partially_active = rsc->fns->active(rsc, FALSE); + if(is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { + continue; + + } else if(group_by_node == FALSE) { + if(partially_active || inactive_resources) { + rsc->fns->print(rsc, NULL, print_opts, stdout); + } + + } else if(is_active == FALSE && inactive_resources) { + rsc->fns->print(rsc, NULL, print_opts, stdout); + } + ); + } + + if (expanded && group_by_node == FALSE) { + struct pdata_s pdata; + char pre_text[] = " "; + + /* set print data */ + pdata.pre_text = pre_text; + pdata.options = print_opts; + pdata.print_data = stdout; + + /* Fail-count */ + print_as("\nFail-count:\n"); + slist_iter(node, node_t, data_set.nodes, lpc2, + pdata.node = node; + g_hash_table_foreach(node->details->attrs, + print_expand_failcount, &pdata); + ); + + /* Instance attributes */ + print_as("\nInstance attributes:\n"); + 
slist_iter(node, node_t, data_set.nodes, lpc2, + pdata.node = node; + g_hash_table_foreach(node->details->attrs, + print_expand_attrs_status, &pdata); + ); + + /* I/F status */ + if (xml_file == NULL) { + print_as("\nInterconnect-LAN:\n"); + slist_iter(node, node_t, data_set.nodes, lpc2, + print_expand_if_status(node, pre_text, print_opts, stdout, &data_set); + ); + } + } + + if(print_operations || print_failcount) { + print_node_summary(&data_set, print_operations); + } + + if(xml_has_children(data_set.failed)) { + print_as("\nFailed actions:\n"); + xml_child_iter(data_set.failed, xml_op, + const char *id = ID(xml_op); + const char *rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); + const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); + const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); + const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); + int status = crm_parse_int(status_s, "0"); + + print_as(" %s (node=%s, call=%s, rc=%s): %s\n", + id, node, call, rc, op_status2text(status)); + ); + } + +#if CURSES_ENABLED + if(as_console) { + refresh(); + } +#endif + data_set.input = NULL; + cleanup_calculations(&data_set); + return 0; +} + +int +print_html_status(crm_data_t *cib, const char *filename, gboolean web_cgi) +{ + FILE *stream; + node_t *dc = NULL; + static int updates = 0; + pe_working_set_t data_set; + char *filename_tmp = NULL; + + if (web_cgi) { + stream=stdout; + fprintf(stream, "Content-type: text/html\n\n"); + + } else { + filename_tmp = crm_concat(filename, "tmp", '.'); + stream = fopen(filename_tmp, "w"); + if(stream == NULL) { + cl_perror("Cannot open %s for writing", filename_tmp); + crm_free(filename_tmp); + return -1; + } + } + + updates++; + set_working_set_defaults(&data_set); + data_set.input = cib; + cluster_status(&data_set); + + dc = data_set.dc_node; + + fprintf(stream, ""); + fprintf(stream, ""); + fprintf(stream, "Cluster status"); +/* content="%d;url=http://webdesign.about.com" */ + 
fprintf(stream, + "", interval/1000); + fprintf(stream, ""); + + /*** SUMMARY ***/ + + fprintf(stream, "

Cluster summary

"); + { + char *now_str = NULL; + time_t now = time(NULL); + now_str = ctime(&now); + now_str[24] = EOS; /* replace the newline */ + fprintf(stream, "Last updated: %s
\n", now_str); + } + + if(dc == NULL) { + fprintf(stream, "Current DC: NONE
"); + } else { + fprintf(stream, "Current DC: %s (%s)
", + dc->details->uname, dc->details->id); + } + fprintf(stream, "%d Nodes configured.
", + g_list_length(data_set.nodes)); + fprintf(stream, "%d Resources configured.
", + g_list_length(data_set.resources)); + + /*** CONFIG ***/ + + fprintf(stream, "

Config Options

\n"); + + fprintf(stream, "\n"); + fprintf(stream, "\n", + data_set.default_resource_stickiness); + + fprintf(stream, "\n", + data_set.stonith_enabled?"enabled":"disabled"); + + fprintf(stream, "\n", + data_set.symmetric_cluster?"":"a-"); + + fprintf(stream, "\n
Default resource stickiness:%d
STONITH of failed nodes:%s
Cluster is:%ssymmetric
No Quorum Policy:"); + switch (data_set.no_quorum_policy) { + case no_quorum_freeze: + fprintf(stream, "Freeze resources"); + break; + case no_quorum_stop: + fprintf(stream, "Stop ALL resources"); + break; + case no_quorum_ignore: + fprintf(stream, "Ignore"); + break; + } + fprintf(stream, "\n
\n"); + + /*** NODE LIST ***/ + + fprintf(stream, "

Node List

\n"); + fprintf(stream, "
    \n"); + slist_iter(node, node_t, data_set.nodes, lpc2, + fprintf(stream, "
  • "); + if(node->details->standby && node->details->online) { + fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"standby\n"); + } else if(node->details->standby) { + fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"OFFLINE (standby)\n"); + } else if(node->details->online) { + fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"online\n"); + } else { + fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"OFFLINE\n"); + } + if(group_by_node) { + fprintf(stream, "
      \n"); + slist_iter(rsc, resource_t, + node->details->running_rsc, lpc2, + fprintf(stream, "
    • "); + rsc->fns->print(rsc, NULL, + pe_print_html|pe_print_rsconly, stream); + fprintf(stream, "
    • \n"); + ); + fprintf(stream, "
    \n"); + if (expanded) { + struct pdata_s pdata; + + /* + * this check is included in each print functions too, but + * to avoid print titles in the case of standby && offline. + */ + if (!node->details->online) { + continue; + } + + /* set print data */ + pdata.pre_text = NULL; + pdata.options = pe_print_html|pe_print_rsconly; + pdata.print_data = stream; + pdata.node = node; + + /* Fail-count (at every node) */ + fprintf(stream, "
      \n"); + fprintf(stream, "
    • "); + fprintf(stream, "=== FAIL-COUNT ==="); + fprintf(stream, "
    • \n"); + g_hash_table_foreach(node->details->attrs, + print_expand_failcount, &pdata); + fprintf(stream, "
    \n"); + + /* Instance Attributes (at every node) */ + fprintf(stream, "
      \n"); + fprintf(stream, "
    • "); + fprintf(stream, "=== INSTANCE-ATTRIBUTES ==="); + fprintf(stream, "
    • \n"); + g_hash_table_foreach(node->details->attrs, + print_expand_attrs_status, &pdata); + fprintf(stream, "
    \n"); + + /* I/F status (at every node) */ + if (xml_file == NULL) { + fprintf(stream, "
      \n"); + fprintf(stream, "
    • "); + fprintf(stream, "=== INTERCONNECT-LAN ==="); + fprintf(stream, "
    • \n"); + print_expand_if_status(node, NULL, pe_print_html|pe_print_rsconly, stream, &data_set); + fprintf(stream, "
    \n"); + } + } + } + fprintf(stream, "
  • \n"); + ); + fprintf(stream, "
\n"); + + if(group_by_node && inactive_resources) { + fprintf(stream, "

(Partially) Inactive Resources

\n"); + + } else if(group_by_node == FALSE) { + fprintf(stream, "

Resource List

\n"); + } + + if(group_by_node == FALSE || inactive_resources) { + slist_iter(rsc, resource_t, data_set.resources, lpc2, + if(group_by_node && rsc->fns->active(rsc, TRUE)) { + continue; + } + rsc->fns->print(rsc, NULL, pe_print_html, stream); + ); + } + + if (expanded && group_by_node == FALSE) { + struct pdata_s pdata; + + /* set print data */ + pdata.pre_text = NULL; + pdata.options = pe_print_html; + pdata.print_data = stream; + + /*** Fail-count List ***/ + fprintf(stream, "

Fail-count List

\n"); + fprintf(stream, "
    \n"); + slist_iter(node, node_t, data_set.nodes, lpc2, + pdata.node = node; + g_hash_table_foreach(node->details->attrs, + print_expand_failcount, &pdata); + ); + fprintf(stream, "
\n"); + + /*** Instance attributes List ***/ + fprintf(stream, "

Instance attributes List

\n"); + fprintf(stream, "
    \n"); + slist_iter(node, node_t, data_set.nodes, lpc2, + pdata.node = node; + g_hash_table_foreach(node->details->attrs, + print_expand_attrs_status, &pdata); + ); + fprintf(stream, "
\n"); + + /*** I/F status List ***/ + if (xml_file == NULL) { + fprintf(stream, "

Interconnect-LAN List

\n"); + fprintf(stream, "
    \n"); + slist_iter(node, node_t, data_set.nodes, lpc2, + print_expand_if_status(node, NULL, pe_print_html, stream, &data_set); + ); + fprintf(stream, "
\n"); + } + } + + data_set.input = NULL; + cleanup_calculations(&data_set); + fprintf(stream, ""); + fflush(stream); + fclose(stream); + + if (!web_cgi) { + if(rename(filename_tmp, filename) != 0) { + cl_perror("Unable to rename %s->%s", filename_tmp, filename); + } + crm_free(filename_tmp); + } + return 0; +} + + +void +blank_screen(void) +{ +#if CURSES_ENABLED + int lpc = 0; + for(lpc = 0; lpc < LINES; lpc++) { + move(lpc, 0); + clrtoeol(); + } + move(0, 0); + refresh(); +#endif +} + + +void +usage(const char *cmd, int exit_status) +{ + FILE *stream; + + stream = exit_status ? stderr : stdout; + + fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); + fprintf(stream, "\t--%s (-%c) \t: This text\n", "help", '?'); + fprintf(stream, "\t--%s (-%c) \t: Increase the debug output\n", "verbose", 'V'); + fprintf(stream, "\t--%s (-%c) \t: Update frequency\n", "interval", 'i'); + fprintf(stream, "\t--%s (-%c) \t: Group resources by node\n", "group-by-node", 'n'); + fprintf(stream, "\t--%s (-%c) \t: Display inactive resources\n", "inactive", 'r'); + fprintf(stream, "\t--%s (-%c) \t: Display resource fail counts\n", "failcount", 'f'); + fprintf(stream, "\t--%s (-%c) \t: Display resource operation history\n", "operations", 'o'); + fprintf(stream, "\t--%s (-%c) \t: Display cluster status on the console\n", "as-console", 'c'); + fprintf(stream, "\t--%s (-%c) \t: Display the cluster status once as " + "a simple one line output (suitable for nagios)\n", "simple-status", 's'); + fprintf(stream, "\t--%s (-%c) \t: Display the cluster status once on " + "the console and exit (doesnt use ncurses)\n", "one-shot", '1'); + fprintf(stream, "\t--%s (-%c) \t: Write cluster status to the named file\n", "as-html", 'h'); + fprintf(stream, "\t--%s (-%c) \t: Web mode with output suitable for cgi\n", "web-cgi", 'w'); + fprintf(stream, "\t--%s (-%c) \t: Run in the background as a daemon\n", "daemonize", 'd'); + fprintf(stream, "\t--%s (-%c) \t: Daemon pid file location\n", "pid-file", 'p'); + 
fprintf(stream, "\t--%s (-%c) \t: Display fail-count, attribures, and hb-comm status\n", "expand", 'e'); + + fflush(stream); + + clean_up(exit_status); +} + +void +make_daemon(gboolean daemonize, const char *pidfile) +{ + long pid; + const char *devnull = "/dev/null"; + + if (daemonize == FALSE){ + return; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "%s: could not start daemon\n", + crm_system_name); + perror("fork"); + clean_up(LSB_EXIT_GENERIC); + } else if (pid > 0) { + clean_up(LSB_EXIT_OK); + } + + if (cl_lock_pidfile(pidfile) < 0 ){ + pid = cl_read_pidfile(pidfile); + fprintf(stderr, "%s: already running [pid %ld].\n", + crm_system_name, pid); + clean_up(LSB_EXIT_OK); + } + + umask(022); + close(FD_STDIN); + (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ + close(FD_STDOUT); + (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ + close(FD_STDERR); + (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ +} + +/* + * De-init ncurses, signoff from the CIB and deallocate memory. + */ +void clean_up(int rc) +{ +#if CURSES_ENABLED + if(as_console) { + as_console = FALSE; + echo(); + nocbreak(); + endwin(); + } +#endif + + if (cib_conn != NULL) { + cib_conn->cmds->signoff(cib_conn); + cib_delete(cib_conn); + cib_conn = NULL; + } + + crm_free(as_html_file); + crm_free(xml_file); + crm_free(pid_file); + + if(rc >= 0) { + exit(rc); + } + return; +} + +/* + * Print fail-count values of each resources. + */ +void +print_expand_failcount(gpointer key, gpointer value, gpointer user_data) +{ + const char *pre_text = ((struct pdata_s*)user_data)->pre_text; + long options = ((struct pdata_s*)user_data)->options; + void *print_data = ((struct pdata_s*)user_data)->print_data; + node_t *node = ((struct pdata_s*)user_data)->node; + + if (!node->details->online) { + return; + } + + if (strncmp((char *)key, FAIL_COUNT_ATTR, strlen(FAIL_COUNT_ATTR)) == 0) { + if(options & pe_print_html) { + status_print("
  • "); + if (atoi((char *)value) == 0) { + status_print(""); + } + else { + status_print(""); + } + } + /* + * print resource-id and fail-count value. + * resource-id is included fail-count's attribute name. + * (fail-count-) + */ + status_print("%sResourceID: %s (fail-count%s%s:%s)", + pre_text?pre_text:"", + ((char *)key + strlen(FAIL_COUNT_ATTR)), + !(options & pe_print_rsconly)?"::":"", + !(options & pe_print_rsconly)?node->details->uname:"", + (char *)value + ); +#if PRINT_MSG + if (strcmp((char *)value, "0") != 0) { + status_print(":\t%s", "Resource is failure !!"); + } +#endif /* PRINT_MSG */ + if(options & pe_print_html) { + status_print(""); + status_print("
  • "); + } + status_print("\n") + } + + return; +} + +/* + * Print instance attributes. + */ +void +print_expand_attrs_status(gpointer key, gpointer value, gpointer user_data) +{ + const char *pre_text = ((struct pdata_s*)user_data)->pre_text; + long options = ((struct pdata_s*)user_data)->options; + void *print_data = ((struct pdata_s*)user_data)->print_data; + node_t *node = ((struct pdata_s*)user_data)->node; + + if (!node->details->online) { + return; + } + /* + * To avoid redundancy, don't print some attrs. + * Some major attrs (like uname, is_dc etc.) has "#" at the head + * in the g_hash_table. + * (see: add_node_attrs() ./lib/crm/pengine/unpack.c) + */ + if ((strcmp((char *)key, "#"XML_ATTR_UNAME) != 0) && + (strcmp((char *)key, "#"XML_ATTR_ID) != 0) && + (strcmp((char *)key, "#"XML_ATTR_DC) != 0) && + (strncmp((char *)key, FAIL_COUNT_ATTR, strlen(FAIL_COUNT_ATTR)) != 0)) { + if(options & pe_print_html) { + status_print("
  • "); + } + status_print( "%sattribute (%s%s%s:%s)", + pre_text?pre_text:"", + (char *)key, + !(options & pe_print_rsconly)?"::":"", + !(options & pe_print_rsconly)?node->details->uname:"", + (char *)value + ); +#if PRINT_MSG + { + int i; + int j; + int listlen; + GList *plist = NULL; + gboolean printed = FALSE; + + for (i = 0; i < DIMOF(print_msg_clist); i++) { + plist = print_msg_clist[i].attrlist; + if (plist == NULL) { + continue; + } + listlen = g_list_length(plist); + for (j = 0; j < listlen; j ++) { + struct client_attr_s *c_attr = g_list_nth_data(plist, j); + if (c_attr != NULL && c_attr->attr_name != NULL && + c_attr->correct_value != NULL && + strcmp((char *)key, c_attr->attr_name) == 0) { + status_print(":\t%s", + (strcmp(value, c_attr->correct_value) == 0)?print_msg_clist[i].normal_msg:print_msg_clist[i].err_msg); + printed = TRUE; + break; + } + } + if (printed) { + break; + } + } + } +#endif /* PRINT_MSG */ + if(options & pe_print_html) { + status_print("
  • "); + } + status_print("\n") + } + + return; +} + +/* + * Print I/F status information. + */ +void +print_expand_if_status(node_t *node, const char *pre_text, long options, void *print_data, pe_working_set_t *pdata_set) +{ + int i; + struct ifinfo_s *ifinfo; + + if (!node->details->online) { + return; + } + if (gIFTable == NULL) { + return; + } + + /* if membership has changed, update I/F table. */ + if (node_num != g_list_length(pdata_set->nodes)) { + need_update_iftable = TRUE; + node_num = g_list_length(pdata_set->nodes); + } + /* I/F status information has changed, so update it's table. */ + if (need_update_iftable && walk_iftable(pdata_set->nodes) != 0) { + crm_err("Unable to walk I/F status information table"); + free_iftable(); + } + + for (i = 0; i < gIFTable->len; i++) { + ifinfo = (struct ifinfo_s *)g_ptr_array_index(gIFTable, i); + if (strcmp(ifinfo->node, node->details->uname) != 0) { + continue; + } + if (options & pe_print_html) { + status_print("
  • "); + if (safe_str_eq(ifinfo->status, LINKUP)) { + status_print(""); + } + else { + status_print(""); + } + } + status_print("%sheartbeat-comm (%s%s%s:%s)", + pre_text?pre_text:"", + ifinfo->name, + !(options & pe_print_rsconly)?"::":"", + !(options & pe_print_rsconly)?ifinfo->node:"", + ifinfo->status + ); +#if PRINT_MSG + /* LINKUP or DEADSTATUS */ + status_print(":\t%s", + (safe_str_eq(ifinfo->status, LINKUP))?"Heartbeat is working":"Heartbeat is failure !!"); +#endif /* PRINT_MSG */ + if(options & pe_print_html) { + status_print(""); + status_print("
  • "); + } + status_print("\n"); + } + return; +} + +/* + * Signon to Heartbeat. + */ +static int +init_heartbeat(void) +{ + struct passwd *hauser_pw; + + if (hb != NULL) { + CRM_DEV_ASSERT(hb->llc_ops->signoff(hb, TRUE) == HA_OK); + CRM_DEV_ASSERT(hb->llc_ops->delete(hb) == HA_OK); + hb = NULL; + } + crm_debug_4("Creating Heartbeat connection"); + hb = ll_cluster_new("heartbeat"); + + CRM_DEV_ASSERT(hb != NULL); + if(crm_assert_failed) { + return -1; + } + + /* + * To signon as casual client, it is necessary to be general user. + * If you signon as non-casual client, only one user can monitor + * cluster's status with this tool... + */ + hauser_pw = getpwnam(HA_CCMUSER); + if (setegid(hauser_pw->pw_gid) != 0) { + cl_perror("Cannot set egid to [%d]", hauser_pw->pw_gid); + return -1; + } + if (seteuid(hauser_pw->pw_uid) != 0) { + cl_perror("Cannot set euid to [%d]", hauser_pw->pw_uid); + return -1; + } + /* Now, signon! */ + if (hb->llc_ops->signon(hb, NULL) != HA_OK) { + cl_perror("Cannot signon to Heartbeat"); + return -1; + } + + self_uname = hb->llc_ops->get_mynodeid(hb); + if(self_uname == NULL) { + cl_perror("failed to get self node uname"); + return -1; + } + + return 0; +} + +/* + * Create a new array for I/F status information table. + * return 0: succeed. + * -1: an error occured. + */ +static int +init_iftable() +{ + gIFTable = g_ptr_array_new(); + if (gIFTable == NULL) { + return -1; + } + return 0; +} + +/* + * Free I/F status information table. + */ +static void +free_iftable(void) +{ + struct ifinfo_s *ifinfo; + if (gIFTable == NULL) { + return; + } + while (gIFTable->len) { + ifinfo = (struct ifinfo_s *)g_ptr_array_remove_index_fast(gIFTable, 0); + crm_free(ifinfo->name); + crm_free(ifinfo->node); + crm_free(ifinfo->status); + cl_free(ifinfo); + } + return; +} + +/* + * Get all I/F status information about all nodes in the cluster. + * return 0: getting I/F status info is succeed. + * -1: an error occured. 
+ */
+static int
+walk_iftable(GListPtr nodelist)
+{
+    const char *name = NULL;
+    const char *status = NULL;
+    struct ifinfo_s *ifinfo = NULL;
+    gboolean known = FALSE;
+    guint i;
+
+    /*
+     * Every entry collected below is stored in gIFTable, so without a table
+     * there is nothing to fill in.  Report that as an error instead of
+     * dereferencing a NULL array in the duplicate check further down.
+     */
+    if (gIFTable == NULL) {
+        return -1;
+    }
+
+    /* Always rebuild from scratch: drop the entries of the previous walk. */
+    free_iftable();
+
+    slist_iter(node, node_t, nodelist, lpc1,
+        /* With use_umcast set, the node named self_uname is not walked. */
+        if (use_umcast && strcmp(self_uname, node->details->uname) == 0) {
+            continue;
+        }
+
+        if (hb->llc_ops->init_ifwalk(hb, node->details->uname) != HA_OK) {
+            cl_perror("Cannot start if walk");
+            return -1;
+        }
+
+        while ((name = hb->llc_ops->nextif(hb)) != NULL) {
+            /* Skip a (node, interface) pair that is already in the table. */
+            known = FALSE;
+            for (i = 0; i < gIFTable->len; i++) {
+                ifinfo = (struct ifinfo_s *)g_ptr_array_index(gIFTable, i);
+                if (strcmp(ifinfo->node, node->details->uname) == 0 &&
+                    strcmp(ifinfo->name, name) == 0) {
+                    known = TRUE;
+                    break;
+                }
+            }
+            if (known) {
+                continue;
+            }
+
+            /*
+             * NOTE(review): the if_status() result is handed to crm_strdup()
+             * unchecked - confirm it can never be NULL.
+             */
+            status = hb->llc_ops->if_status(hb, node->details->uname, name);
+            ifinfo = (struct ifinfo_s *)cl_malloc(sizeof(*ifinfo));
+            if (ifinfo == NULL) {
+                cl_perror("Cannot malloc for if info");
+                /* Close the walk opened by init_ifwalk() before bailing out. */
+                (void)hb->llc_ops->end_ifwalk(hb);
+                return -1;
+            }
+            ifinfo->name = crm_strdup(name);
+            ifinfo->node = crm_strdup(node->details->uname);
+            ifinfo->status = crm_strdup(status);
+            g_ptr_array_add(gIFTable, ifinfo);
+        }
+
+        if (hb->llc_ops->end_ifwalk(hb) != HA_OK) {
+            cl_perror("Cannot end if walk");
+            return -1;
+        }
+    );
+
+    /* Only a complete walk clears the "table is stale" flag. */
+    need_update_iftable = FALSE;
+    return 0;
+}
+
+/*
+ * Set a flag to update iftable.
+ * This function is called when the client receives and reads a message from
+ * heartbeat which notices I/F status is changed.
+ * None of the arguments is used; the function only marks the table as stale
+ * by setting need_update_iftable.
+ */
+static void
+LinkStatus(const char *node, const char *lnk, const char *status, void *private)
+{
+    need_update_iftable = TRUE;
+}
+
+/*
+ * This function is called when the client receives a message from heartbeat
+ * which notices I/F status is changed.
+ * And read the message with readmsg(), then, LinkStatus() function is called.
+ * return TRUE : read message and call LinkStatus() is succeed.
+ *        FALSE: connection with heartbeat is cut off. then,
+ *               ifstatus_dispatch_destroy() is called.
+ */
+static gboolean
+ifstatus_dispatch(IPC_Channel* ipc, gpointer user_data)
+{
+    struct ha_msg *msg = NULL;
+
+    /* Drain every message that is currently pending on the connection. */
+    while (hb->llc_ops->msgready(hb)) {
+
+        if (ipc->ch_status == IPC_DISCONNECT) {
+            return FALSE;
+        }
+
+        /* The message content itself is not inspected here, only released. */
+        msg = hb->llc_ops->readmsg(hb, 0);
+        if (msg == NULL) {
+            continue;
+        }
+
+        ha_msg_del(msg);
+        msg = NULL;
+    }
+
+    return TRUE;
+}
+
+/*
+ * This function is called the following case.
+ * The client receives a message from heartbeat, but the connection is cut off.
+ * It is intentionally empty.
+ */
+static void
+ifstatus_dispatch_destroy(gpointer user_data)
+{
+    /* do nothing. */
+    return;
+}
+
+/*
+ * Free various memory storages:
+ * the client information table (only when PRINT_MSG is enabled) and the
+ * I/F status table, including the GPtrArray itself.
+ */
+static void
+free_storage(void)
+{
+
+#if PRINT_MSG
+    free_clinfo_table();
+#endif /* PRINT_MSG */
+
+    /* Release the entries first, then the array that held them. */
+    free_iftable();
+    if (gIFTable != NULL) {
+        g_ptr_array_free(gIFTable, 1);
+    }
+    gIFTable = NULL;
+
+    return;
+}
+
+/**********
+ * The following is functions for printing messages.
+ *********/
+#if PRINT_MSG
+/*
+ * Parse respawn command strings, and get its option strings.
+ * Then, copy them to client information table for printing message.
+ *
+ * bp points at the command part of a respawn line: "<path> <options...>".
+ * The command name is the text after the last '/' of <path>; a path without
+ * any '/' is ignored.  For every print_msg_clist[] entry whose client_name
+ * equals the command name, a new client_attr_s holding a copy of the option
+ * string is appended to that entry's attrlist.
+ */
+static void
+get_client_options(const char *bp)
+{
+    const char *cmdname = NULL;
+    const char *optp = NULL;
+    size_t pathlen;
+    char cmdpath[MAXLINE];
+    struct client_attr_s *c_attr = NULL;
+    int i;
+
+    /* parse cmdpath and get cmdname (bounded copy, always NUL-terminated) */
+    snprintf(cmdpath, sizeof(cmdpath), "%s", bp);
+    pathlen = strcspn(cmdpath, WHITESPACE);
+    cmdpath[pathlen] = EOS;
+    cmdname = strrchr(cmdpath, '/');
+    if (cmdname == NULL) {
+        return;
+    }
+    cmdname += 1; /* skip over a slash to move the head of command name. */
+
+    /* get the head of option strings. */
+    optp = bp + pathlen;
+    optp += strspn(optp, WHITESPACE);
+
+    /* copy each client's option strings. */
+    for (i = 0; i < DIMOF(print_msg_clist); i++) {
+        if (strcmp(cmdname, print_msg_clist[i].client_name) == 0) {
+            crm_malloc0(c_attr, sizeof(struct client_attr_s));
+            c_attr->option_strs = crm_strdup(optp);
+            print_msg_clist[i].attrlist =
+                g_list_append(print_msg_clist[i].attrlist, c_attr);
+        }
+    }
+
+    return;
+}
+
+/*
+ * Get option strings of each respawn client and set them to
+ * client information table.
+ * The index of the print_msg_clist[] entry is handed to set_clattr_table()
+ * through the user_data pointer.
+ */
+static void
+set_clinfo_table(void)
+{
+    int i;
+
+    for (i = 0; i < DIMOF(print_msg_clist); i++) {
+        if (print_msg_clist[i].attrlist != NULL) {
+            g_list_foreach(print_msg_clist[i].attrlist,
+                (GFunc)set_clattr_table, &i);
+        }
+    }
+}
+
+/*
+ * Parse respawn commands' option strings, and get specified values.
+ * And set them to client attribute table for printing message.
+ * For the present, the targets to print message are the following clients.
+ * - pingd
+ * - diskd
+ * If you want to add target client, do the following.
+ * 1. add client name and normal/error messages in the print_msg_clist[].
+ * 2. write some process to set attribute name and its value in this function.
+ *
+ * data      : the client_attr_s to fill in (NULL is ignored).
+ * user_data : pointer to an int, the index into print_msg_clist[].
+ */
+static void
+set_clattr_table(gpointer data, gpointer user_data)
+{
+    char optvalue[MAXLINE];
+    int index = *(int *)user_data;
+    struct client_attr_s *c_attr = NULL;
+    char *optp = NULL;
+
+    if (data == NULL) {
+        return;
+    }
+
+    /* Initialization: drop values left over from a previous call */
+    c_attr = (struct client_attr_s *)data;
+    optp = c_attr->option_strs;
+    crm_free(c_attr->attr_name);
+    crm_free(c_attr->correct_value);
+
+    /*
+     * set each client's attribute name and correct value.
+     * it's ugly... but optargs for attribute name and correct value are
+     * different every client.
+     */
+    if (index == PINGD_INDEX) {
+        /*
+         * here is for pingd client.
+         */
+        char correct_value[MAXLINE];
+        int moption_value = 0;
+
+        /* set attribute name from ha.cf */
+        get_optvalue(optp, "-a", PINGD_DEF_ATTR, optvalue);
+        c_attr->attr_name = crm_strdup(optvalue);
+
+        /* set multiple * ping-node-number value */
+        get_optvalue(optp, "-m", PINGD_DEF_MULTIPLE, optvalue);
+        moption_value = atoi(optvalue);
+
+        /*
+         * The two lookups below are made only for their side effect:
+         * get_optvalue() bumps Noption_count once per "-N"/"-h" it finds.
+         */
+        Noption_count = 0;
+        get_optvalue(optp, "-N", PINGD_DEF_PINGTARGET, optvalue);
+        get_optvalue(optp, "-h", PINGD_DEF_PINGTARGET, optvalue);
+
+        /* no explicit targets: fall back to the ping nodes counted in ha.cf */
+        if (Noption_count == 0) {
+            snprintf(correct_value, sizeof(correct_value), "%d",
+                (ping_node_num * moption_value));
+        } else {
+            snprintf(correct_value, sizeof(correct_value), "%d",
+                (Noption_count * moption_value));
+        }
+        c_attr->correct_value = crm_strdup(correct_value);
+    }
+    if (index == DISKD_INDEX) {
+        /*
+         * here is for diskd client.
+         */
+
+        /* set attribute name from ha.cf */
+        get_optvalue(optp, "-a", DISKD_DEF_ATTR, optvalue);
+        c_attr->attr_name = crm_strdup(optvalue);
+
+        /* diskd's correct value is fixed. */
+        c_attr->correct_value = crm_strdup(DISKD_CORRECT_VALUE);
+    }
+    /*
+     * If you want to add target client, write some process here
+     * to set its attribute name and its correct value.
+     */
+
+    return;
+}
+
+/*
+ * Get value of specified option.
+ *
+ * optp          : option string to scan.
+ * optname       : option to look for; it is matched as a prefix of a word
+ *                 (strncmp over strlen(optname) characters).
+ * default_value : copied to optvalue when no non-empty value was found.
+ * optvalue      : output buffer; the caller provides the storage.
+ *
+ * When the option occurs more than once the last occurrence wins, because
+ * optvalue is reset at every match.  Each match of "-N" or "-h" increments
+ * the global Noption_count.  A value may be wrapped in single or double
+ * quotes; the quotes themselves are not copied.
+ */
+static void
+get_optvalue(const char *optp, const char *optname, const char* default_value, char *optvalue)
+{
+    const char *optvalp;
+    char skip_chars[MAXLINE];
+    gboolean in_quotes = FALSE;
+    size_t strlength = 0;
+
+    optvalp = optp;
+    *optvalue = '\0';
+    while (strlen(optvalp) != 0) {
+        /* skip over Whitespaces. */
+        optvalp += strspn(optvalp, WHITESPACE);
+
+        if (strncmp(optvalp, optname, strlen(optname)) == 0) {
+            if (strncmp(optname, "-N", strlen(optname)) == 0 ||
+                strncmp(optname, "-h", strlen(optname)) == 0) {
+                Noption_count ++;
+            }
+            *optvalue = '\0';
+
+            /* if found, get and set specified value. */
+            /* move to the tail of option string. */
+            optvalp += strlen(optname);
+            /* skip over Whitespaces. */
+            optvalp += strspn(optvalp, WHITESPACE);
+
+            /* collect the value piece by piece until whitespace or the end */
+            while (1) {
+                if (strspn(optvalp, WHITESPACE) != 0 || strlen(optvalp) == 0) {
+                    break;
+                }
+                /* if the value is put in quotes, get it without quotes. */
+                if (optvalp[0] == '\"' || optvalp[0] == '\'') {
+                    in_quotes = TRUE;
+                    /* the piece ends at the matching quote character */
+                    strncpy(skip_chars, optvalp, 1);
+                    skip_chars[1] = EOS;
+                    optvalp++; /* skip a quote character */
+                } else {
+                    /* the piece ends at whitespace or at the next quote */
+                    strncpy(skip_chars, WHITESPACE"\"\'",
+                        strlen(WHITESPACE) + 2);
+                    skip_chars[strlen(WHITESPACE) + 2] = EOS;
+                }
+
+                strlength = strcspn(optvalp, skip_chars);
+                strncat(optvalue, optvalp, strlength);
+                optvalp += strlength;
+
+                if (in_quotes && strlen(optvalp) > 0) {
+                    optvalp++; /* skip a quote character */
+                    in_quotes = FALSE;
+                }
+
+            }
+
+        }
+        /* check next characters. */
+        optvalp += strcspn(optvalp, WHITESPACE);
+    }
+
+    if (strlen(optvalue) == 0) {
+        /* if not found, set default value. */
+        strncpy(optvalue, default_value, strlen(default_value) + 1);
+    }
+
+    return;
+}
+
+/*
+ * Free one client attribute entry: its three strings and the entry itself.
+ * Used as the g_list_foreach() callback of free_clinfo_table().
+ *
+ * NOTE(review): this is invoked through a (GFunc) cast although it declares
+ * a single parameter - consider aligning the prototype with GFunc.
+ */
+static void
+free_attrlist(gpointer *data)
+{
+    struct client_attr_s *c_attr = NULL;
+    if (data == NULL) {
+        return;
+    }
+    c_attr = (struct client_attr_s *)data;
+    crm_free(c_attr->option_strs);
+    crm_free(c_attr->attr_name);
+    crm_free(c_attr->correct_value);
+    /* the entry was allocated in get_client_options(); release it as well */
+    crm_free(c_attr);
+    return;
+}
+
+/*
+ * Free client information table for printing message.
+ * Every attrlist is emptied: the entries, then the list cells, and finally
+ * the list head is reset to NULL.
+ */
+static void
+free_clinfo_table(void)
+{
+    int i;
+
+    for (i = 0; i < DIMOF(print_msg_clist); i++) {
+        GList *attrs = print_msg_clist[i].attrlist;
+
+        g_list_foreach(attrs, (GFunc)free_attrlist, NULL);
+        /* without this the list cells leak on every re-parse of ha.cf */
+        g_list_free(attrs);
+        print_msg_clist[i].attrlist = NULL;
+    }
+    return;
+}
+
+/*
+ * Read and parse config file (ha.cf).
+ * return 0: parse is succeed.
+ *       -1: an error occurred.
+ * Referred to parse_config() in heartbeat/config.c
+ *
+ * Side effects: resets ping_node_num and use_umcast, empties the client
+ * information table and refills it from the KEY_CLIENT_CHILD lines.
+ */
+static int
+parse_config_file(void)
+{
+    FILE *fd = NULL;
+    char buff[MAXLINE];
+    char directive[MAXLINE];
+    size_t dirlength = 0;
+
+    /* start from a clean slate: counter, flag and client info table */
+    ping_node_num = 0;
+    use_umcast = FALSE;
+    free_clinfo_table();
+
+    /* get respawn modules' info from ha.cf */
+    fd = fopen(CONFIG_NAME, "r");
+    if (fd == NULL) {
+        cl_perror("Cannot open config file [%s]", CONFIG_NAME);
+        return -1;
+    }
+
+    while (fgets(buff, MAXLINE, fd) != NULL) {
+        char *bp = buff;
+        char *cp = NULL;
+
+        /* Skip over white space */
+        bp += strspn(bp, WHITESPACE);
+
+        /* Zap comments on the line */
+        cp = strchr(bp, COMMENTCHAR);
+        if (cp != NULL) {
+            *cp = EOS;
+        }
+
+        /* Strip '\n' and '\r' chars */
+        cp = strpbrk(bp, CRLF);
+        if (cp != NULL) {
+            *cp = EOS;
+        }
+
+        /* Ignore blank (and comment) lines */
+        if (*bp == EOS) {
+            continue;
+        }
+
+        /*
+         * Now we expect a directive name.
+         * dirlength is below MAXLINE because bp points into buff.
+         */
+        dirlength = strcspn(bp, WHITESPACE);
+        memcpy(directive, bp, dirlength);
+        directive[dirlength] = EOS;
+
+        if (strcmp(directive, PINGNODE) == 0) {
+            /* every word after the directive is one ping node */
+            const char *pnodes = bp + dirlength;
+
+            pnodes += strspn(pnodes, WHITESPACE);
+            ping_node_num += get_pingnode_num(pnodes);
+            continue;
+        }
+        if (strcmp(directive, "ping_group") == 0) {
+            /* a whole ping group counts as a single ping node */
+            ping_node_num++;
+            continue;
+        }
+
+        if (strcmp(directive, "ucast") == 0 || strcmp(directive, "mcast") == 0) {
+            use_umcast = TRUE;
+            continue;
+        }
+
+        if (strcmp(directive, KEY_CLIENT_CHILD) != 0) {
+            continue;
+        }
+
+        /*
+         * Now, bp is the head of respawn client's information.
+         * move it to the head of command strings.
+         */
+        bp += dirlength;               /* move bp to the tail of directive. */
+        bp += strspn(bp, DELIMS);      /* skip over Delimiters. */
+        bp += strcspn(bp, WHITESPACE); /* move bp to the tail of uid. */
+        bp += strspn(bp, WHITESPACE);  /* skip over Whitespaces. */
+
+        /*
+         * Now, bp is the head of command strings.
+         * So, parse it and get respawn client's option strings.
+         */
+        get_client_options(bp);
+    }
+    fclose(fd);
+
+    /*
+     * set each information to client information table.
+     * This runs after the whole file was read, so ping_node_num is final.
+     */
+    set_clinfo_table();
+    return 0;
+}
+
+/*
+ * Parse a line which start with "ping" directive, and count ping nodes.
+ * head   : the text following the directive; NULL counts as empty.
+ * return : ping node num, i.e. the number of whitespace separated words.
+ */
+static int
+get_pingnode_num(const char *head)
+{
+    int pnum = 0;
+
+    if (head == NULL) {
+        return 0;
+    }
+
+    /*
+     * Skip leading whitespace first.  The previous loop only advanced after
+     * a non-empty word, so an argument starting with whitespace made it spin
+     * forever.
+     */
+    head += strspn(head, WHITESPACE);
+    while (*head != EOS) {
+        pnum++;
+        head += strcspn(head, WHITESPACE); /* move to the tail of the word. */
+        head += strspn(head, WHITESPACE);  /* skip over Whitespaces. */
+    }
+    return pnum;
+}
+
+#endif /* PRINT_MSG */
diff -r aa909246edb3 crm/admin/hb_monitor/hb_monitor.spec --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crm/admin/hb_monitor/hb_monitor.spec Thu Jan 13 20:58:57 2011 +0900 @@ -0,0 +1,85 @@ +######################################## +# Derived definitions +######################################## +%define name hb-monitor +%define version 1.03 +%define release 1.hb214 +%define prefix /usr +%define CMDNAME hb_monitor +%define ORGARCH Heartbeat-2-1-STABLE-2.1.4 +# +# +# +Summary: Heartbeat Cluster Status Monitor +Name: %{name} +Version: %{version} +Release: %{release} +Group: Applications +Source: %{ORGARCH}.tar.gz +Patch: %{name}-%{version}-%{release}.patch +License: GPL/LGPL +Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION +BuildRoot: %{_tmppath}/%{name}-%{version} +BuildRequires: autoconf, automake libtool +Requires: heartbeat = 2.1.4 +#BuildArch: i386 +#BuildArch: x86_64 + +######################################## +%description +######################################## +Cluster Status Monitor for Heartbeat. 
+ +######################################## +%prep +######################################## +rm -rf $RPM_BUILD_ROOT +%setup -q -n %{ORGARCH} +%patch -p1 +pushd $RPM_BUILD_DIR/%{ORGARCH} +./ConfigureMe bootstrap +popd + +######################################## +%build +######################################## +pushd $RPM_BUILD_DIR/%{ORGARCH} +make DESTDIR=$RPM_BUILD_ROOT +popd + +######################################## +%install +######################################## +pushd $RPM_BUILD_DIR/%{ORGARCH}/crm/admin/%{CMDNAME} +make DESTDIR=$RPM_BUILD_ROOT install +popd + +######################################## +%clean +######################################## +if + [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] +then + rm -rf $RPM_BUILD_ROOT +fi +rm -rf $RPM_BUILD_DIR/%{ORGARCH} + +######################################## +%post +######################################## +true +######################################## +%preun +######################################## +true +######################################## +%postun +######################################## +true + +######################################## +%files +######################################## +%defattr(-,root,root) +%{prefix}/sbin/%{CMDNAME} + diff -r aa909246edb3 heartbeat.spec.in --- a/heartbeat.spec.in Mon Aug 18 14:32:19 2008 +0200 +++ b/heartbeat.spec.in Thu Jan 13 20:58:57 2011 +0900 @@ -498,6 +498,7 @@ %{sbindir}/crm_failcount %{sbindir}/crm_sh %{sbindir}/ha_logger +%{sbindir}/hb_monitor %dir %attr (755, %{HA_CCMUSER}, %{HA_APIGROUP}) %{HA_VARRUNDIR}/ccm %dir %attr (750, %{HA_CCMUSER}, %{HA_APIGROUP}) %{HA_VARRUNDIR}/crm %dir %attr (750, %{HA_CCMUSER}, %{HA_APIGROUP}) %{localstatedir}/lib/%{HB_PKG}/crm