Discussion:
[CI] [PATCH] DLM-CI integration
Aneesh Kumar K.V
2002-03-27 10:01:10 UTC
Hi,

Attaching below is the latest patch.

known problem : Nothing :) [ I have to run some wonderful test cases now ]

-aneesh

diff -Nru --exclude=test dlm.old/source/Makefile dlm/source/Makefile
--- dlm.old/source/Makefile Wed Dec 5 02:27:47 2001
+++ dlm/source/Makefile Fri Mar 8 12:46:20 2002
@@ -55,7 +55,11 @@


KERNEL_DIRS = dlmdk dlmcccp dlmkernapi
+ifeq ($(CLUSTMGR),ci-linux)
+USER_DIRS = dlmdu libdlm
+else
USER_DIRS = dlmdu dlmdcl libdlm
+endif

.PHONY: clean kernel user kernel_dep user_dep $(KERNEL_DIRS) $(USER_DIRS)

diff -Nru --exclude=test dlm.old/source/dlmcccp/cccp_udp.c dlm/source/dlmcccp/cccp_udp.c
--- dlm.old/source/dlmcccp/cccp_udp.c Wed Oct 10 01:53:31 2001
+++ dlm/source/dlmcccp/cccp_udp.c Thu Mar 21 20:07:29 2002
@@ -25,6 +25,11 @@
*/
#ident "$Id: cccp_udp.c,v 1.14 2001/10/09 20:23:31 idr Exp $"

+/*
+ * 21March02 Kai-Min Sung <Kai-***@compaq.com>
+ * Make cccp_poll_thread() smarter about incoming signals.
+ */
+
/*
* COMPONENT_NAME: LIB
*
@@ -163,9 +168,23 @@
msg_header.msg_controllen = 0;
msg_header.msg_flags = 0;

+recvmsg:
bytes = CCCP_SOCK_RECVMSG( cccp_our_sock, & msg_header, bufferSize, 0 );
if ( bytes < 0 )
{
+
+ /* If we get any signals, other than SIGINT, just flush them and
+ * restart the socket read.
+ */
+ if ( ((-bytes == EINTR) || (-bytes == ERESTARTSYS))
+ && !sigismember(&current->pending.signal, SIGINT)) {
+ unsigned long _flags;
+ spin_lock_irqsave(&current->sigmask_lock, _flags);
+ flush_signals(current);
+ spin_unlock_irqrestore(&current->sigmask_lock, _flags);
+ goto recvmsg;
+ }
+
/* The poll task gets hit with a signal and cccp_time_to_die gets
* set when it's time for CCCP to shutdown. In this case, we should
* NOT log message that the recvmsg call failed. Otherwise, log a
@@ -347,7 +366,7 @@
}
else if ( err != 0 )
{
- cccp_time_to_die = TRUE;
+ cccp_time_to_die = TRUE;
}
}
}
diff -Nru --exclude=test dlm.old/source/dlmdk/Makefile dlm/source/dlmdk/Makefile
--- dlm.old/source/dlmdk/Makefile Sat Nov 3 03:43:55 2001
+++ dlm/source/dlmdk/Makefile Fri Mar 8 12:53:43 2002
@@ -42,21 +42,23 @@
endif

DEFS = -DDBPRINT
-#DEFS += -DDEBUG
+DEFS += -DDEBUG

+KERN_HEADER=/usr/src/linux/include
ifneq ($(SOURCE_PATH),)
VPATH = $(SOURCE_PATH)/dlmdk
-INC = -I. -I$(SOURCE_PATH)/dlmdk -I$(SOURCE_PATH)/include
+INC = -I. -I$(SOURCE_PATH)/dlmdk -I$(SOURCE_PATH)/include -I$(KERN_HEADER)
MKDEP = $(SOURCE_PATH)/mkdep
DLM_DEVMSG_H = $(SOURCE_PATH)/include/dlm_devmsg.h
CLMSTRUCTS_H = $(SOURCE_PATH)/include/clmstructs.h
else
-INC = -I. -I../include
+INC = -I. -I../include -I$(KERN_HEADER)
MKDEP = ../mkdep
DLM_DEVMSG_H = ../include/dlm_devmsg.h
CLMSTRUCTS_H = ../include/clmstructs.h
endif

+
OPT = -O2
DEBUG = -Wall
LIBS =
diff -Nru --exclude=test dlm.old/source/dlmdk/clm_info.c dlm/source/dlmdk/clm_info.c
--- dlm.old/source/dlmdk/clm_info.c Wed Oct 24 03:51:44 2001
+++ dlm/source/dlmdk/clm_info.c Fri Mar 8 14:21:55 2002
@@ -652,6 +652,7 @@
#endif

node_count = clm_node_count();
+
#ifdef DEBUG
count = node_count;
#endif
@@ -666,6 +667,7 @@
i = (i + 1) % node_count;
node_id = INDEX_TO_NODEID( i );

+
#ifdef DEBUG
CLM_ASSERT( count != 0, "Too many loop iterations!" );
count--;
diff -Nru --exclude=test dlm.old/source/dlmdk/dlm_kerndd.c dlm/source/dlmdk/dlm_kerndd.c
--- dlm.old/source/dlmdk/dlm_kerndd.c Sat Oct 20 04:12:38 2001
+++ dlm/source/dlmdk/dlm_kerndd.c Fri Mar 22 18:47:42 2002
@@ -71,6 +71,12 @@
* proc_print_vdevice
*/

+/* 8 MARCH 2002 ( ***@digital.com)
+ * Added support for kernel cluster manager
+ * define KERN_CLMGR in the user space and
+ * CONFIG_CLMS in the kernel space for Cluster Infrastructure for Linux
+ */
+
/* ============================================================ */
#ifndef __KERNEL__
#define __KERNEL__
@@ -100,6 +106,14 @@
#include <linux/timer.h>
#include <asm/uaccess.h>

+
+#ifdef CONFIG_CLMS
+#include <cluster/clms.h>
+/* To avoid conflicting type with nsc.h and dlm header files*/
+#define BOOL_T_DEFINED
+#endif
+
+
#include "dlm.h"
#include "ast.h"
#include "ast_stub.h"
@@ -112,6 +126,9 @@
#include "dlm_devmsg.h"
#include "dlm_workqueue.h"
#include "cltrc.h"
+#ifdef CONFIG_CLMS
+#include "dlm_clust.h"
+#endif

#include "dlm_proto.h" /* Should be LAST included file. */

@@ -131,6 +148,11 @@
int haDLM_admin_minor = 0; /* Minor number for "admin" device */
int haDLM_locks_minor = 0; /* Minor number for "locks" device */

+#ifdef CONFIG_CLMS
+ static MQ_DLM_TOP_INFO_t *cli_topo;
+ static int first_clmgr_call = 1 ;
+#endif
+
MODULE_AUTHOR("Peter Wombat");
MODULE_DESCRIPTION("The ha Distributed Lock Manager device.");
MODULE_PARM(haDLM_major_number, "i"); /* an int */
@@ -164,6 +186,22 @@
struct file *file,
unsigned int cmd,
unsigned long arg);
+#ifdef CONFIG_CLMS
+
+int dlm_cli_nodeup( void *clms_handle,
+ int service,
+ clusternode_t node,
+ clusternode_t surrogate,
+ void *private) ;
+int dlm_cli_nodedown( void *clms_handle,
+ int service,
+ clusternode_t node,
+ clusternode_t surrogate,
+ void *private) ;
+
+MQ_DLM_TOP_INFO_t * get_upnode_list(void);
+#endif
+

/* ------------------------------------------------------------ */
/*
@@ -231,6 +269,7 @@

static unsigned long jiffies_per_second;

+
/* ------------------------------------------------------------ */
/*
* Keep track of admin client.
@@ -328,6 +367,7 @@
{
int _i;

+
if ( haDLM_name == NULL ) {
bsdlog( LOG_ERR, "haDLM_name not set by module loader. Terminating.\n" );
return( -1 );
@@ -363,12 +403,24 @@
dlm_proc_start( haDLM_name );
#endif /* CONFIG_PROC_FS */

- printk("[%s] Success, register device [%d/%s]\n",
- haDLM_name,
- haDLM_major_number,
- haDLM_name);

- return(0);
+#ifdef CONFIG_CLMS
+ return (register_clms_subsys("dlm",
+ -1,
+ dlm_cli_nodeup,
+ dlm_cli_nodedown,
+ NULL,
+ NULL,
+ NULL));
+
+#endif
+
+ printk("[%s] Success, register device [%d/%s]\n",
+ haDLM_name,
+ haDLM_major_number,
+ haDLM_name);
+
+ return(0);
}

/* ------------------------------------------------------------ */
@@ -792,8 +844,10 @@
dlm_MSG_t *message;
dlm_MSG_types_t command;
MQ_DLM_TOP_INFO_t *topo;
+
dlm_workunit_t *wu;

+
_rc = lbuf;

_device = (struct our_vdevice *)filp->private_data;
@@ -868,7 +922,9 @@
haDLM_name,
kproc_pid);
}
+#ifndef CONFIG_CLMS
haDLM_allow_locks = 1;
+#endif
break;
case haDLM_MSG_Stop:
printk("[%s] STOP message received\n",
@@ -877,6 +933,16 @@
case haDLM_MSG_CMGR:
printk("[%s] CMGR message received\n",
haDLM_name);
+ if( message->msg_ptr_size == 0 ) {
+
+ /* This is the message from the user space daemon
+ * requesting the kernel cluster manager
+ * to update the up node list
+ */
+
+ topo = get_upnode_list();
+ goto update_nodelist;
+ }
if (NULL ==
(topo = kmalloc_something(message->msg_ptr_size,
"CMGR topology block"))) {
@@ -899,6 +965,22 @@
haDLM_name,
lbuf);
}
+
+#ifdef CONFIG_CLMS
+ /* Check here if the call is the first call. If so, use this for
+ * building the array of nodes in which dlm is running
+ */
+ if(first_clmgr_call)
+ {
+ cli_topo = topo;
+ first_clmgr_call = 0;
+ /* Only after building the initial topology i should accept locks */
+ haDLM_allow_locks = 1;
+ /* Preparing the new topology with node up information */
+ topo = get_upnode_list();
+ }
+#endif
+update_nodelist:
if (NULL ==
(wu = kmalloc_something(sizeof(dlm_workunit_t),
"WorkUnit for topology block"))) {
@@ -1337,3 +1419,103 @@

return(_rc);
}
+
+#ifdef CONFIG_CLMS
+
+int dlm_cli_nodeup( void *clms_handle,
+ int service,
+ clusternode_t node,
+ clusternode_t surrogate,
+ void *private)
+{
+ MQ_DLM_TOP_INFO_t *new_topo;
+ dlm_workunit_t *wu;
+
+ if (!haDLM_allow_locks) {
+ printk("locks: not yet initialized.\n");
+ clms_nodeup_callback(clms_handle, service, node);
+ return 0;
+ }
+
+ new_topo = get_upnode_list();
+ if (new_topo == NULL)
+ return(-ENOMEM); /* Check for the return */
+ if (NULL ==
+ (wu = kmalloc_something(sizeof(dlm_workunit_t),
+ "WorkUnit for topology block"))) {
+ return(-ENOMEM);
+ }
+ wu->data = new_topo;
+ wu->free_data = kfree_topo_block;
+ wu->type = MQ_DLM_TOP_INFO_MSG;
+ dlm_workqueue_put_work(dlm_master_work_queue, wu);
+ clms_nodeup_callback(clms_handle, service, node);
+ return 0;
+}
+
+
+int dlm_cli_nodedown( void *clms_handle,
+ int service,
+ clusternode_t node,
+ clusternode_t surrogate,
+ void *private)
+{
+ MQ_DLM_TOP_INFO_t *new_topo;
+ dlm_workunit_t *wu;
+
+ if (!haDLM_allow_locks) {
+ printk("locks: not yet initialized.\n");
+ clms_nodedown_callback(clms_handle, service, node);
+ return 0;
+ }
+
+
+ new_topo = get_upnode_list();
+ if (new_topo == NULL)
+ return(-ENOMEM);
+ if (NULL ==
+ (wu = kmalloc_something(sizeof(dlm_workunit_t),
+ " WorkUnit for topology block"))) {
+ return(-ENOMEM);
+ }
+ wu->data = new_topo;
+ wu->free_data = kfree_topo_block;
+ wu->type = MQ_DLM_TOP_INFO_MSG;
+ dlm_workqueue_put_work(dlm_master_work_queue, wu);
+ clms_nodedown_callback(clms_handle, service, node);
+ return 0;
+}
+/* Getting the list of already up nodes */
+MQ_DLM_TOP_INFO_t * get_upnode_list(void)
+{
+ MQ_DLM_TOP_INFO_t *new_topo;
+ MQ_DLM_USEADDR_t *addr;
+ int i ;
+ int size = (cli_topo->n_nodes * sizeof(MQ_DLM_USEADDR_t)) + sizeof(MQ_DLM_TOP_INFO_t);
+
+ if (NULL ==
+ (new_topo = kmalloc_something(size, "CMGR topology block"))) {
+ return(NULL);
+ }
+
+ memcpy(new_topo,cli_topo,size);
+
+ for(i= 0 ; i <new_topo->n_nodes;i++ ) {
+ addr = &(new_topo->addrs[i]);
+ /* You can't use clms_isnodeup here because during NODEUP
+ * event clms_isnodeup on the particular node doesn't
+ * return 1
+ */
+
+ if(clms_isnodedown((clusternode_t)addr->nodeid)) {
+ addr->dlm_major = 0;
+ addr->dlm_minor = 0;
+ addr->useaddr.s_addr = 0;
+ }
+ }
+
+ return new_topo;
+
+ }
+
+#endif /* CONFIG_CLMS */
diff -Nru --exclude=test dlm.old/source/dlmdk/dlm_recover.c dlm/source/dlmdk/dlm_recover.c
--- dlm.old/source/dlmdk/dlm_recover.c Fri Nov 30 03:29:05 2001
+++ dlm/source/dlmdk/dlm_recover.c Wed Mar 27 23:41:42 2002
@@ -946,7 +946,6 @@
{
struct resource * res;
unsigned i;
- bool_t done;


/* Handle any directory updates read during early rebuild.
@@ -958,22 +957,14 @@
/* Rebuild lost resources.
*/

- done = FALSE;
- for ( i = 0 ; i < dlm_buff_per_try ; i++ )
- {
- done = iterator_next( iter, & res );
- if ( done )
- {
- break;
- }
+ while(!iterator_next(iter,&res)) {

if ( (res->rsrc_flags & RSRC_ACTIVE) != 0 )
{
clmr_res( res );
}
}
-
- if ( done && (atomic_read( & dlm_recov_updates_inflight ) == 0) )
+ if ( atomic_read( & dlm_recov_updates_inflight ) == 0 )
{
STATE_TRAN( me, RC_BARRIER4 );
}
@@ -1123,17 +1114,9 @@
{
struct resource * res;
unsigned i;
- bool_t done;


- done = FALSE;
- for ( i = 0 ; i < dlm_buff_per_try ; i++ )
- {
- done = iterator_next( iter, & res );
- if ( done )
- {
- break;
- }
+ while(!iterator_next(iter,&res)) {

if ( (res->rsrc_flags & RSRC_ACTIVE) != 0 )
{
@@ -1146,7 +1129,7 @@
* move on to the next state.
*/

- if ( done && (atomic_read( & dlm_recov_updates_inflight ) == 0) )
+ if ( atomic_read( & dlm_recov_updates_inflight ) == 0 )
{
STATE_TRAN( me, RC_BARRIER3 );
}
diff -Nru --exclude=test dlm.old/source/dlmdk/dlm_workqueue.c dlm/source/dlmdk/dlm_workqueue.c
--- dlm.old/source/dlmdk/dlm_workqueue.c Wed Oct 17 00:30:11 2001
+++ dlm/source/dlmdk/dlm_workqueue.c Fri Mar 22 18:25:03 2002
@@ -24,6 +24,11 @@
* http://www.gnu.org/copyleft/gpl.html
*/

+ /* 22MAR02 Aneesh Kumar KV ( ***@digital.com )
+ * Educating dlm_workqueue_get_work about SIGCLUSTER
+ * Thanks to Kai.
+ */
+
#ident "$Id: dlm_workqueue.c,v 1.7 2001/10/16 19:00:11 idr Exp $"

/* ============================================================ */
@@ -105,8 +110,18 @@

if ( ! non_blocking )
{
+dlm_get_work:
ret = down_interruptible( & work->sema );
if (-EINTR == ret) {
+#ifdef CONFIG_CLMS
+ if(sigismember(&current->pending.signal,SIGCLUSTER)) {
+ unsigned long _flags;
+ spin_lock_irqsave(&current->sigmask_lock,_flags);
+ flush_signals(current);
+ spin_unlock_irqrestore(&current->sigmask_lock,_flags);
+ goto dlm_get_work;
+ }
+#endif
dlm_start_shutdown();
}
get_work = TRUE;
diff -Nru --exclude=test dlm.old/source/dlmdu/Makefile dlm/source/dlmdu/Makefile
--- dlm.old/source/dlmdu/Makefile Fri Mar 9 07:05:16 2001
+++ dlm/source/dlmdu/Makefile Wed Mar 20 01:38:54 2002
@@ -78,13 +78,19 @@
# which one (either rsct or heartbeat.)


+ifeq ($(CLUSTMGR),ci-linux)
+DEFS += -DKERN_CLMGR -DCI_LINUX
+LIBS = -lpthread -ldl -lcluster
+else
ifneq ($(CLUSTMGR),)
DEFS += -DDIRECT_CLUSTINT

ifeq ($(CLUSTMGR),rsct)
LIBS = -lha_gs_r -lpthread
else
+ifeq ($(CLUSTMGR),heartbeat)
LIBS = -lhbclient -lpthread
+endif
endif

else
@@ -93,6 +99,7 @@
LIBS = -lpthread -ldl

endif
+endif

CPPFLAGS += $(DEFS) $(INC)
PROTOFLAGS = -e -DCPROTO $(CPPFLAGS)
@@ -109,6 +116,7 @@
daemon_sources += dlm_heartbeat.c
endif

+
OFILES_U= $(daemon_sources:.c=.o)
CFILES = $(daemon_sources)

diff -Nru --exclude=test dlm.old/source/dlmdu/dlm_daemon.c dlm/source/dlmdu/dlm_daemon.c
--- dlm.old/source/dlmdu/dlm_daemon.c Thu Oct 25 00:03:03 2001
+++ dlm/source/dlmdu/dlm_daemon.c Fri Mar 22 16:39:32 2002
@@ -124,9 +124,11 @@
#include <time.h>
#include <assert.h>

+#ifndef KERN_CLMGR
#ifndef DIRECT_CLUSTINT
#include <dlfcn.h> /* Dynamic load support. */
#endif /* !DIRECT_CLUSTINT */
+#endif /* KERN_CLMGR */

#include "dlm.h"
#include "dlm_devmsg.h"
@@ -146,6 +148,13 @@
#define MSGSTR(Num, Str) Str
#endif /* HAVING_NLS_ON_LINUX */

+#ifdef CI_LINUX
+#ifndef SIGCLUSTER
+#define SIGCLUSTER 33
+#endif
+#endif
+
+
/* ============================================================ */
/*
* File descriptor for open file against the admin device port
@@ -176,10 +185,15 @@
* Functions in this mainline manual that are used to interact
* with the cluster interface code.
*/
+#ifndef KERN_CLMGR
+
int cluster_manager_setup(dlm_node_array_t *nodes);
#ifndef DIRECT_CLUSTINT
void *cluster_interface_call(void *arg);
#endif /* !DIRECT_CLUSTINT */
+
+#endif /* KERN_CLMGR */
+
int drain_event_queue(MQ_DLM_TOP_INFO_t *node_events);
int find_configured_node(dlm_node_t *node);

@@ -246,17 +260,21 @@
admin_fd = -1;
}

+#ifndef KERN_CLMGR
+
if (0 != cmgr_thread_id) {
#ifdef DIRECT_CLUSTINT
cmgr_thread_id = 0;
fprintf(stderr,
"Shutting down cluster interface.\n");
- cluster_interface_shutdown();
+ cluster_interface_shutdown(); /* do I need to say need_to_quit here*/
#else /* DIRECT_CLUSTINT */
(*cluster_interface_stop)();
#endif /* DIRECT_CLUSTINT */
}

+#endif /* KERN_CLMGR */
+
#ifdef _ON_AIX_DLM_
mid_t kmid;
int status;
@@ -380,6 +398,10 @@
signal(SIGUSR2, catch_and_die);
signal(SIGCHLD, SIG_IGN);
signal(SIGIO, handle_sigio);
+#ifdef CI_LINUX
+ signal(SIGCLUSTER, SIG_IGN);
+#endif
+

/*
* Simple, really.
@@ -501,12 +523,14 @@
* work. Once we are connected, start up the kernel
* thread so that it is ready to work.
*/
+#ifndef KERN_CLMGR
if (-1 == cluster_manager_setup(configured_nodes)) {
fprintf(stderr,
"Error loading cluster interface module. Exiting.\n");
cleanup();
exit(5);
}
+#endif

message.msg_code = haDLM_MSG_Init;
message.msg_ptr_size = sizeof(args);
@@ -557,9 +581,67 @@
/* initialize message buffer; reused for each updata
*/
initialize_node_events(node_events);
+#ifdef KERN_CLMGR
+ message.msg_code = haDLM_MSG_CMGR;
+ message.msg_ptr_size = msize;
+ message.topo_array = node_events;
+
+ j = initial_topology(node_events);
+
+ if (0 < j) {
+
+ rc = pthread_mutex_unlock(&notify_mutex);
+ if (0 != rc) {
+ fprintf(stderr,
+ "Cannot unlock notify_mutex in main loop, rc = [%d]\n",
+ rc);
+ cleanup();
+ exit(rc);
+ }
+
+ i = write(admin_fd, &message, sizeof(message)); /* Tell kernel. */
+ if (i != sizeof(message)) {
+ fprintf(stderr,
+ "Error on write, i/err[%d/%s], code/size[%d/%d]\n",
+ i,
+ strerror(errno),
+ message.msg_code,
+ sizeof(message));
+ cleanup();
+ assert(!"Error writing through DLM admin device");
+ } else {
+ admin_calls++;
+
+ fprintf(stderr,
+ "Wrote code/size[%d/%d], blocks so far [%d]\n",
+ message.msg_code,
+ sizeof(message),
+ admin_calls);
+ }
+ rc = pthread_mutex_lock(&notify_mutex);
+ if (0 != rc) {
+ fprintf(stderr,
+ "Cannot lock notify_mutex in main loop, rc = [%d]\n",
+ rc);
+ cleanup();
+ exit(rc);
+ }
+ time_since = 0;
+
+ }
+ else {
+ fprintf(stderr, "Cannot find the initial topology\n");
+ cleanup();
+ exit(rc);
+
+ }
+#endif
+

while(!need_to_quit) {

+#ifndef KERN_CLMGR
+ /* Do I have a nodeq_count for kernel cluster_manager ? */
if (0 < nodeq_count) {
/*
* Work to do! Node block(s) waiting.
@@ -576,6 +658,18 @@
* nodes.
*/
j = drain_event_queue(node_events);
+#else
+ /* A zero msg_ptr_size is used to notify the kernel
+ * cluster manager to update the upnode list.
+ * Instead of running a separate thread inside the kernel,
+ * the user space daemon notifies the kernel when to
+ * update the list.
+ */
+ message.msg_code = haDLM_MSG_CMGR;
+ message.msg_ptr_size = 0;
+ message.topo_array = NULL;
+ j = 1;
+#endif

if (0 < j) {

@@ -620,16 +714,20 @@

time_since = 0; /* Had events! */
}
+#ifndef KERN_CLMGR /* if( 0 < nodeq_count ) */

} else {
fprintf(stderr,
"No work to do in handling event queue.\n");
}

+
/*
* Mutex is held at this point, regardless of path above.
*/
if (0 == nodeq_count) {
+
+#endif
/*
* No blocks appeared while we were in the kernel, wait
* on the condition.
@@ -640,6 +738,10 @@
rc = pthread_cond_timedwait(&notify_cond,
&notify_mutex,
&event_timer);
+#ifndef KERN_CLMGR
+ /* for kernel cluster manager I always time out
+ * So no need to log.
+ */
if (ETIMEDOUT == rc) {
time_since += EVENT_WAIT_TIME;
fprintf(stderr,
@@ -653,6 +755,7 @@
exit(rc);
}
}
+#endif

/*
* Loop around anyway, and check to see if any events, even
@@ -974,6 +1077,8 @@
return(found);
}

+#ifndef KERN_CLMGR
+
/* ============================================================ */
/*
* Set up to call the cluster manager.
@@ -1105,6 +1210,7 @@
}
#endif /* !DIRECT_CLUSTINT */

+#endif /* KERN_CLMGR */
/* ------------------------------------------------------------ */
/*
* With a view to supporting dynamic reconfiguration of the
@@ -1250,3 +1356,32 @@

return(ret);
}
+
+#ifdef KERN_CLMGR
+
+int
+initial_topology( MQ_DLM_TOP_INFO_t *node_events )
+{
+ int i ;
+ node_events->n_nodes = configured_nodes->dlm_node_count;
+ node_events->msg_version = TAM_version;
+#ifdef CI_LINUX
+ this_node_number = clusternode_num();
+#endif
+ node_events->this_nodeid = this_node_number;
+ node_events->event = admin_calls; /* Check this ? */
+
+ for( i = 0 ; i < configured_nodes->dlm_node_count; i++)
+ {
+ node_events->addrs[i].nodeid =configured_nodes->dlm_node_entries[i].dlm_node_number;
+ node_events->addrs[i].useaddr.s_addr =configured_nodes->dlm_node_entries[i].dlm_node_address.s_addr;
+ node_events->addrs[i].dlm_major= DLM_MAJOR_VERSION;
+ node_events->addrs[i].dlm_minor= DLM_MINOR_VERSION;
+
+
+ }
+
+printf("Returing the intial topology no of nodes=%d\m",configured_nodes->dlm_node_count);
+ return(configured_nodes->dlm_node_count );
+}
+#endif
diff -Nru --exclude=test dlm.old/source/include/dlm_kernel.h dlm/source/include/dlm_kernel.h
--- dlm.old/source/include/dlm_kernel.h Thu Nov 8 05:27:09 2001
+++ dlm/source/include/dlm_kernel.h Fri Mar 8 12:58:30 2002
@@ -234,12 +234,15 @@
struct timeval *t1,
struct timeval *t2);

+#ifndef LINUX_VERSION_CODE
+#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10)
# define snprintf(buf,count,fmt, args...) \
sprintf( buf, fmt, ## args )
# define vsnprintf(buf,count,fmt,args) \
vsprintf( buf, fmt, args )
#endif
+#endif

/*
* Fake float stuff.
Bruce Walker
2002-03-27 19:43:04 UTC
Aneesh,
Great news. Congratulations. Is there any chance you could
put a short note together describing how the DLM-cluster is formed
and how a node failure is handled? I think that would be
quite interesting and useful for other subsystems. I have
been giving some thought on how to integrate the DLM into the
SSI cluster, which has the added constraint that it must be
up and running before the root is mounted. Understanding how
it forms would be quite useful.

thanks,
bruce
Post by Aneesh Kumar K.V
Hi,
Attaching below is the latest patch.
known problem : Nothing :) [ I have to run some wonderful test cases now ]
-aneesh
Kai-Min Sung
2002-03-27 20:40:05 UTC
Aneesh's previous patch was missing the signal patches. Here's an
updated (hopefully complete) patch. If you plan to run this code, please
make sure you're using the latest version of CI checked out from the
SourceForge repository (CI-0.6.0 will not work with DLM).

http://sourceforge.net/cvs/?group_id=32543

-Kai
Kai-Min Sung
2002-03-27 20:42:03 UTC
Haha...including the patch file would be a good idea ;-)
Kai-Min Sung
2002-03-28 01:35:01 UTC
Sorry, please disregard my previous 2 messages. Apparently, I was looking at the wrong patch file. The patch Aneesh
sent previously was indeed complete. Sorry for the confusion...

-Kai
Aneesh Kumar K.V
2002-03-28 02:10:05 UTC
Hi,

I have put the latest patch at:

http://ci-linux.sourceforge.net/contrib/dlm-ci-linux-4.patch

Some info on what happens and how to build :)

1) First, how to build, so that everyone can try DLM with CI and send
me bug reports :)

Check out the CVS version of ci-linux from sourceforge. Build the kernel
as mentioned in the INSTALL file there.

Download the DLM source code from the DLM website and apply the
dlm-ci-linux-4.patch.

Follow the /dlm/dlm/documents/BUILD file to build DLM, and use
CLUSTMGR=ci-linux.

Add in /etc/modules.conf:
alias haDLM dlmdk.core


That's it, you have DLM ready to run.

2) What happens during nodeup and nodedown. (This is what I understood;
please correct me if I am wrong.)

We have dlmdu, the user-level daemon that reads the config file you
specify at the command prompt. After reading the config file, this
daemon builds a structure called configured_nodes. With a user-level
cluster manager like heartbeat, DLM normally spawns a separate thread
for node monitoring. This thread notifies the mainline code using the
dlm_notify* functions ( dlmdcl/*.c ). The mainline code ( dlm_daemon.c )
waits for a notification by calling pthread_cond_timedwait. Once the
notification is received, it prepares a new node list
( MQ_DLM_TOP_INFO_t *node_events ) and passes it to the kernel through a
write to /dev/haDLM/admin.
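
To make that flow easier to follow, here is a minimal sketch of the
notify-and-write loop in dlm_daemon.c. It is only illustrative: the
field and helper names (drain_event_queue, notify_mutex, notify_cond,
event_timer, nodeq_count, msize, need_to_quit) are the ones visible in
the patch context above, the extern types are approximate, and locking
and error handling are simplified.

/* Sketch only -- condensed from dlmdu/dlm_daemon.c, not the literal code. */
#include <pthread.h>
#include <unistd.h>
#include "dlm_devmsg.h"              /* dlm_MSG_t, MQ_DLM_TOP_INFO_t, haDLM_MSG_CMGR */

extern pthread_mutex_t notify_mutex; /* guards the queued node events */
extern pthread_cond_t notify_cond;   /* signalled by the dlm_notify* functions */
extern struct timespec event_timer;  /* EVENT_WAIT_TIME timeout */
extern int need_to_quit, nodeq_count;
extern size_t msize;                 /* size of the topology block */
extern int drain_event_queue(MQ_DLM_TOP_INFO_t *node_events);

static void userspace_clmgr_loop(int admin_fd, MQ_DLM_TOP_INFO_t *node_events)
{
    dlm_MSG_t message;

    pthread_mutex_lock(&notify_mutex);
    while (!need_to_quit) {
        if (0 < nodeq_count && 0 < drain_event_queue(node_events)) {
            /* Node block(s) queued by the monitor thread: tell the kernel. */
            message.msg_code = haDLM_MSG_CMGR;
            message.msg_ptr_size = msize;
            message.topo_array = node_events;
            pthread_mutex_unlock(&notify_mutex);
            write(admin_fd, &message, sizeof(message));
            pthread_mutex_lock(&notify_mutex);
        } else {
            /* Nothing queued: wait until notified or the timer expires. */
            pthread_cond_timedwait(&notify_cond, &notify_mutex, &event_timer);
        }
    }
    pthread_mutex_unlock(&notify_mutex);
}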

What we do in CI:

We treat CI as a kernel cluster manager, identified by
#define KERN_CLMGR. For CI we don't need to spawn a new thread;
instead, we prepare a node list ( MQ_DLM_TOP_INFO_t *node_events ) from
the configured_nodes struct and pass it to the kernel by writing it to
/dev/haDLM/admin. After that, the user-level daemon dlmdu does a write
with the following message format
message.msg_code = haDLM_MSG_CMGR;
message.msg_ptr_size = 0;
message.topo_array = NULL;
every 300 seconds. This message is an indication to the kernel-based
cluster manager to update its up node list, which helps make sure that
we always have the correct up node list.
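
Put differently, the KERN_CLMGR build of dlmdu only ever makes two
kinds of admin-device writes. A rough sketch (illustrative only;
initial_topology(), admin_fd and msize are from the patch, and error
handling is omitted):

/* Sketch: the two haDLM_MSG_CMGR writes made by dlmdu when built with KERN_CLMGR. */
dlm_MSG_t message;

/* 1) Once at startup: hand the configured-node topology to the kernel. */
message.msg_code = haDLM_MSG_CMGR;
message.msg_ptr_size = msize;        /* non-zero => the message carries a topology block */
message.topo_array = node_events;    /* filled in by initial_topology() */
write(admin_fd, &message, sizeof(message));

/* 2) Every ~300 seconds afterwards: ask the kernel cluster manager to
 * rebuild the up node list itself (no topology data attached). */
message.msg_code = haDLM_MSG_CMGR;
message.msg_ptr_size = 0;            /* zero size == "refresh the up node list" */
message.topo_array = NULL;
write(admin_fd, &message, sizeof(message));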


Modification to the /dev/haDLM/admin driver:

In init_module we register the nodeup and nodedown routines, but we
don't allow locks ( haDLM_allow_locks ) until we get the configured
node information from the user-level daemon dlmdu. The first write to
/dev/haDLM/admin with command haDLM_MSG_CMGR is considered to carry
the configured node list.
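
Condensed, the kernel side of that protocol (from the dlm_kerndd.c
hunks above) behaves roughly as in the sketch below. This is only a
summary with an illustrative helper name (handle_cmgr_message), not the
literal driver code; the real logic lives inline in the admin write
path.

/* Sketch (CONFIG_CLMS): how a haDLM_MSG_CMGR write is dispatched. */
static MQ_DLM_TOP_INFO_t *handle_cmgr_message(dlm_MSG_t *message,
                                              MQ_DLM_TOP_INFO_t *topo_from_user)
{
    if (message->msg_ptr_size == 0) {
        /* Periodic refresh request from dlmdu: rebuild from CLMS state. */
        return get_upnode_list();
    }
    if (first_clmgr_call) {
        cli_topo = topo_from_user;   /* remember the configured-node list */
        first_clmgr_call = 0;
        haDLM_allow_locks = 1;       /* only now do we accept lock traffic */
        return get_upnode_list();    /* mask out nodes CLMS says are down */
    }
    return topo_from_user;
}

The returned topology block is then queued on dlm_master_work_queue as
an MQ_DLM_TOP_INFO_MSG workunit, exactly as in the non-CLMS path, and
dlm_cli_nodeup()/dlm_cli_nodedown() do the same queueing whenever CLMS
reports a membership change.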
Kai-Min Sung
2002-03-28 20:22:20 UTC
Post by Aneesh Kumar K.V
Download the DLM source code from the DLM website and apply the
dlm-ci-linux-4.patch.
Follow the /dlm/dlm/documents/BUILD file to build DLM, and use
CLUSTMGR=ci-linux.
Add in /etc/modules.conf:
alias haDLM dlmdk.core
That's it, you have DLM ready to run.
I also had to chmod 755 dlm/source/mkdep before running the makes.
Post by Aneesh Kumar K.V
You cannot start dlm in a cluster where the nodes are already in the
cluster and are also part of dlm. I mean, for dlm to work, the dlm
daemon needs to be started when there is only one node; later you can
add more nodes (see the DLM documentation). That means you may need to
start dlm along with the cluster master.
I'm not sure about the restriction that DLM must be started on a 1 node
cluster. I was able to run the following bootup sequence:

Node 1: cluster_start (1 node cluster formed)
Node 2: cluster_start (2 node cluster formed)
Node 1: start dlm (DLM blocks, trying to contact Node 2)
Node 2: start dlm (DLM begins running on both Nodes)

-Kai
Aneesh Kumar K.V
2002-03-29 04:58:02 UTC
Post by Kai-Min Sung
I'm not sure about the restriction that DLM must be started on a 1 node
cluster. I was able to run the following bootup sequence:
Node 1: cluster_start (1 node cluster formed)
Node 2: cluster_start (2 node cluster formed)
Node 1: start dlm (DLM blocks, trying to contact Node 2)
Node 2: start dlm (DLM begins running on both Nodes)
-Kai
from recovery_design.html

" This is a fine point of the DLM that needs to be explicitly explained.
When a node starts up, the DLM expects to receive directory information
from
some node already in the cluster. If the node is alone in the cluster,
it
will decided that there is no state available, and will move on. This
is
the decision made here when the CLM_DO_NODE_INIT message is processed.
The
DLM therefore has very specific expectations of the cluster manager when
multiple nodes are booted to form the first cluster. The expectation is
that the cluster mananger will only allow one node to join the cluster
at a
time. This provides a guarantee that one of the nodes will see that it
is
alone in the (initial) cluster, and it will be able to make progress. "


I guess dlm expects to receive the directory information from an
already existing node. So if we start dlm in an already existing
cluster of, say, node1 and node2 (let's say we start dlm on node1),
then for dlm on node1 the up node list will include node1 and node2,
so it will try to receive the directory information from node2,
whereas dlm is not yet started on node2. This will keep node1 in the
*INIT state. To avoid this, follow these steps:

1) start cluster in node1
2) start dlm on node1
3) start cluster on node2
4) start dlm on node2.

-aneesh
