Understanding Process ID Management in the Linux Kernel
Core Data Structures for Process Identification
The Linux kernel employs several key data structures to manage process identifiers across hierarchical namespaces:
/*
 * PID namespace management.
 *
 * One instance describes one PID namespace. Namespaces form a chain through
 * ->parent, and ->level records how far a namespace sits below the initial
 * one.
 *
 * NOTE(review): other code in this file also uses ->child_reaper and
 * ->nr_hashed, which are not declared here; this listing looks abridged --
 * confirm against the full definition.
 */
struct pid_namespace {
/* PID allocation map; create_pid_namespace() allocates the page of entry 0. */
/* struct pidmap and PIDMAP_ENTRIES are defined outside this listing. */
struct pidmap pidmap[PIDMAP_ENTRIES];
/* Slab cache from which alloc_pid() takes the struct pid objects of this namespace. */
struct kmem_cache *pid_cachep;
/* Nesting depth: 0 for init_pid_ns, parent->level + 1 for a created namespace. */
unsigned int level;
/* Enclosing namespace; alloc_pid() follows this link once per level. */
struct pid_namespace *parent;
};
/*
 * Namespace proxy containing PID references.
 * A task reaches it through task_struct::nsproxy.
 */
struct nsproxy {
/* Namespace that copy_process() hands to alloc_pid() for a newly created task. */
struct pid_namespace *pid_ns_for_children;
};
/*
 * PID types for different process groupings.
 *
 * The tag is pid_type: alloc_pid(), init_task_pid() and attach_pid() in this
 * file all declare their variables and parameters as 'enum pid_type'.
 *
 * PIDTYPE_MAX follows the three real types, so it equals their count and is
 * used to size the per-type arrays (pid::tasks[], task_struct::pids[]).
 * __PIDTYPE_TGID sits after PIDTYPE_MAX and therefore has no slot in those
 * arrays; it must never be used as an index into them.
 */
enum pid_type {
	PIDTYPE_PID,	/* the task's own PID */
	PIDTYPE_PGID,	/* process group ID */
	PIDTYPE_SID,	/* session ID */
	PIDTYPE_MAX,	/* number of array-backed types above */
	__PIDTYPE_TGID	/* beyond PIDTYPE_MAX: not array-backed */
};
/*
 * Per-namespace PID representation: the number one struct pid carries
 * inside one particular namespace.
 */
struct upid {
/* Numeric ID within ->ns; alloc_pid() fills it with the value from alloc_pidmap(). */
int nr;
/* Namespace in which ->nr was allocated. */
struct pid_namespace *ns;
/* Link in the pid_hash bucket chosen by pid_hashfn(nr, ns). */
struct hlist_node pid_chain;
};
/*
 * Main PID structure spanning namespace levels.
 *
 * A single struct pid carries one upid per namespace level, from level 0 up
 * to the level of the namespace it was allocated in.
 */
struct pid {
/* Level of the namespace this pid was allocated in; also the last valid index of numbers[]. */
unsigned int level;
/* Per-type list heads; attach_pid() links a task's pid_link node onto tasks[type]. */
struct hlist_head tasks[PIDTYPE_MAX];
/*
 * Declared with one element, but create_pid_cachep() sizes each object as
 * sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), so entries
 * numbers[1..level] live in the extra space that follows the struct.
 */
struct upid numbers[1];
};
/*
 * Link between processes and their PIDs.
 * A task embeds one pid_link per PID type (task_struct::pids[]).
 */
struct pid_link {
/* List node that attach_pid() adds to pid->tasks[type]. */
struct hlist_node node;
/* The struct pid this task uses for the type; stored by init_task_pid(). */
struct pid *pid;
};
/*
 * Process control block with PID links.
 *
 * NOTE(review): copy_process() in this file also assigns ->pid and ->tgid,
 * which are not declared here; this listing looks abridged -- confirm
 * against the full definition.
 */
struct task_struct {
/* Thread group leader; both INIT_TASK() and copy_process() point it at the task itself. */
struct task_struct *group_leader;
/* Namespace proxy; copy_process() reads pid_ns_for_children through it. */
struct nsproxy *nsproxy;
/* One link per PID type, indexed by enum pid_type values below PIDTYPE_MAX. */
struct pid_link pids[PIDTYPE_MAX];
};
Initialization of the First Process
The initial kernel process is statically configured during boot:
/* Static initialization of the first task */
struct task_struct init_task = INIT_TASK(init_task);
#define INIT_TASK(tsk) \
{
.group_leader = &tsk,
.nsproxy = &init_nsproxy,
.pids = {
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID),
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID),
}
}
/*
 * Initial PID namespace configuration.
 *
 * This is the level-0 namespace; no parent is set, and init_task is named as
 * its child reaper.
 *
 * NOTE(review): ->child_reaper is not among the struct pid_namespace members
 * shown in this file -- confirm against the full definition.
 */
struct pid_namespace init_pid_ns = {
.level = 0,
.child_reaper = &init_task,
};
/*
 * Static PID structure for init.
 *
 * It spans a single namespace level (level 0), and its only upid entry is
 * number 0 in init_pid_ns. copy_process() compares its pid argument with the
 * address of this object and skips alloc_pid() when they match.
 */
struct pid init_struct_pid = {
.level = 0,
.numbers = {{
.nr = 0,
.ns = &init_pid_ns,
}}
};
Creating Subsequent Processes
New processes are created through fork operations with dynamic PID allocation:
/*
 * Process creation entry point.
 *
 * Duplicates the current task through copy_process() and reports the new
 * task's PID number.
 *
 * @clone_flags:   passed through to copy_process()
 * @stack_start:   passed through to copy_process()
 * @stack_size:    passed through to copy_process()
 * @parent_tidptr: not used in this abridged listing
 * @child_tidptr:  passed through to copy_process()
 * @tls:           passed through to copy_process()
 *
 * Returns the value of pid_vnr() for the new task's PIDTYPE_PID pid on
 * success, or the error value that PTR_ERR() extracts from copy_process()'s
 * result on failure.
 */
long _do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr,
	      unsigned long tls)
{
	struct task_struct *p;
	struct pid *process_id;
	/*
	 * Trace event forwarded to copy_process(). This listing contains no
	 * ptrace handling, so it stays at 0.
	 */
	int trace = 0;
	long nr;

	/* NULL pid: let copy_process() allocate a fresh struct pid. */
	p = copy_process(clone_flags, stack_start, stack_size,
			 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * get_task_pid()/put_pid() bracket the lookup (presumably taking and
	 * dropping a reference -- confirm); the number is read in between.
	 */
	process_id = get_task_pid(p, PIDTYPE_PID);
	nr = pid_vnr(process_id);
	put_pid(process_id);

	return nr;
}
/*
 * Process duplication implementation.
 *
 * Duplicates the current task, gives the copy a struct pid and links the
 * copy to its PID, process-group and session pids.
 *
 * @clone_flags, @stack_start, @stack_size, @child_tidptr, @trace, @tls:
 *         accepted for the caller's benefit; not used in this abridged
 *         listing.
 * @pid:   &init_struct_pid to reuse the static init pid; any other value
 *         (including NULL) makes this function allocate a new pid in the
 *         task's nsproxy->pid_ns_for_children namespace.
 * @node:  passed to dup_task_struct().
 *
 * Returns the new task, or an ERR_PTR() value on failure. The caller,
 * _do_fork(), tests the result with IS_ERR(), so failures must never be
 * reported as NULL.
 */
static struct task_struct *copy_process(
	unsigned long clone_flags,
	unsigned long stack_start,
	unsigned long stack_size,
	int __user *child_tidptr,
	struct pid *pid,
	int trace,
	unsigned long tls,
	int node)
{
	struct task_struct *new_task;

	/* dup_task_struct() is assumed to return NULL on failure -- confirm. */
	new_task = dup_task_struct(current, node);
	if (!new_task)
		return ERR_PTR(-ENOMEM);

	if (pid != &init_struct_pid) {
		pid = alloc_pid(new_task->nsproxy->pid_ns_for_children);
		/* Accept both failure conventions: NULL and ERR_PTR(). */
		if (IS_ERR_OR_NULL(pid)) {
			long err = pid ? PTR_ERR(pid) : -ENOMEM;

			/*
			 * NOTE(review): the complete kernel function unwinds
			 * more state; here only the duplicated task exists.
			 */
			free_task(new_task);
			return ERR_PTR(err);
		}
	}

	new_task->pid = pid_nr(pid);
	/* In this listing every new task leads its own thread group. */
	new_task->group_leader = new_task;
	new_task->tgid = new_task->pid;

	if (likely(new_task->pid)) {
		init_task_pid(new_task, PIDTYPE_PID, pid);
		if (thread_group_leader(new_task)) {
			/* Process group and session come from the creating task. */
			init_task_pid(new_task, PIDTYPE_PGID, task_pgrp(current));
			init_task_pid(new_task, PIDTYPE_SID, task_session(current));
			attach_pid(new_task, PIDTYPE_PGID);
			attach_pid(new_task, PIDTYPE_SID);
		}
		attach_pid(new_task, PIDTYPE_PID);
	}

	return new_task;
}
PID Allocation and Management
Dynamic PID assignment involves namespace-aware allocation:
/* Allocate new PID structure */
struct pid *alloc_pid(struct pid_namespace *ns)
{
struct pid *pid_entry;
enum pid_type type;
int i, nr;
struct pid_namespace *temp_ns;
struct upid *upid_ptr;
pid_entry = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
temp_ns = ns;
pid_entry->level = ns->level;
for (i = ns->level; i >= 0; i--) {
nr = alloc_pidmap(temp_ns);
pid_entry->numbers[i].nr = nr;
pid_entry->numbers[i].ns = temp_ns;
temp_ns = temp_ns->parent;
}
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid_entry->tasks[type]);
upid_ptr = pid_entry->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
for (; upid_ptr >= pid_entry->numbers; --upid_ptr) {
hlist_add_head_rcu(&upid_ptr->pid_chain,
&pid_hash[pid_hashfn(upid_ptr->nr, upid_ptr->ns)]);
upid_ptr->ns->nr_hashed++;
}
spin_unlock_irq(&pidmap_lock);
return pid_entry;
}
/* Establish bidirectional process-PID relationship */
static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
{
task->pids[type].pid = pid;
}
void attach_pid(struct task_struct *task, enum pid_type type)
{
struct pid_link *link = &task->pids[type];
hlist_add_head_rcu(&link->node, &link->pid->tasks[type]);
}
Namespace Hierarchy Creation
New PID namespaces are established through system calls:
/* Create new PID namespace */
static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
struct pid_namespace *parent_ns)
{
struct pid_namespace *new_ns;
unsigned int level = parent_ns->level + 1;
new_ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
new_ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
new_ns->pid_cachep = create_pid_cachep(level + 1);
new_ns->level = level;
new_ns->parent = get_pid_ns(parent_ns);
return new_ns;
}
/*
 * Configure PID cache for namespace level.
 *
 * Creates a slab cache named "pid_<nr_ids>" whose objects are large enough
 * for a struct pid with @nr_ids upid entries. struct pid already contains
 * one entry, hence the (nr_ids - 1) in the size computation.
 *
 * @nr_ids: number of upid entries each object must hold.
 *
 * Returns the cache, or NULL if either allocation fails.
 */
static struct kmem_cache *create_pid_cachep(int nr_ids)
{
	struct pid_cache *pcache;
	struct kmem_cache *cachep;

	pcache = kmalloc(sizeof(*pcache), GFP_KERNEL);
	if (!pcache)
		return NULL;

	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
	cachep = kmem_cache_create(pcache->name,
			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
			0, SLAB_HWCACHE_ALIGN, NULL);
	/*
	 * On success pcache stays allocated: its name buffer was handed to
	 * kmem_cache_create() and presumably has to outlive the cache. On
	 * failure nothing refers to it any more, so release it.
	 */
	if (!cachep)
		kfree(pcache);

	return cachep;
}
User Space Interfaces
Standard library functions provide access to process identifiers:
#include <unistd.h>
/* Core PID retrieval functions; each reports on the calling process or thread */
pid_t getpid(void); // Process ID of the caller, i.e. its thread group ID
pid_t getppid(void); // Process ID of the caller's parent
pid_t gettid(void); // Thread ID of the calling thread
/* Session and process-group management; pid selects the target process */
pid_t getsid(pid_t pid); // Session ID of the process identified by pid
int setpgid(pid_t pid, pid_t pgid); // Move process pid into process group pgid
pid_t getpgid(pid_t pid); // Process group ID of the process identified by pid