1. Linux内核源码目录结构
目录名称 | 作用 |
arch | 包含了不同架构相关的代码 |
block | 块设备相关的源代码 |
crypto | 加密算法相关的源代码 |
drivers | 包含了系统中设备驱动的源代码 |
firmware | 一些固件的源代码 |
fs | 系统支持的文件系统相关的源代码 |
include | 内核共用的头文件 |
init | 内核启动的源代码 |
ipc | 进程间通信的源代码 |
kernel | 内核自身核心代码 |
lib | 内核的库代码 |
mm | 内存管理代码 |
net | 网络相关的代码 |
scripts | 配置脚本文件 |
security | 安全相关的代码(如SELinux等安全模块) |
sound | 音频(声卡)驱动及相关代码 |
tools | 与内核配套的一些工具 |
usr | 用于生成initramfs的代码 |
virt | 虚拟化(如KVM)相关的代码 |
2.内核配置和编译
2.1 检查内核主Makefile
(1)检查编译工具链CROSS_COMPILE
(2)检查架构ARCH
2.2 内核编译
(1)make distclean 清除编译结果
(2)make xx_defconfig,用于生成.config文件。配置内核需要.config文件,内核编译时根据.config中的内容进行编译。xx_defconfig文件位于内核源码的arch/arm/configs目录下,里面有针对不同cpu的xx_defconfig文件。
(3)make menuconfig 配置内核
(4)make 编译内核
2.3 内核配置原理
(1).config 内核编译配置文件,内核编译根据.config中的配置项进行编译
(2)Kconfig,内核配置菜单,menuconfig中的菜单项来自于Kconfig,menuconfig中的菜单配置结果保存在.config中。
(a)menu,表示菜单,config表示菜单项
# Menu grouping the ARM floating-point options: the software emulators
# (NWFPE, FastFPE) and the hardware VFP / NEON support code.
menu "Floating point emulation"

comment "At least one emulation must be selected"

config FPE_NWFPE
	bool "NWFPE math emulation"
	depends on !AEABI || OABI_COMPAT
	---help---
	  Say Y to include the NWFPE floating point emulator in the kernel.
	  This is necessary to run most binaries. Linux does not currently
	  support floating point hardware so you need to say Y here even if
	  your machine has an FPA or floating point co-processor podule.

	  You may say N here if you are going to load the Acorn FPEmulator
	  early in the bootup.

# Sub-option of NWFPE: only offered when FPE_NWFPE is enabled.
config FPE_NWFPE_XP
	bool "Support extended precision"
	depends on FPE_NWFPE
	help
	  Say Y to include 80-bit support in the kernel floating-point
	  emulator. Otherwise, only 32 and 64-bit support is compiled in.
	  Note that gcc does not generate 80-bit operations by default,
	  so in most cases this option only enlarges the size of the
	  floating point emulator without any good reason.

	  You almost surely want to say N here.

config FPE_FASTFPE
	bool "FastFPE math emulation (EXPERIMENTAL)"
	depends on (!AEABI || OABI_COMPAT) && !CPU_32v3 && EXPERIMENTAL
	---help---
	  Say Y here to include the FAST floating point emulator in the kernel.
	  This is an experimental much faster emulator which now also has full
	  precision for the mantissa. It does not support any exceptions.
	  It is very simple, and approximately 3-6 times faster than NWFPE.

	  It should be sufficient for most programs. It may be not suitable
	  for scientific calculations, but you have to check this for yourself.
	  If you do not feel you need a faster FP emulation you should better
	  choose NWFPE.

config VFP
	bool "VFP-format floating point maths"
	depends on CPU_V6 || CPU_ARM926T || CPU_V7 || CPU_FEROCEON
	help
	  Say Y to include VFP support code in the kernel. This is needed
	  if your hardware includes a VFP unit.

	  Please see <file:Documentation/arm/VFP/release-notes.txt> for
	  release notes and additional status information.

	  Say N if your target does not have VFP hardware.

# No prompt string, so this symbol is not shown as a selectable menu entry;
# its value comes from the default below (y when CPU_V7 is set) and it
# requires VFP.
config VFPv3
	bool
	depends on VFP
	default y if CPU_V7

config NEON
	bool "Advanced SIMD (NEON) Extension support"
	depends on VFPv3 && CPU_V7
	help
	  Say Y to include support code for NEON, the ARMv7 Advanced SIMD
	  Extension.

endmenu
(b)tristate和bool,tristate:表示三种状态(Y,N,M),bool两种状态(Y,N),M表示这部分编译成模块,需要时动态加载到内核中
(c)depends:表示这部分依赖的内容
(d)help:帮助信息
3. linux内核启动分析
3.1 linux启动入口
从linux内核链接脚本中寻找启动入口,链接脚本:/arch/arm/kernel/vmlinux.lds.S
从链接脚本可以找到程序入口在stext这个位置,内核源码中head-nommu.S和head.S都有stext,我这里的内核是使用了MMU,所以启动入口在head.S中
3.2 内核启动汇编阶段
__HEAD
ENTRY(stext)setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode@ and irqs disabledmrc p15, 0, r9, c0, c0 @ get processor idbl __lookup_processor_type @ r5=procinfo r9=cpuidmovs r10, r5 @ invalid processor (r5=0)?beq __error_p @ yes, error 'p'bl __lookup_machine_type @ r5=machinfomovs r8, r5 @ invalid machine (r5=0)?beq __error_a @ yes, error 'a'bl __vet_atagsbl __create_page_tables/** The following calls CPU specific code in a position independent* manner. See arch/arm/mm/proc-*.S for details. r10 = base of* xxx_proc_info structure selected by __lookup_machine_type* above. On return, the CPU will be ready for the MMU to be* turned on, and r0 will hold the CPU control register value.*/ldr r13, __switch_data @ address to jump to after
(1)从cp15协处理器中c0寄存器中读出CPU的ID号,然后用__lookup_processor_type检验ID号是否合法
(2)__lookup_machine_type用来检验机器码
(3)__vet_atags,用来检查uboot给内核传参的格式是否正确
(4)__create_page_tables,建立内存页表
(5)__switch_data,这是一个地址表:第一项是__mmap_switched的地址(开启MMU之后跳转到这里),其余各项是数据段、BSS段等的地址;在__mmap_switched的最后通过b start_kernel跳转到c语言阶段
/*
 * __switch_data: table of addresses used by __mmap_switched.
 *
 * The first word is the address of __mmap_switched itself.  The remaining
 * words are read by the two ldmia instructions in __mmap_switched; the
 * trailing "@ rN" note on each entry names the register that word is
 * loaded into.
 */
	.align	2
	.type	__switch_data, %object
__switch_data:
	.long	__mmap_switched
	.long	__data_loc			@ r4
	.long	_data				@ r5
	.long	__bss_start			@ r6
	.long	_end				@ r7
	.long	processor_id			@ r4
	.long	__machine_arch_type		@ r5
	.long	__atags_pointer			@ r6
	.long	cr_alignment			@ r7
	.long	init_thread_union + THREAD_START_SP @ sp

/*
 * The following fragment of code is executed with the MMU on in MMU mode,
 * and uses absolute addresses; this is not position independent.
 *
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags pointer
 *  r9  = processor ID
 *
 * Steps: optionally copy the data segment, zero the BSS, store the
 * processor ID / machine type / atags pointer / control register values
 * into their variables, then branch to start_kernel.
 */
__mmap_switched:
	adr	r3, __switch_data + 4		@ skip the first table word

	ldmia	r3!, {r4, r5, r6, r7}		@ r4=__data_loc r5=_data r6=__bss_start r7=_end
	cmp	r4, r5				@ Copy data segment if needed
1:	cmpne	r5, r6				@ loop until r5 reaches __bss_start
	ldrne	fp, [r4], #4
	strne	fp, [r5], #4
	bne	1b

	mov	fp, #0				@ Clear BSS (and zero fp)
1:	cmp	r6, r7				@ loop until r6 reaches _end
	strcc	fp, [r6],#4
	bcc	1b

	@ Load the second group of table words (variable addresses and the
	@ initial stack pointer); ARM and THUMB builds use different sequences.
 ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
 THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
 THUMB(	ldr	sp, [r3, #16]		)
	str	r9, [r4]			@ Save processor ID
	str	r1, [r5]			@ Save machine type
	str	r2, [r6]			@ Save atags pointer
	bic	r4, r0, #CR_A			@ Clear 'A' bit
	stmia	r7, {r0, r4}			@ Save control register values
	b	start_kernel			@ enter the C code; no return expected here
ENDPROC(__mmap_switched)
3.3 内核启动c语言阶段之start_kernel
/*
 * start_kernel - C-level boot entry point.
 *
 * Runs the early subsystem initialisers in a fixed order.  Interrupts are
 * disabled near the top (local_irq_disable()) and only re-enabled after
 * the IRQ, timer and timekeeping setup (local_irq_enable()).  The
 * remaining initialisers then run and control is handed to rest_init().
 *
 * The call order is significant: several comments below state explicit
 * ordering constraints, so do not reorder the calls.
 */
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	extern struct kernel_param __start___param[], __stop___param[];

	smp_setup_processor_id();

	/*
	 * Need to run as early as possible, to initialize the
	 * lockdep hash:
	 */
	lockdep_init();
	debug_objects_early_init();

	/*
	 * Set up the initial canary ASAP:
	 */
	boot_init_stack_canary();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_off();
	early_init_irq_lock_class();

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them
	 */
	lock_kernel();
	tick_init();
	boot_cpu_init();
	page_address_init();
	printk(KERN_NOTICE "%s", linux_banner);
	/* Architecture-specific setup; hands back the command line pointer. */
	setup_arch(&command_line);
	mm_init_owner(&init_mm, &init_task);
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */

	build_all_zonelists(NULL);
	page_alloc_init();

	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
	parse_early_param();
	parse_args("Booting kernel", static_command_line, __start___param,
		   __stop___param - __start___param,
		   &unknown_bootoption);
	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	pidhash_init();
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();
	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	if (!irqs_disabled()) {
		printk(KERN_WARNING "start_kernel(): bug: interrupts were "
				"enabled *very* early, fixing it\n");
		local_irq_disable();
	}
	rcu_init();
	radix_tree_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	prio_tree_init();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	profile_init();
	if (!irqs_disabled())
		printk(KERN_CRIT "start_kernel(): bug: interrupts were "
				 "enabled early\n");
	early_boot_irqs_on();
	local_irq_enable();

	/* Interrupts are enabled now so all GFP allocations are safe. */
	gfp_allowed_mask = __GFP_BITS_MASK;

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	if (panic_later)
		panic(panic_later, panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	/* Drop the initrd if its first page lies below min_low_pfn. */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_cgroup_init();
	enable_debug_pagealloc();
	kmemtrace_init();
	kmemleak_init();
	debug_objects_mem_init();
	idr_init_cache();
	setup_per_cpu_pageset();
	numa_policy_init();
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pidmap_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled)
		efi_enter_virtual_mode();
#endif
	thread_info_cache_init();
	cred_init();
	fork_init(totalram_pages);
	proc_caches_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init(totalram_pages);
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
#ifdef CONFIG_PROC_FS
	proc_root_init();
#endif
	cgroup_init();
	cpuset_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	acpi_early_init(); /* before LAPIC and SMP init */
	sfi_init_late();

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}
(1)smp_setup_processor_id,多处理器相关的
(2)lockdep_init,初始化lockdep(内核锁依赖检测机制),用于检测内核中各种锁可能出现的死锁问题
(3)cgroup_init_early,内核提供的进程组机制
(4)setup_arch(&command_line),完成与体系结构相关的初始化:根据机器码确定内核当前的机器描述符(machine_desc),解析uboot传给内核的tags参数,并得到内核命令行(cmdline)
/*
 * setup_arch - ARM architecture-specific boot-time setup.
 * @cmdline_p: out parameter; on return *cmdline_p points at cmd_line, the
 *             working copy of the kernel command line.
 *
 * Looks up the machine descriptor for machine_arch_type, locates the boot
 * tag list (from __atags_pointer, else mdesc->boot_params, else the
 * built-in init_tags), converts old-style parameters when needed, parses
 * the tags, records the kernel image bounds in init_mm, copies the command
 * line, and then runs paging/resource/CPU initialisation before
 * publishing the machine-specific irq/timer/init hooks.
 */
void __init setup_arch(char **cmdline_p)
{
	struct tag *tags = (struct tag *)&init_tags;
	struct machine_desc *mdesc;
	char *from = default_command_line;

	unwind_init();

	setup_processor();
	mdesc = setup_machine(machine_arch_type);
	machine_name = mdesc->name;

	if (mdesc->soft_reboot)
		reboot_setup("s");

	/* Prefer the tag pointer saved during early boot over the board default. */
	if (__atags_pointer) {
		tags = phys_to_virt(__atags_pointer);
		printk("@@@@@@@ atags_pointer not null\n");
	} else if (mdesc->boot_params) {
		tags = phys_to_virt(mdesc->boot_params);
		printk("@@@@@@@ boot params not null\n");
	}
	/*
	 * Debug trace.  The format has two %p conversions, so both values
	 * must be passed; the original call supplied only 'tags', leaving
	 * the second conversion to read a non-existent argument.
	 */
	printk("@@@@@@@linter#####boot_params:%p,mdesc->boot_params:%p\n",
	       tags, (void *)(unsigned long)mdesc->boot_params);

	/*
	 * If we have the old style parameters, convert them to
	 * a tag list.
	 */
	if (tags->hdr.tag != ATAG_CORE)
		convert_to_tag_list(tags);
	/* Still not a valid tag list: fall back to the built-in defaults. */
	if (tags->hdr.tag != ATAG_CORE)
		tags = (struct tag *)&init_tags;

	if (mdesc->fixup)
		mdesc->fixup(mdesc, tags, &from, &meminfo);

	if (tags->hdr.tag == ATAG_CORE) {
		if (meminfo.nr_banks != 0)
			squash_mem_tags(tags);
		save_atags(tags);
		parse_tags(tags);
	}

	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code   = (unsigned long) _etext;
	init_mm.end_data   = (unsigned long) _edata;
	init_mm.brk	   = (unsigned long) _end;

	/* parse_early_param needs a boot_command_line */
	strlcpy(boot_command_line, from, COMMAND_LINE_SIZE);

	/* populate cmd_line too for later use, preserving boot_command_line */
	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
	*cmdline_p = cmd_line;
	printk("$$$$$$$$$cmdline:%s\n",cmd_line);
	parse_early_param();

	paging_init(mdesc);
	request_standard_resources(&meminfo, mdesc);

#ifdef CONFIG_SMP
	smp_init_cpus();
#endif

	cpu_init();
	tcm_init();

	/*
	 * Set up various architecture-specific pointers
	 */
	init_arch_irq = mdesc->init_irq;
	system_timer = mdesc->timer;
	init_machine = mdesc->init_machine;

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif
	early_trap_init();
}
(5)rest_init,创建了两个内核线程(kernel_init和kthreadd),然后调用schedule()启动内核的调度系统,最后进入cpu_idle。cpu_idle是内核进程0,即空闲进程;kernel_init是进程1,也叫init进程;kthreadd是进程2,是负责创建其他内核线程的守护进程。
/*
 * rest_init - final stage of boot on the boot CPU.
 *
 * Spawns the kernel_init and kthreadd threads, records kthreadd's task in
 * kthreadd_task, signals kthreadd_done, releases the kernel lock, calls
 * schedule() once and then enters cpu_idle().
 */
static noinline void __init_refok rest_init(void)
	__releases(kernel_lock)
{
	int pid;

	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
	/* Look up kthreadd's task from its pid under the RCU read lock. */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	/* kernel_init() waits on this completion before doing any work. */
	complete(&kthreadd_done);
	unlock_kernel();

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	preempt_enable_no_resched();
	schedule();
	preempt_disable();

	/* Call into cpu_idle with preempt disabled */
	cpu_idle();
}
init进程
linux系统中每个进程都有自己的文件描述符,在init进程中打开了控制台,并进行了2次文件描述符复制,一共得到了3个文件描述符,分别是0,1,2,对应标准输入、标准输出、标准错误。由进程1衍生的进程默认都有这三个描述符。
init进程中调用prepare_namespace函数来挂载根文件系统
/*
 * kernel_init - body of the init kernel thread.
 * @unused: not referenced.
 *
 * Waits for kthreadd_done, brings up SMP and runs do_basic_setup(), then
 * opens /dev/console and duplicates descriptor 0 twice.  If the early
 * userspace init (ramdisk_execute_command, defaulting to "/init") is not
 * accessible, prepare_namespace() is called instead.  Finishes by calling
 * init_post().
 *
 * Returns 0.
 */
static int __init kernel_init(void * unused)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);
	lock_kernel();

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_HIGH_MEMORY]);
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);
	/*
	 * Tell the world that we're going to be the grim
	 * reaper of innocent orphaned children.
	 *
	 * We don't want people to have to make incorrect
	 * assumptions about where in the task array this
	 * can be found.
	 */
	init_pid_ns.child_reaper = current;

	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	do_pre_smp_initcalls();
	start_boot_trace();

	smp_init();
	sched_init_smp();

	do_basic_setup();

	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		printk(KERN_WARNING "Warning: unable to open an initial console.\n");

	/*
	 * Duplicate descriptor 0 twice; presumably this yields descriptors
	 * 1 and 2 (stdout/stderr) when the console open above returned 0.
	 */
	(void) sys_dup(0);
	(void) sys_dup(0);
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	/* Early init not accessible: clear it and call prepare_namespace(). */
	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 */

	init_post();
	return 0;
}