Foliensatz 9
v1.0, 02.12.2013
|
⟨constants⟩≡
#define MAX_ADDR_SPACES 1024 |
|
⟨elementary type definitions⟩≡
typedef unsigned int addr_space_id; |
|
⟨more TCB entries⟩≡
addr_space_id addr_space; // memory usage |
Speicher-Layout für Prozesse:
|
⟨constants⟩+≡
#define BINARY_LOAD_ADDRESS 0x0 #define TOP_OF_USER_MODE_STACK 0xb0000000 #define TOP_OF_KERNEL_MODE_STACK 0xc0000000 |
⟨type definitions⟩≡
typedef struct { void *pd; // pointer to the page directory int pid; // process ID (if used by a process; -1 if not) short status; // are we using this address space? unsigned int memstart; // first address below 0xc000.0000 unsigned int memend; // last address below 0xc000.0000 unsigned int stacksize; // size of user mode stack unsigned int kstack_pt; // stack page table (for kernel stack) unsigned int refcount; // how many threads use this address space? } address_space; |
|
⟨constants⟩+≡
#define AS_FREE 0 #define AS_USED 1 #define AS_DELETE 2 |
|
⟨global variables⟩≡
address_space address_spaces[MAX_ADDR_SPACES] = ↩ …{ 0 }; |
|
⟨function prototypes⟩≡
int get_free_address_space (); |
⟨function implementations⟩≡
int get_free_address_space () { addr_space_id id = 0; while ((address_spaces[id].status != AS_FREE) && (id if (id==MAX_ADDR_SPACES) id = -1; return id; } |
|
⟨enable paging for the kernel⟩≡
address_spaces[0].status = AS_USED; address_spaces[0].pd = &kernel_pd; address_spaces[0].pid = -1; // not ↩ …a process |
|
⟨function prototypes⟩+≡
int create_new_address_space (int initial_ram, i↩ …nt initial_stack); |
⟨macro definitions⟩≡
#define MAKE_MULTIPLE_OF_PAGESIZE(x) x = ((x+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE |
(Einfaches Makro, das Größenangaben so anpasst, dass sie immer ein Vielfaches der Seitengröße sind)
⟨function implementations⟩+≡
int create_new_address_space (int initial_ram, int initial_stack) { MAKE_MULTIPLE_OF_PAGESIZE (initial_ram); MAKE_MULTIPLE_OF_PAGESIZE (initial_stack); // reserve address space table entry addr_space_id id; if ( (id = get_free_address_space()) == -1 ) return -1; // fail address_spaces[id].status = AS_USED; address_spaces[id].memstart = BINARY_LOAD_ADDRESS; address_spaces[id].memend = BINARY_LOAD_ADDRESS + initial_ram; address_spaces[id].stacksize = initial_stack; address_spaces[id].refcount = 1; // default: used by one process ⟨reserve memory for new page directory⟩ // sets new_pd address_spaces[id].pd = new_pd; ⟨copy master page directory to new directory⟩ int frameno, pageno; // used in the following two code chunks if (initial_ram > 0) { ⟨create initial user mode memory⟩ } if (initial_stack > 0) { ⟨create initial user mode stack⟩ } return id; }; |
⟨reserve memory for new page directory⟩≡
page_directory *new_pd = (void*)request_new_page (); if (new_pd == NULL) { // Error printf ("\nERROR: no free page, aborting create_new_address_space\n"); return -1; }; memset (new_pd, 0, sizeof(page_directory)); |
⟨copy master page directory to new directory⟩≡
*new_pd = kernel_pd; memset ((char*)new_pd, 0, 4); // clear first entry (kernel pd contains // old reference to some page table) |
|
⟨function prototypes⟩+≡
int as_map_page_to_frame (int as, unsigned int p↩ …ageno, unsigned int frameno); |
|
⟨create initial user mode memory⟩≡
pageno = 0; while (initial_ram > 0) { if ((frameno = request_new_frame ()) < 0) { printf ("\nERROR: no free frame, aborting cr↩ …eate_new_address_space\n"); return -1; }; as_map_page_to_frame (id, pageno, frameno); pageno++; initial_ram -= PAGE_SIZE; }; |
|
⟨create initial user mode stack⟩≡
pageno = TOP_OF_USER_MODE_STACK / PAGE_SIZE; while (initial_stack > 0) { if ((frameno = request_new_frame ()) < 0) { printf ("\nERROR: no free frame, aborting cr↩ …eate_new_address_space\n"); return -1; }; pageno--; as_map_page_to_frame (id, pageno, frameno); initial_stack -= PAGE_SIZE; } |
⟨function implementations⟩+≡
int as_map_page_to_frame (int as, unsigned int pageno, unsigned int frameno) { // for address space as, map page #pageno to frame #frameno page_table* pt; page_directory* pd; pd = address_spaces[as].pd; // use the right address space unsigned int pdindex = pageno/1024; // calculuate pd entry unsigned int ptindex = pageno%1024; // ... and pt entry if ( ! pd->ptds[pdindex].present ) { // page table is not present ⟨create new page table for this address space⟩ // sets pt } else { // get the page table pt = (page_table*) PHYSICAL(pd->ptds[pdindex].frame_addr << 12); }; if (pdindex < 704) // address below 0xb0000000 -> user access UMAP ( &(pt->pds[ptindex]), frameno << 12 ); else KMAP ( &(pt->pds[ptindex]), frameno << 12 ); return 0; }; |
⟨create new page table for this address space⟩≡
int new_frame_id = request_new_frame (); unsigned int address = PHYSICAL (new_frame_id << 12); pt = (page_table *) address; memset (pt, 0, sizeof(page_table)); UMAPD ( &(pd->ptds[pdindex]), new_frame_id << 12); |
|
⟨function prototypes⟩+≡
void destroy_address_space (addr_space_id id); |
⟨function implementations⟩+≡
void destroy_address_space (addr_space_id id) { // called only from syscall_exit(), holding thread_list_lock, interrupts off if (--address_spaces[id].refcount > 0) return; addr_space_id as = current_as; // remember current address space current_as = id; // set current_as: we call release_page() ⟨destroy AS: release user mode pages⟩ // all pages used by the process ⟨destroy AS: release user mode stack⟩ // all its user mode stack pages ⟨destroy AS: release page tables⟩ // the page tables (0..703) current_as = as; // restore current_as address_spaces[id].status = AS_DELETE; // change AS status // remove kernel stack (cannot do this here, this stack is in use right now) add_to_kstack_delete_list (id); return; } |
⟨destroy AS: release user mode pages⟩≡
for ( int i = address_spaces[id].memstart / PAGE_SIZE; i < address_spaces[id].memend / PAGE_SIZE; i++ ) { release_page (i); }; |
⟨destroy AS: release user mode stack⟩≡
for ( int i = TOP_OF_USER_MODE_STACK / PAGE_SIZE - 1; i > (TOP_OF_USER_MODE_STACK-address_spaces[id].stacksize) / PAGE_SIZE - 1; i-- ) { release_page (i); }; |
⟨destroy AS: release page tables⟩≡
page_directory *tmp_pd = address_spaces[id].pd; for ( int i = 0; i < 704; i++ ) { if ( tmp_pd->ptds[i].present ) release_frame ( tmp_pd->ptds[i].frame_addr ); } |
|
⟨function prototypes⟩+≡
void add_to_kstack_delete_list (addr_space_id id↩ …); |
|
⟨constants⟩+≡
#define KSTACK_DELETE_LIST_SIZE 1024 |
|
⟨global variables⟩+≡
addr_space_id kstack_delete_list[KSTACK_DELETE_L↩ …IST_SIZE] = { 0 }; lock kstack_delete_list_lock; |
|
⟨initialize kernel global variables⟩≡
kstack_delete_list_lock = get_new_lock ("kstack"↩ …); |
⟨function implementations⟩+≡
void add_to_kstack_delete_list (addr_space_id id) { int i; LOCK (kstack_delete_list_lock); for (i = 0; i < KSTACK_DELETE_LIST_SIZE; i++) { // try to enter it here if (kstack_delete_list[i] == 0) { // found a free entry kstack_delete_list[i] = id; break; } } UNLOCK (kstack_delete_list_lock); if (i == KSTACK_DELETE_LIST_SIZE) printf ("ERROR ADDING ADDRESS SPACE TO KSTACK DELETE LIST!\n"); } |
⟨scheduler: free old kernel stacks⟩≡
// check all entries in the to-be-freed list int i, entry, frameno; page_directory *tmp_pd; page_table *tmp_pt; LOCK (kstack_delete_list_lock); for (entry=0; entry < KSTACK_DELETE_LIST_SIZE; entry++) { if (kstack_delete_list[entry] != 0 && kstack_delete_list[entry] != current_as) { // remove it addr_space_id id = kstack_delete_list[entry]; tmp_pd = address_spaces[id].pd; tmp_pt = (page_table *) address_spaces[id].kstack_pt; // this is the page table which maps the last 4 MB below 0xC0000000 for (i=0; i < KERNEL_STACK_PAGES; i++) { frameno = tmp_pt->pds[1023-i].frame_addr; release_frame (frameno); } kstack_delete_list[entry] = 0; // remove entry from kstack delete list release_page (((unsigned int)tmp_pt) >> 12); // free memory for page table release_page (((unsigned int)tmp_pd) >> 12); // ... and page directory address_spaces[id].status = AS_FREE; // mark address space as free } } UNLOCK (kstack_delete_list_lock); |
|
⟨constants⟩+≡
// kernel stack (per process): 1 page = 4 KByte #define KERNEL_STACK_PAGES 4 #define KERNEL_STACK_SIZE PAGE_SIZE * KERNEL_STA↩ …CK_PAGES |
|
⟨function prototypes⟩+≡
inline void activate_address_space (addr_space_i↩ …d id) __attribute__((always_inline)); |
|
⟨global variables⟩+≡
addr_space_id current_as = 0; // global variabl↩ …e: current address space addr_space_id tmp_as; // temp. address ↩ …space variable, for context switch |
⟨function implementations⟩+≡
inline void activate_address_space (addr_space_id id) { // NOTE: Do not call this from the scheduler; where needed, replicate the code unsigned int virt = (unsigned int)address_spaces[id].pd; // get PD address unsigned int phys = mmu (0, virt); // and find its physical address asm volatile ("mov %0, %%cr3" : : "r"(phys)); // write CR3 register current_as = id; // set current address space current_pd = address_spaces[id].pd; // set current page directory return; }; |
Adressübersetzung
|
⟨function prototypes⟩+≡
unsigned int mmu_p (addr_space_id id, unsigned i↩ …nt pageno); // pageno -> frameno unsigned int mmu (addr_space_id id, unsigned int↩ … vaddress); // virt. -> phys. addr. |
⟨function implementations⟩+≡
unsigned int mmu_p (addr_space_id id, unsigned int pageno) { unsigned int pdindex, ptindex; page_directory *pd; page_table *pt; pdindex = pageno/1024; ptindex = pageno%1024; pd = address_spaces[id].pd; if ( ! pd->ptds[pdindex].present ) { return -1; } else { pt = (page_table*) PHYSICAL(pd->ptds[pdindex].frame_addr << 12); if ( pt->pds[ptindex].present ) { return pt->pds[ptindex].frame_addr; } else { return -1; }; } }; |
|
⟨function implementations⟩+≡
unsigned int mmu (addr_space_id id, unsigned int↩ … vaddress) { unsigned int tmp = mmu_p (id, (vaddress >> 12)↩ …); if (tmp == -1) return -1; // fail else return (tmp << 12) + (vaddress % PAGE_SIZE); } |
|
⟨function prototypes⟩+≡
void *u_sbrk (int incr); |
⟨function implementations⟩+≡
void *u_sbrk (int incr) { int pages = incr / PAGE_SIZE; int i, frame; address_space *aspace = &address_spaces[current_as]; unsigned int oldbrk = aspace->memend; for (i=0; i < pages; i++) { frame = request_new_frame (); if (frame == -1) { return (void*)(-1); } // error! as_map_page_to_frame (current_as, aspace->memend/PAGE_SIZE, frame); aspace->memend += PAGE_SIZE; } return (void*) oldbrk; } |
|
⟨syscall prototypes⟩≡
void syscall_sbrk (context_t *r); |
⟨syscall functions⟩≡
void syscall_sbrk (context_t *r) { // ebx: increment r->eax = (unsigned int)u_sbrk (r->ebx); return; } |
|
⟨initialize syscalls⟩≡
install_syscall_handler (__NR_brk, syscall_sbrk)↩ …; |
⟨type definitions⟩+≡
typedef struct { thread_id tid; // thread id thread_id ppid; // parent process int state; // state of the process context_t regs; // context unsigned int esp0; // kernel stack pointer unsigned int eip; // program counter unsigned int ebp; // base pointer ⟨more TCB entries⟩ } TCB; |
|
⟨constants⟩+≡
#define MAX_THREADS 1024 |
|
⟨global variables⟩+≡
TCB thread_table[MAX_THREADS]; |
|
⟨more TCB entries⟩+≡
thread_id next; // id of the ``next'' thread thread_id prev; // id of the ``previous'' thread |
(Bild: Felix Freiling)
|
⟨declaration of blocked queue⟩≡
typedef struct { thread_id next; // id of the ``next'' thread thread_id prev; // id of the ``previous'' thre↩ …ad } blocked_queue; |
⟨function implementations⟩+≡
void initialize_blocked_queue (blocked_queue* q) { q->prev = 0; q->next = 0; } |
|
⟨function prototypes⟩+≡
void add_to_ready_queue (thread_id t); void remove_from_ready_queue (thread_id t); |
⟨function implementations⟩+≡
void add_to_ready_queue (thread_id t) { thread_id last = thread_table[0].prev; thread_table[0].prev = t; thread_table[t].next = 0; thread_table[t].prev = last; thread_table[last].next = t; thread_table[t].state = TSTATE_READY; // set its state to ready } |
⟨function implementations⟩+≡
void remove_from_ready_queue (thread_id t) { thread_id prev_thread = thread_table[t].prev; thread_id next_thread = thread_table[t].next; thread_table[prev_thread].next = next_thread; thread_table[next_thread].prev = prev_thread; } |
|
⟨initialize kernel global variables⟩+≡
thread_table[0].prev = 0; thread_table[0].next = 0; |
⟨function prototypes⟩+≡
void add_to_blocked_queue (thread_id t, blocked_queue* bq); void remove_from_blocked_queue (thread_id t, blocked_queue* bq); thread_id front_of_blocked_queue (blocked_queue bq); |
⟨function implementations⟩+≡
thread_id front_of_blocked_queue (blocked_queue bq) { return bq.next; } |
⟨function implementations⟩+≡
void add_to_blocked_queue (thread_id t, blocked_queue* bq) { thread_id last = bq->prev; bq->prev = t; thread_table[t].next = 0; // [[t]] is ``last'' thread thread_table[t].prev = last; if (last == 0) { bq->next = t; } else { thread_table[last].next = t; } } |
⟨function implementations⟩+≡
void remove_from_blocked_queue (thread_id t, blocked_queue* bq) { thread_id prev_thread = thread_table[t].prev; thread_id next_thread = thread_table[t].next; if (prev_thread == 0) { bq->next = next_thread; } else { thread_table[prev_thread].next = next_thread; } if (next_thread == 0) { bq->prev = prev_thread; } else { thread_table[next_thread].prev = prev_thread; } } |
|
⟨more TCB entries⟩+≡
boolean used; |
|
⟨global variables⟩+≡
int next_pid = 1; |
⟨find free TCB entry⟩≡
boolean tcbfound = false; int tcbid; for ( tcbid=next_pid; ((tcbid < MAX_THREADS) && (!tcbfound)); tcbid++ ) { if (thread_table[tcbid].used == false) { tcbfound = true; break; } }; |
⟨find free TCB entry⟩+≡
if (!tcbfound) { // continue searching at 1 for ( tcbid=1; ((tcbid < next_pid) && (!tcbfound)); tcbid++ ) { if (thread_table[tcbid].used == false) { tcbfound = true; break; } }; }; if (tcbfound) next_pid = tcbid+1; // update next_pid: // either tcbfound == false or tcbid == index of first free TCB |
|
⟨function prototypes⟩+≡
int register_new_tcb (addr_space_id as_id); |
⟨function implementations⟩+≡
int register_new_tcb (addr_space_id as_id) { // called by ulix_fork() which aquires LOCK (thread_list_lock) ⟨find free TCB entry⟩ if (!tcbfound) { return -1; // no free TCB! }; thread_table[tcbid].used = true; // mark as used thread_table[tcbid].addr_space = as_id; // enter address space ID return tcbid; } |
Schritte:
|
⟨global variables⟩+≡
volatile int current_task; |
|
⟨constants⟩+≡
#define PROGSIZE 32768 |
|
⟨global variables⟩+≡
lock thread_list_lock = NULL; // initialize ↩ …this when the first process starts |
⟨function implementations⟩+≡
void start_program_from_disk (char *progname) { if (thread_list_lock == NULL) // initialize lock for thread list thread_list_lock = get_new_lock ("thread list"); ⟨start program from disk: prepare address space and TCB entry⟩ ⟨start program from disk: load binary⟩ ⟨start program from disk: create kernel stack⟩ current_task = tid; // make this the current task add_to_ready_queue (tid); // add process to ready queue ENABLE_SCHEDULER; cpu_usermode (BINARY_LOAD_ADDRESS, TOP_OF_USER_MODE_STACK); // jump to user mode }; |
⟨start program from disk: prepare address space and TCB entry⟩≡
addr_space_id as; thread_id tid; as = create_new_address_space(64*1024, 4096); // 64 KB + 4 KB stack tid = register_new_tcb (as); // get a fresh TCB thread_table[tid].tid = tid; thread_table[tid].ppid = 0; // parent: 0 (none) thread_table[tid].new = false; // not freshly created via fork() thread_table[tid].terminal = 0; // default terminal: 0 memcpy (thread_table[tid].cwd, "/", 2); // set current directory memcpy (thread_table[tid].cmdline, "new", 4); // set temporary command line activate_address_space (as); // activate the new address space |
⟨start program from disk: load binary⟩≡
// read binary int mfd = mx_open (DEV_HDA, progname, O_RDONLY); mx_read (DEV_HDA, mfd, (char*)BINARY_LOAD_ADDRESS, PROGSIZE); // load to virtual address 0 mx_close (DEV_HDA, mfd); |
⟨start program from disk: create kernel stack⟩≡
unsigned int framenos[KERNEL_STACK_PAGES]; // frame numbers of kernel stack pages int i; for (i=0; i<KERNEL_STACK_PAGES; i++) framenos[i] = request_new_frame(); page_table* stackpgtable = (page_table*)request_new_page(); memset (stackpgtable, 0, sizeof(page_table)); KMAPD ( ¤t_pd->ptds[767], mmu (0, (unsigned int)stackpgtable) ); for (i=0; i<KERNEL_STACK_PAGES; i++) as_map_page_to_frame (current_as, 0xbffff - i, framenos[i]); char* kstack = (char*) (TOP_OF_KERNEL_MODE_STACK-KERNEL_STACK_SIZE); unsigned int adr = (uint)kstack; // one page for kernel stack tss_entry.esp0 = adr+KERNEL_STACK_SIZE; thread_table[tid].esp0 = (uint)kstack + KERNEL_STACK_SIZE; thread_table[tid].ebp = (uint)kstack + KERNEL_STACK_SIZE; |
|
⟨install GDTs for User Mode⟩≡
fill_gdt_entry (3, 0, 0xFFFFFFFF, 0xFA, 0b1100); fill_gdt_entry (4, 0, 0xFFFFFFFF, 0xF2, 0b1100); |
⟨type definitions⟩+≡
typedef struct { unsigned int prev_tss : 32; // unused: previous TSS unsigned int esp0, ss0 : 32; // ESP and SS to load when we switch to ring 0 long long u1, u2 : 64; // unused: esp1, ss1, esp2, ss2 for rings 1 and 2 unsigned int cr3 : 32; // unused: page directory unsigned int eip, eflags : 32; unsigned int eax, ecx, edx, ebx, esp, ebp, esi, edi, es, cs, ss, ds, fs, gs : 32; // unused (dynamic, filled by CPU) long long u3 : 64; // unused: ldt, trap, iomap } __attribute__((packed)) tss_entry_struct; ⟨global variables⟩+≡ tss_entry_struct tss_entry; |
|
⟨install GDTs for User Mode⟩+≡
write_tss (5, 0x10, 0xc0000000); |
⟨function implementations⟩+≡
void write_tss (int num, unsigned short ss0, unsigned int esp0) { unsigned int base = (unsigned int) &tss_entry; unsigned int limit = sizeof (tss_entry) - 1; fill_gdt_entry (num, base, limit, 0xE9, 0b0000); // enter it in GDT memset (&tss_entry, 0, sizeof(tss_entry)); // fill with zeros tss_entry.ss0 = ss0; // Set the kernel stack segment. tss_entry.esp0 = esp0; // Set the kernel stack pointer. } |
|
⟨start.asm⟩≡
[section .text] global tss_flush tss_flush: mov ax, 0x28 | 0x03 ltr ax ; load the tas↩ …k register ret |
|
⟨function prototypes⟩+≡
extern void cpu_usermode (unsigned int address, ↩ …unsigned int stack); // assembler |
(Bild: http://www.cs.cmu.edu/~410/doc/segments/segments.html)
⟨start.asm⟩+≡
; code from http://f.osdev.org/viewtopic.php?t=23890&p=194213 (Jezze) ; modified and comments added global cpu_usermode cpu_usermode: cli ; disable interrupts mov ebp, esp ; remember current stack address mov ax, 0x20 | 0x03 ; code selector 0x20 | RPL3: 0x03 ; RPL = requested protection level mov ds, ax mov es, ax mov fs, ax mov gs, ax mov eax, esp push 0x20 | 0x03 ; code selector 0x20 | RPL3: 0x03 mov eax, [ebp + 8] ; stack address is 2nd argument push eax ; stack pointer pushf ; EFLAGS pop eax ; trick: reenable interrupts when doing iret or eax, 0x200 push eax push 0x18 | 0x03 ; code selector 0x18 | RPL3: 0x03 mov eax, [ebp + 4] ; return address (1st argument) for iret push eax iret |