aboutsummaryrefslogtreecommitdiffstats
path: root/core/bcopyxx.inc
blob: aacf5a6dc4b0525dd974c8ab00bab127c9ecb694 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32xx.inc
;;


;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;

		; Assemble what follows as 32-bit code, in its own section.
		bits 32
		section .bcopyxx
; Start-of-image marker.  pm_shuffle copies the bytes from here up to
; bcopyxx_end as one unit and computes run-time addresses as offsets from
; this symbol.
bcopyxx_start	equ $
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	ECX is guaranteed to not be zero on entry.
;
	
pm_bcopy:
		; In:	ESI = source address, or -1 to zero-fill instead of copy
		;	EDI = destination address
		;	ECX = byte count, must be nonzero
		;	DF is assumed clear on entry: the forward and zero-fill
		;	paths never execute CLD themselves
		; Clobbers: EAX, ECX, ESI, EDI, DL, flags
		; Preserves: EBX.  pm_shuffle keeps its list pointer in EBX
		;	across the call, so no path here may touch it.
		;
		; Every path has the same shape: move 0-1 bytes, then 0-1
		; words, so that the bulk dword transfer is dword-aligned on
		; the destination, then mop up the remaining 0-3 bytes.  The
		; alignment tests look at the live EDI, because EDI has already
		; moved by the time the second test is made.

		cmp esi,-1
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment: odd destination -> move one byte
		test edi,1
		jz .faa1
		a32 movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we bail to .f_tiny
		cmp ecx,2
		jb .f_tiny		; 0 or 1 byte left

		; EDI is even here; move a word if it is not dword-aligned
		test edi,2
		jz .faa2
		a32 movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		a32 rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; Trailing word?
		jz .fab2
		a32 movsw
.fab2:
.f_tiny:
		test al,1		; Trailing byte?
		jz .fab1
		a32 movsb
.fab1:
		ret

.reverse:
		std			; Reverse copy

		lea esi,[esi+ecx-1]	; Point to final byte
		lea edi,[edi+ecx-1]

		; Initial alignment.  EDI addresses the last byte, so the
		; end of the buffer is word-aligned exactly when EDI is odd;
		; otherwise peel off one byte.
		test edi,1
		jnz .raa1
		a32 movsb
		dec ecx
.raa1:

		dec esi			; Point to the last word
		dec edi			; (EDI is now even)
		mov al,cl
		cmp ecx,2
		jb .r_tiny
		; The last dword starts at EDI-2, which is dword-aligned
		; exactly when bit 1 of EDI is set; otherwise peel off a word.
		test edi,2
		jnz .raa2
		a32 movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Point to the last dword
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		a32 rep movsd

		; Final alignment
.r_final:
		add esi,2		; Back to word addressing
		add edi,2
		test al,2
		jz .rab2
		a32 movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte addressing
		inc edi
		test al,1
		jz .rab1
		a32 movsb
.rab1:
		cld			; Leave DF clear for the caller
		ret

.bzero:
		xor eax,eax		; Fill value

		; Initial alignment
		test edi,1
		jz .zaa1
		a32 stosb
		dec ecx
.zaa1:

		; EAX holds the fill value, so the saved low bits of the
		; count go in DL.  (Not BL: EBX belongs to the caller.)
		mov dl,cl
		cmp ecx,2
		jb .z_tiny
		test edi,2
		jz .zaa2
		a32 stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov dl,cl		; Save low bits
		shr ecx,2
		a32 rep stosd

		test dl,2
		jz .zab2
		a32 stosw
.zab2:
.z_tiny:
		test dl,1
		jz .zab1
		a32 stosb
.zab1:
		ret

;
; shuffle_and_boot:
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory.  This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
;	EBX		-> Pointer to list of (dst, src, len) triples(*)
;	EDX		-> Pointer to safe memory area
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode (0 = pm, 1 = rm)
pm_shuffle:
		; Step 1: get a copy of this code (bcopyxx_start..bcopyxx_end)
		; running at the safe area in EDX.  This prologue is the only
		; part that uses link-time absolute addresses.
		; NOTE(review): the REP MOVSD copies below assume DF is clear
		; on entry; nothing here executes CLD.
		mov esi,bcopyxx_start
		mov edi,bcopyxx_end
		cmp edx,esi
		je .safe		; This was too easy
		cmp edx,edi
		jae .at_end		; Safe area >= end

		; Safe area < end; we may have an overlap, so copy
		; ourselves to a safe distance beyond the end...
		; The temporary copy goes to bcopyxx_start + 8*bcopyxx_dwords;
		; EAX remembers where it starts so ESI can be reset to it.
		mov ecx,bcopyxx_dwords
		lea edi,[esi+ecx*8]
		mov eax,edi
		rep movsd
		mov esi,eax
		; Continue at .at_end inside the temporary copy.
		jmp .at_end+(8*bcopyxx_dwords)	; Relative jump, is safe

.at_end:
		; ESI = start of the image we are currently executing from.
		; Copy it to the safe area and continue at .safe in that copy.
		mov ecx,bcopyxx_dwords
		mov edi,edx
		rep movsd
		lea eax,[edx+.safe-bcopyxx_start]
		jmp eax			; Jump to safe location
.safe:
		; Give ourselves a safe stack
		; ESP = safe area + image length + bcopyxx_stack, i.e. the
		; stack occupies the bcopyxx_stack bytes just above the image.
		lea esp,[edx+bcopyxx_stack+bcopyxx_end-bcopyxx_start]
		; EDX now addresses the relocated bcopy_gdt.  Its first slot
		; doubles as the LGDT operand (word limit, dword base), so the
		; base field at +2 is updated to the run-time address.
		add edx,bcopy_gdt-bcopyxx_start
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
		; Each list entry is 12 bytes: dst at +0, src at +4, len at +8.
		; EBX must survive the call to pm_bcopy, since it is the list
		; cursor -- NOTE(review): verify that every pm_bcopy path
		; leaves EBX untouched.
.loop:
		mov edi,[ebx]
		mov esi,[ebx+4]
		mov ecx,[ebx+8]
		jecxz .done		; len == 0 terminates the list
		call pm_bcopy
		add ebx,12
		jmp .loop
.done:
		; Terminator entry: EDI = entry point, ESI = mode.  Any nonzero
		; mode takes the real-mode exit.
		and esi,esi
		jnz pm_shuffle_real_mode
		jmp edi			; Protected mode entry

		; We have a real-mode entry point, so we need to return
		; to real mode... 
pm_shuffle_real_mode:
		; Entered by a jump from pm_shuffle with EDI = the entry value
		; from the list terminator; this routine never returns.
		;
		; CALL/POP yields the run-time address of .here, so everything
		; below is addressed relative to wherever this code now lives.
		call .here
.here:		pop eax
		mov ebx,eax		; EBX = run-time address of .here
		add eax,.next-.here	; EAX = run-time address of .next
		; Patch the operand of the final "jmp 0:0" (.rm_entry is its
		; last 4 bytes) with EDI: the low word becomes the offset and
		; the high word the segment of the real-mode far jump.
		mov [ebx-.here+.rm_entry],edi
		; Set the base of the CS16 descriptor to the address of .next.
		; The base is stored at descriptor bytes 2-3 (bits 15:0),
		; byte 4 (bits 23:16) and byte 7 (bits 31:24).
		mov [ebx-.here+bcopy_gdt.CS16+2],ax
		shr eax,16
		mov [ebx-.here+bcopy_gdt.CS16+4],al
		mov [ebx-.here+bcopy_gdt.CS16+7],ah
		; Load every data segment register, SS included, with the
		; 64K-limit 16-bit data descriptor.
		mov eax,PM_DS16_RM
		mov ds,eax
		mov es,eax
		mov fs,eax
		mov gs,eax
		mov ss,eax
		; Offset 0 in CS16 is .next, because the base was set above.
		jmp PM_CS16:0
		bits 16
.next:
		; Now executing 16-bit code: clear bit 0 of CR0 (protection
		; enable) and far-jump to the patched real-mode target.
		mov eax,cr0
		and al,~1
		mov cr0,eax
		jmp 0:0
.rm_entry	equ $-4

		bits 32

		align	16
; GDT descriptor entry
; desc NAME
;	Opens a GDT slot: defines bcopy_gdt.NAME at the current position and
;	PM_NAME as its byte offset from bcopy_gdt, i.e. its selector value.
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

; Each 8-byte segment descriptor below is written out field by field:
;	dw  limit 15:0, base 15:0
;	db  base 23:16, access byte, flags + limit 19:16, base 31:24
bcopy_gdt:
		; 00h  Null slot, doubling as the 6-byte LGDT operand.
		; pm_shuffle overwrites the base with the run-time address.
		dw bcopy_gdt_size-1	; GDT limit
		dd bcopy_gdt		; GDT base
		dw 0			; Pad the slot to 8 bytes

	desc CS16
		; 08h  16-bit code, readable, present, dpl 0, 64K limit.
		; pm_shuffle_real_mode patches the base fields of this slot.
		dw 0xffff, 0x0000
		db 0x00, 0x9b, 0x00, 0x00
	desc DS16_4G
		; 10h  16-bit data, read/write, present, dpl 0, 4G limit
		dw 0xffff, 0x0000
		db 0x00, 0x93, 0x8f, 0x00
	desc DS16_RM
		; 18h  16-bit data, read/write, present, dpl 0, 64K limit
		dw 0xffff, 0x0000
		db 0x00, 0x93, 0x00, 0x00
	desc CS32
		; 20h  32-bit code, readable, present, dpl 0, 4G limit
		dw 0xffff, 0x0000
		db 0x00, 0x9b, 0xcf, 0x00
	desc DS32
		; 28h  32-bit data, read/write, present, dpl 0, 4G limit
		dw 0xffff, 0x0000
		db 0x00, 0x93, 0xcf, 0x00

		; Intel VT is unhappy about anything that doesn't smell like
		; a full-blown 32-bit OS, so provide a TSS descriptor too.
	desc TSS
		; 30h  32-bit task state segment, present, dpl 0,
		; 104 bytes located at DummyTSS
		dw 104-1, DummyTSS
		db 0x00, 0x89, 0x00, 0x00

		; Separate 16-bit stack segment: its base may need to differ
		; from DS16 (e.g. if we're booted from PXELINUX).
	desc SS16
		; 38h  16-bit data, read/write, present, dpl 0, 64K limit
		dw 0xffff, 0x0000
		db 0x00, 0x93, 0x00, 0x00

bcopy_gdt_size:	equ $-bcopy_gdt

		; Zero-pad to a 4-byte boundary before closing the image.
		; pm_shuffle copies the image with REP MOVSD, bcopyxx_dwords
		; at a time.
		; NOTE(review): bcopyxx_len is only a whole number of dwords
		; if bcopyxx_start is itself 4-byte aligned -- confirm.
		align 4, db 0
; End-of-image marker and the sizes derived from it.
bcopyxx_end	equ $
bcopyxx_len	equ $-bcopyxx_start
bcopyxx_dwords	equ bcopyxx_len >> 2

; pm_shuffle places its stack in the bcopyxx_stack bytes directly above the
; relocated image, so bcopyxx_safe is the total number of bytes it uses at
; the safe area.
bcopyxx_stack	equ 128			; We want this much stack
bcopyxx_safe	equ bcopyxx_len + bcopyxx_stack

;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory not used
; for anything real.
;
; This is the base address stored in the TSS descriptor of bcopy_gdt.
DummyTSS	equ 0x800

		; Return to 16-bit code in the regular .text section.
		bits 16
		section .text