From the Archives of Endless Skye
JMP2PM
I originally wrote this in '96 to access a memory-mapped I/O register
that was inaccessible in real mode. If the segment limit wasn't
reset on the return to real mode, it was possible to address more
than 64K of memory. This quirk was used to access memory beyond
the reach of real mode on processors of the time.
Source and Build Files
Contact the author
Back to Endless Skye
;*******************************************************************************
;
; DEVREG.ASM: Jumps to protected mode to read or write a 32-bit
; value from or to any physical memory address and
; then returns to real mode. See notes.
;
; Notes:
;
; There is no checking done to see if we are running in a Virtual
; DOS machine or in protected mode. In either case the results
; are unpredictable, but most likely the system will crash. Adding
; these checks is not a big problem if they are needed.
;
; This is a minimalist switcher: there is no IDT or LDT, and only four
; entries in the GDT. We turn off interrupts, except the NMI and SMI,
; before we jmp to protected mode and keep them off until we switch back
; to real mode. If a s/w fault or exception occurs, the machine will lock
; ignominiously. Paging is disabled. The four entries in the GDT are:
; the required NULL entry at offset 0, the CS descriptor, the
; descriptor shared by the DS, SS, ES, and FS selectors, and the GS
; descriptor, which points to the high memory address we want to
; access. Further, we don't need a TSS since we don't do ring
; transitions and we don't need to use the task segment.
;
; The segments are defined manually because we need to use the .386P
; directive for some of the operating-system type instructions. This
; directive creates "USE32" segments, and not the 16-bit segments
; that are needed. Even though the original development was done
; using the Borland Assembler, there are no Borland-specific usages
; in this file. I have successfully assembled this file using ML 6.11c.
;
; I have placed the data in Borland's FAR data segment just to reinforce
; the notion that this module is in its own world. I use
; "lea" to determine offsets within the data segment, which should compute
; the correct address whether this data segment is in a segment by itself
; or part of a group. There is no reason why this module needs to be linked
; in with large model modules. It can just as easily be linked in with medium
; model, which is the minimum (though you may need to change the name and
; other characteristics).
;
;
; Not all code needs to use 32-bit instructions and addressing. This causes
; a small performance penalty because of the read of the "66h" or "67h"
; prefixes. Also I didn't make any deliberate attempt to reorder instructions
; to suit the Pentium. This is another area of potential optimization if
; we want to try and keep execution time to a minimum.
;
; Borland Turbo Assembler v1.41: tasm /mx devreg.asm
; [-kdb-] 11:47:52.62 08-20-1996
;*******************************************************************************
.386p
;*******************************************************************************
; M A C R O S
;*******************************************************************************
;*******************************************************************************
; C O N S T A N T S
;*******************************************************************************
;[---GDT size and selectors (a selector is the descriptor's byte offset in the GDT)---]
SIZEOF_GDT        EQU     (OFFSET GDT_END - OFFSET GDT)
CS_SELECTOR       EQU     (OFFSET CS_DESC - OFFSET NULL_DESC)
OTHER_SELECTOR    EQU     (OFFSET OTHER_DESC - OFFSET NULL_DESC)
GS_SELECTOR       EQU     (OFFSET GS_DESC - OFFSET NULL_DESC)

;[---bits of the "access" byte (DESC.bAccess)---]
DESC_PRESENT      EQU     80h             ; segment present
DESC_DPL_RING0    EQU     00h             ; descriptor privilege level 0
DESC_DPL_RING1    EQU     20h             ; descriptor privilege level 1
DESC_DPL_RING2    EQU     40h             ; descriptor privilege level 2
DESC_DPL_RING3    EQU     60h             ; descriptor privilege level 3
DESC_APPTYPE      EQU     10h             ; application (code/data) descriptor
DESC_CODE_READ    EQU     0Ah             ; code segment, execute/read
DESC_DATA_RW      EQU     02h             ; data segment, read/write

;[---bits of the "gran" byte (DESC.bGran)---]
DESC_PAGEGRAN     EQU     80h             ; page granularity
DESC_32BITCODE    EQU     40h             ; code: if set, 32-bit operands and 32-bit
                                          ; effective addressing are assumed
DESC_BIGDATA      EQU     40h             ; stack: if set, PUSHes, POPs, and CALLs
                                          ; use ESP instead of SP

;[---ready-made access bytes for the descriptors built by this module---]
DATA_ACCESS_BITS  EQU     (DESC_PRESENT OR DESC_DPL_RING0 OR DESC_DATA_RW OR DESC_APPTYPE)
CODE_ACCESS_BITS  EQU     (DESC_PRESENT OR DESC_DPL_RING0 OR DESC_CODE_READ OR DESC_APPTYPE)
;*******************************************************************************
; T Y P E D E F S
;*******************************************************************************
;
; 386 Descriptor Template
;
DESC            STRUC                   ; one 8-byte GDT entry
wLim_0_15       dw      0               ; segment limit, bits 0..15
wBas_0_15       dw      0               ; base address, bits 0..15
bBas_16_23      db      0               ; base address, bits 16..23
bAccess         db      0               ; access byte (see the DESC_* access bits)
bGran           db      0               ; granularity byte (see the DESC_* gran bits)
bBas_24_31      db      0               ; base address, bits 24..31
DESC            ENDS
;
; base for IDT & GDT
;
LIMBAS          STRUC                   ; 6-byte operand of SIDT/LIDT/LGDT
wLimit          DW      0               ; table limit (size in bytes minus 1)
dwBase          DD      0               ; 32-bit physical base of the table
LIMBAS          ENDS
;*******************************************************************************
; D A T A
;*******************************************************************************
; Private data segment of this module. While in protected mode the "Other"
; descriptor is based at the start of this segment, so DS, ES, FS and SS all
; address this data with the same offsets as in real mode.
_FARDATA SEGMENT DWORD PUBLIC 'FAR_DATA' USE16
assume ds:_FARDATA
stak DD 128 DUP(12345678h) ; 512-byte stack used while in PM, pre-filled with a marker pattern
GDT LABEL DESC ; start of the GDT; its physical address goes into lbGDT
NULL_DESC DESC <> ; required null descriptor at offset 0
CS_DESC DESC <> ; PM code segment, filled in at run time (CS_SELECTOR)
OTHER_DESC DESC <> ; data descriptor for DS/ES/FS/SS (OTHER_SELECTOR)
GS_DESC DESC <> ; descriptor for the target physical memory (GS_SELECTOR)
GDT_END LABEL BYTE ; end marker used to compute SIZEOF_GDT
NULL_LIMBAS LIMBAS<> ; all-zero limit:base, loaded into IDTR while in PM
lbRealModeIDT LIMBAS<> ; real mode IDTR contents saved by SIDT and restored on exit
lbGDT LIMBAS<> ; limit:base of the GDT above, built at run time for LGDT
lbTemp LIMBAS<> ; not referenced by the code in this file
_FARDATA ENDS
;*******************************************************************************
; C O D E
;*******************************************************************************
DEVREG_TEXT SEGMENT DWORD PUBLIC 'CODE' USE16
assume cs:DEVREG_TEXT
;-------------------------------------------------------------------------------
; _WriteDeviceRegister: Switches temporarily to PM to write a
;                       32-bit value to any physical address.
;
; void __cdecl WriteDeviceRegister(DWORD dwPhysicalBaseAddress,
;                                  WORD wOffset, DWORD dwValue);
;
; In:    dwPhysicalBaseAddress  physical base address of the device window
;        wOffset                byte offset (0..0FFFFh) added to the base
;        dwValue                32-bit value stored at base + offset
; Out:   DX:AX = 0
; Saves: EBX, ESI, EDI, EBP, DS, ES, FS, GS, SS:ESP, the real mode IDTR and
;        the caller's interrupt flag.
; Uses:  EAX, ECX, EDX and the arithmetic flags are not preserved.
;
; Must be called from real mode (see the notes at the top of the file).
; Maskable interrupts stay off for the whole excursion and the IDT limit is
; zero while in PM, so any fault or exception shuts the machine down.
;
; The GS descriptor is based at dwPhysicalBaseAddress + wOffset with a
; 4-byte limit, so every 16-bit offset is usable and the only location
; reachable through GS is the dword being written.
;-------------------------------------------------------------------------------
$CALL$STAK STRUC
                dd ?                    ; original EBP
                dw ?                    ; 16-bit offset of RET
                dw ?                    ; 16-bit CS of RET
?dwPhysicalBase dd ?
?wOffset        dw ?
?dwValue        dd ?
$CALL$STAK ENDS
        PUBLIC _WriteDeviceRegister
_WriteDeviceRegister PROC FAR
        push    ebp
        movzx   ebp,sp                  ; establish stack frame addressability;
                                        ; the RM stack is addressed by SS:SP and
                                        ; the high word of ESP is not guaranteed
                                        ; to be zero, so don't use it for the
                                        ; 32-bit [ebp+disp] accesses below
        push    ebx                     ; save the standard registers
        push    esi
        push    edi
        push    ds
        mov     ax,_FARDATA             ; establish addressability of our data
        mov     ds,ax
;
; do any real mode housekeeping
;
        db      66h                     ; operand-size prefix: store the 32-bit base
        sidt    lbRealModeIDT           ; save the real mode IDT
;----------------------------------------------------------------
; construct the LIMIT16:BASE32 to load the GDT
;----------------------------------------------------------------
        lea     ebx,lbGDT               ; EBX <-- ptr to LIM:BAS structure for GDT
        mov     [ebx.wLimit],SIZEOF_GDT-1 ; size of the GDT less 1
        xor     eax,eax                 ; EAX <-- ensure the high bits are clear
        mov     ax,ds                   ; AX <-- DS
        shl     eax,4                   ; EAX <-- convert segment to a physical address
        lea     edx,GDT                 ; EDX <-- offset of our GDT
        add     eax,edx                 ; EAX <-- physical address of the GDT
        mov     [ebx.dwBase],eax        ; set the physical addr of the GDT
;----------------------------------------------------------------
; set up the GDT
;----------------------------------------------------------------
; (1) set up the descriptor for the GS selector: it is based directly
;     at the target dword (physical base + offset) and is 4 bytes long
        mov     eax,[ebp.?dwPhysicalBase] ; EAX <-- physical base
        movzx   edx,[ebp.?wOffset]      ; EDX <-- 16-bit offset, zero extended
        add     eax,edx                 ; EAX <-- physical address of the target
        lea     ebx,GS_DESC             ; EBX <-- ptr to the GS descriptor
        mov     [ebx.wBas_0_15],ax      ; set low 16 bits of physical address
        shr     eax,16                  ; move the high word to the low word
        mov     [ebx.bBas_16_23],al     ; set bits 23..16
        mov     [ebx.bBas_24_31],ah     ; set bits 31..24
        mov     [ebx.wLim_0_15],4-1     ; limit is # of bytes minus 1
        mov     [ebx.bAccess],DATA_ACCESS_BITS ; present, DPL=0, read/write
                                        ; access, application type selector
; (2) set up the descriptor for the other selectors
        xor     eax,eax                 ; EAX <-- ensure the high bits are clear
        mov     ax,ds                   ; AX <-- DS
        shl     eax,4                   ; EAX <-- convert segment to a physical address
        lea     ebx,OTHER_DESC          ; EBX <-- offset of the "Other" descriptor
        mov     [ebx.wBas_0_15],ax      ; set the low 16 bits of the base
        shr     eax,16                  ; move the high word to the low word
        mov     [ebx.bBas_16_23],al     ; set bits 23..16
        mov     [ebx.bBas_24_31],0      ; set bits 24..31
        mov     [ebx.wLim_0_15],0ffffh  ; set the limit to 64K
                                        ; We set the descriptor to this limit
                                        ; because it makes things easier on
                                        ; the way back to real mode.
        mov     [ebx.bAccess],DATA_ACCESS_BITS ; set the std data access bits
; (3) set up the CS descriptor we'll use in PM; its base is the physical
;     address of $PMCodeExecute, so PM execution starts at offset 0
        xor     eax,eax                 ; EAX <-- ensure the high bits are clear
        mov     ax,cs                   ; AX <-- CS
        shl     eax,4                   ; EAX <-- convert segment to a physical address
        lea     edx,$PMCodeExecute      ; EDX <-- offset of code where we're
                                        ; running in full PM
        add     eax,edx                 ; EAX <-- add in offset contribution
        lea     ebx,CS_DESC             ; EBX <-- offset to CS descriptor we'll
                                        ; use in protected mode
        mov     [ebx.wBas_0_15],ax      ; set the low 16 bits of the base
        shr     eax,16                  ; EAX <-- move hi to lo word
        mov     [ebx.bBas_16_23],al     ; set bits 23..16
        mov     [ebx.bBas_24_31],0      ; set bits 24..31
        mov     [ebx.wLim_0_15],0ffffh  ; set the limit to 64K
                                        ; We set the descriptor to this limit
                                        ; because it makes things easier on
                                        ; the way back to real mode.
        mov     [ebx.bAccess],CODE_ACCESS_BITS ; present, DPL=0, execute/read
                                        ; access, application type selector
;
; get the other parameters of interest
;
        mov     esi,[ebp.?dwValue]      ; ESI <-- value to write to hi memory
        xor     edi,edi                 ; EDI <-- 0: the offset is already part
                                        ; of the GS base
;
; save the flags, we don't want to reenable interrupts if
; the caller has them disabled
;
        pushf
;
; save all segment registers, except SS, on the real mode (RM) stack
;
        push    ds
        push    es
        push    fs
        push    gs
        mov     bx,ss                   ; BX <-- save RM stack segment
        mov     ebp,esp                 ; EBP <-- save RM stack ptr
        cli                             ; mask off all maskable interrupts
                                        ; a NMI or SMI could still come in
;
; We don't handle any faults or exceptions. A GP fault,
; among others, would bring the machine to a screeching halt.
;
        db      66h                     ; operand-size prefix: load the 32-bit base
        lidt    NULL_LIMBAS             ; force shutdown in case of a fault or exception
        db      66h                     ; operand-size prefix: load the 32-bit base
        lgdt    lbGDT                   ; tell the CPU about the GDT's LIMIT:BASE
;
; switch to Protected Mode
;
        mov     eax,cr0
        or      eax,1                   ; set the Protection Enable (PE) bit
        and     eax,NOT 80000000h       ; turn off the PaGing bit
        mov     cr0,eax                 ; here we go
;
; flush the prefetch queue
;
        jmp     SHORT $+2
;
; Switch all segment registers except for GS to
; the selector pointing to the "Other" descriptor.
; Note that once this is done, all data offsets are
; zero based.
;
        mov     dx,OTHER_SELECTOR       ; DX <-- "Other" data selector
        mov     ds,dx
        mov     es,dx
        mov     fs,dx
;
; switch to the PM stack
;
        lea     ecx,stak + ((SIZE stak) - 4)
        mov     ss,dx
        mov     esp,ecx
        mov     dx,GS_SELECTOR
        mov     gs,dx
        push    ebx                     ; save the original stack frame pointers:
        push    ebp                     ; BX = RM SS, EBP = RM ESP
        mov     bx,cs                   ; BX <-- save the RM code segment
        push    WORD PTR CS_SELECTOR    ; push CS PM selector onto stack
        push    WORD PTR 0              ; initial offset in PM
        jmp     DWORD PTR [esp]         ; set CS to a PM selector
$PMCodeExecute:
        add     esp,4                   ; clear off the entry address
        xor     ax,ax                   ; we don't have a LDT
        lldt    ax                      ; (this instruction would GP fault in RM)
                                        ; this is also a Pentium serializing instruction
;
; This is all we want to do!
;
        mov     gs:[edi],esi
;
; Start the trip back. Prior to returning to real
; mode, we need to ensure that the segment registers
; have selectors that point to descriptors with 64K
; limits. That is why during PM setup we set the
; limits to 64K instead of the size of the code and
; data in this module. GS was given a 4-byte limit,
; but since we're done with it we can now set it to
; one of the other segment register values.
;
        push    ds                      ; DS <-- has a 64K limit
        pop     gs                      ; set GS to point to 64K limit also
;
; clear the PE flag
;
        mov     eax,cr0
        and     eax,NOT 1               ; turn off the PE bit
        mov     cr0,eax
;
; return to real mode
;
        push    bx                      ; this holds the original real mode CS
        push    OFFSET $RealModeReturn  ; this is based relative to
                                        ; the start of the RM code segment
        jmp     DWORD PTR [esp]         ; reload CS and flush the prefetch queue
                                        ; (only needed for pre-Pentium processors)
$RealModeReturn:
        add     esp,4                   ; clear off the return address
        pop     eax                     ; EAX <-- RM ESP
        pop     ebx                     ; BX  <-- RM SS
        mov     ss,bx                   ; restore the RM
        mov     esp,eax                 ; stack
        pop     gs                      ; restore all other segment registers
        pop     fs
        pop     es
        pop     ds                      ; DS <-- _FARDATA again
        db      66h                     ; operand-size prefix: reload the full 32-bit
                                        ; base saved by the prefixed SIDT; without it
                                        ; only a 24-bit base is loaded
        lidt    lbRealModeIDT           ; restore the RM IDT
        popf                            ; this will reenable interrupts
                                        ; if the caller had them on
;
; exit
;
        xor     dx,dx                   ; DX:AX <-- 0
        xor     ax,ax                   ; just for grins
        pop     ds
        pop     edi                     ; restore the usual
        pop     esi                     ; suspects
        pop     ebx
        pop     ebp
        ret
_WriteDeviceRegister ENDP
;-------------------------------------------------------------------------------
; _ReadDeviceRegister: Switches temporarily to PM to read a
;                      32-bit value from an offset somewhere in
;                      high memory.
;
; DWORD __cdecl ReadDeviceRegister(DWORD dwPhysicalBaseAddress,
;                                  WORD wOffset);
;
; In:    dwPhysicalBaseAddress  physical base address of the device window
;        wOffset                byte offset (0..0FFFFh) added to the base
; Out:   DX:AX = EAX = the 32-bit value read from base + offset
; Saves: EBX, ESI, EDI, EBP, DS, ES, FS, GS, SS:ESP, the real mode IDTR and
;        the caller's interrupt flag.
; Uses:  ECX, EDX and the arithmetic flags are not preserved.
;
; Must be called from real mode (see the notes at the top of the file).
; Maskable interrupts stay off for the whole excursion and the IDT limit is
; zero while in PM, so any fault or exception shuts the machine down.
;
; The GS descriptor is based at dwPhysicalBaseAddress + wOffset with a
; 4-byte limit, so every 16-bit offset is usable and the only location
; reachable through GS is the dword being read.
;-------------------------------------------------------------------------------
$CALL?STAK STRUC
                dd ?                    ; original EBP
                dw ?                    ; 16-bit offset of RET
                dw ?                    ; 16-bit CS of RET
@dwPhysicalBase dd ?
@wOffset        dw ?
$CALL?STAK ENDS
        PUBLIC _ReadDeviceRegister
_ReadDeviceRegister PROC FAR
        push    ebp
        movzx   ebp,sp                  ; establish stack frame addressability;
                                        ; the RM stack is addressed by SS:SP and
                                        ; the high word of ESP is not guaranteed
                                        ; to be zero, so don't use it for the
                                        ; 32-bit [ebp+disp] accesses below
        push    ebx                     ; save the standard registers
        push    esi
        push    edi
        push    ds
        mov     ax,_FARDATA             ; establish addressability of our data
        mov     ds,ax
;
; do any real mode housekeeping
;
        db      66h                     ; operand-size prefix: store the 32-bit base
        sidt    lbRealModeIDT           ; save the real mode IDT
;----------------------------------------------------------------
; construct the LIMIT16:BASE32 to load the GDT
;----------------------------------------------------------------
        lea     ebx,lbGDT               ; EBX <-- ptr to LIM:BAS structure for GDT
        mov     [ebx.wLimit],SIZEOF_GDT-1 ; size of the GDT less 1
        xor     eax,eax                 ; EAX <-- ensure the high bits are clear
        mov     ax,ds                   ; AX <-- DS
        shl     eax,4                   ; EAX <-- convert segment to a physical address
        lea     edx,GDT                 ; EDX <-- offset of our GDT
        add     eax,edx                 ; EAX <-- physical address of the GDT
        mov     [ebx.dwBase],eax        ; set the physical addr of the GDT
;----------------------------------------------------------------
; set up the GDT
;----------------------------------------------------------------
; (1) set up the descriptor for the GS selector: it is based directly
;     at the target dword (physical base + offset) and is 4 bytes long
        mov     eax,[ebp.@dwPhysicalBase] ; EAX <-- physical base
        movzx   edx,[ebp.@wOffset]      ; EDX <-- 16-bit offset, zero extended
        add     eax,edx                 ; EAX <-- physical address of the target
        lea     ebx,GS_DESC             ; EBX <-- ptr to the GS descriptor
        mov     [ebx.wBas_0_15],ax      ; set low 16 bits of physical address
        shr     eax,16                  ; move the high word to the low word
        mov     [ebx.bBas_16_23],al     ; set bits 23..16
        mov     [ebx.bBas_24_31],ah     ; set bits 31..24
        mov     [ebx.wLim_0_15],4-1     ; limit is # of bytes minus 1
        mov     [ebx.bAccess],DATA_ACCESS_BITS ; present, DPL=0, read/write
                                        ; access, application type selector
; (2) set up the descriptor for the other selectors
        xor     eax,eax                 ; EAX <-- ensure the high bits are clear
        mov     ax,ds                   ; AX <-- DS
        shl     eax,4                   ; EAX <-- convert segment to a physical address
        lea     ebx,OTHER_DESC          ; EBX <-- offset of the "Other" descriptor
        mov     [ebx.wBas_0_15],ax      ; set the low 16 bits of the base
        shr     eax,16                  ; move the high word to the low word
        mov     [ebx.bBas_16_23],al     ; set bits 23..16
        mov     [ebx.bBas_24_31],0      ; set bits 24..31
        mov     [ebx.wLim_0_15],0ffffh  ; set the limit to 64K
                                        ; We set the descriptor to this limit
                                        ; because it makes things easier on
                                        ; the way back to real mode.
        mov     [ebx.bAccess],DATA_ACCESS_BITS ; set the std data access bits
; (3) set up the CS descriptor we'll use in PM; its base is the physical
;     address of ?PMCodeExecute, so PM execution starts at offset 0
        xor     eax,eax                 ; EAX <-- ensure the high bits are clear
        mov     ax,cs                   ; AX <-- CS
        shl     eax,4                   ; EAX <-- convert segment to a physical address
        lea     edx,?PMCodeExecute      ; EDX <-- offset of code where we're
                                        ; running in full PM
        add     eax,edx                 ; EAX <-- add in offset contribution
        lea     ebx,CS_DESC             ; EBX <-- offset to CS descriptor we'll
                                        ; use in protected mode
        mov     [ebx.wBas_0_15],ax      ; set the low 16 bits of the base
        shr     eax,16                  ; EAX <-- move hi to lo word
        mov     [ebx.bBas_16_23],al     ; set bits 23..16
        mov     [ebx.bBas_24_31],0      ; set bits 24..31
        mov     [ebx.wLim_0_15],0ffffh  ; set the limit to 64K
                                        ; We set the descriptor to this limit
                                        ; because it makes things easier on
                                        ; the way back to real mode.
        mov     [ebx.bAccess],CODE_ACCESS_BITS ; present, DPL=0, execute/read
                                        ; access, application type selector
;
; get the other parameters of interest
;
        xor     edi,edi                 ; EDI <-- 0: the offset is already part
                                        ; of the GS base
;
; save the flags, we don't want to reenable interrupts if
; the caller has them disabled
;
        pushf
;
; save all segment registers, except SS, on the real mode (RM) stack
;
        push    ds
        push    es
        push    fs
        push    gs
        mov     bx,ss                   ; BX <-- save RM stack segment
        mov     ebp,esp                 ; EBP <-- save RM stack ptr
        cli                             ; mask off all maskable interrupts
                                        ; a NMI or SMI could still come in
;
; We don't handle any faults or exceptions. A GP fault,
; among others, would bring the machine to a screeching halt.
;
        db      66h                     ; operand-size prefix: load the 32-bit base
        lidt    NULL_LIMBAS             ; force shutdown in case of a fault or exception
        db      66h                     ; operand-size prefix: load the 32-bit base
        lgdt    lbGDT                   ; tell the CPU about the GDT's LIMIT:BASE
;
; switch to Protected Mode
;
        mov     eax,cr0
        or      eax,1                   ; set the Protection Enable (PE) bit
        and     eax,NOT 80000000h       ; turn off the PaGing bit
        mov     cr0,eax                 ; here we go
;
; flush the prefetch queue
;
        jmp     SHORT $+2
;
; Switch all segment registers except for GS to
; the selector pointing to the "Other" descriptor.
; Note that once this is done, all data offsets are
; zero based.
;
        mov     dx,OTHER_SELECTOR       ; DX <-- "Other" data selector
        mov     ds,dx
        mov     es,dx
        mov     fs,dx
;
; switch to the PM stack
;
        lea     ecx,stak + ((SIZE stak) - 4)
        mov     ss,dx
        mov     esp,ecx
        mov     dx,GS_SELECTOR
        mov     gs,dx
        push    ebx                     ; save the original stack frame pointers:
        push    ebp                     ; BX = RM SS, EBP = RM ESP
        mov     bx,cs                   ; BX <-- save the RM code segment
        push    WORD PTR CS_SELECTOR    ; push CS PM selector onto stack
        push    WORD PTR 0              ; initial offset in PM
        jmp     DWORD PTR [esp]         ; set CS to a PM selector
?PMCodeExecute:
        add     esp,4                   ; clear off the entry address
        xor     ax,ax                   ; we don't have a LDT
        lldt    ax                      ; (this instruction would GP fault in RM)
                                        ; this is also a Pentium serializing instruction
;
; This is all we want to do! EBP is free to carry the result
; because the RM stack pointer it held is saved on the PM stack.
;
        mov     ebp,gs:[edi]
;
; Start the trip back. Prior to returning to real
; mode, we need to ensure that the segment registers
; have selectors that point to descriptors with 64K
; limits. That is why during PM setup we set the
; limits to 64K instead of the size of the code and
; data in this module. GS was given a 4-byte limit,
; but since we're done with it we can now set it to
; one of the other segment register values.
;
        push    ds                      ; DS <-- has a 64K limit
        pop     gs                      ; set GS to point to 64K limit also
;
; clear the PE flag
;
        mov     eax,cr0
        and     eax,NOT 1               ; turn off the PE bit
        mov     cr0,eax
;
; return to real mode
;
        push    bx                      ; BX <-- holds the original real mode CS
        push    OFFSET ?RealModeReturn  ; this offset is based relative to
                                        ; the start of the RM code segment
        jmp     DWORD PTR [esp]         ; reload CS and flush the prefetch queue
                                        ; (only needed for pre-Pentium processors)
?RealModeReturn:
        add     esp,4                   ; clear off the return address
        pop     eax                     ; EAX <-- RM ESP
        pop     ebx                     ; BX  <-- RM SS
        mov     ss,bx                   ; restore the RM
        mov     esp,eax                 ; stack
        pop     gs                      ; restore all other segment registers
        pop     fs
        pop     es
        pop     ds                      ; DS <-- _FARDATA again
        db      66h                     ; operand-size prefix: reload the full 32-bit
                                        ; base saved by the prefixed SIDT; without it
                                        ; only a 24-bit base is loaded
        lidt    lbRealModeIDT           ; restore the RM IDT
        popf                            ; this will reenable interrupts
                                        ; if the caller had them on
;
; exit: return the value in DX:AX (and in EAX)
;
        mov     eax,ebp                 ; EAX <-- value read from hi memory
        mov     edx,eax
        shr     edx,16                  ; DX <-- high word of the value
        pop     ds
        pop     edi                     ; restore the usual
        pop     esi                     ; suspects
        pop     ebx
        pop     ebp
        ret
_ReadDeviceRegister ENDP
;-------------------------------------------------------------------------------
; _A20On: Turns on the A20 address line.
;
;   void __cdecl A20On(void);
;
; Writes the "write output buffer" opcode to the control register, then the
; new output buffer bits to the data port, calling _?DrainPort before,
; between and after the two writes. AL is modified here and CX is modified
; by _?DrainPort.
;-------------------------------------------------------------------------------
A20_CONTROL_PORT        EQU     64h     ; control register
A20_DATA_PORT           EQU     60h     ; data port
A20_OP_WRITE_OUTBUF     EQU     0d1h    ; "write output buffer" opcode
A20_OUTBUF_BITS         EQU     0dfh    ; bits to place in the output buffer

        PUBLIC _A20On
_A20On  PROC FAR
        ; step 1: announce that an output buffer byte follows
        call    _?DrainPort             ; input buffer must be empty first
        mov     al,A20_OP_WRITE_OUTBUF
        out     A20_CONTROL_PORT,al

        ; step 2: supply the output buffer byte itself
        call    _?DrainPort             ; wait until the opcode was consumed
        mov     al,A20_OUTBUF_BITS
        out     A20_DATA_PORT,al

        ; step 3: give the write a chance to be consumed before returning
        call    _?DrainPort
        ret
_A20On  ENDP
;-------------------------------------------------------------------------------
; _?DrainPort: Waits for the keyboard input buffer port to drain.
;
; Internal helper, NEAR call only.
;
; Polls the status register until the input buffer status flag reads
; clear, or until 65536 polls have been made, whichever comes first.
; No indication is returned of which condition ended the wait.
; AL and CX are modified.
;-------------------------------------------------------------------------------
DRAIN_STATUS_PORT       EQU     64h     ; status register
DRAIN_INBUF_FLAG        EQU     2       ; input buffer status flag

_?DrainPort PROC NEAR
        xor     cx,cx                   ; CX = 0 lets LOOPNE run 65536 times
$poll$?dr:
        jmp     SHORT $+2               ; presumably a short I/O delay
        in      al,DRAIN_STATUS_PORT    ; AL <-- current status
        and     al,DRAIN_INBUF_FLAG     ; ZF = 1 once the input buffer is empty
        loopne  $poll$?dr               ; retry while still full and CX != 0
        ret
_?DrainPort ENDP
DEVREG_TEXT ENDS
END