summary refs log tree commit diff stats
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@zytor.com> 2019-01-10 14:46:59 -0800
committer H. Peter Anvin <hpa@zytor.com> 2019-01-10 14:46:59 -0800
commit 200ab0a31dfa8382ebb9a4562a164662ce12fa56 (patch)
tree b831719f6a6b32eed215be6896aab385feb29c11
download abi-200ab0a31dfa8382ebb9a4562a164662ce12fa56.tar.gz
abi-200ab0a31dfa8382ebb9a4562a164662ce12fa56.tar.xz
abi-200ab0a31dfa8382ebb9a4562a164662ce12fa56.zip
Initial version
-rw-r--r-- segelf.txt 299
1 files changed, 299 insertions, 0 deletions
diff --git a/segelf.txt b/segelf.txt
new file mode 100644
index 0000000..fa4defe
--- /dev/null
+++ b/segelf.txt
@@ -0,0 +1,299 @@
+ABI for 16-bit real mode segmented code in ELF
+----------------------------------------------
+
+H. Peter Anvin
+Version: 2018-12-22
+
+16-bit segmented code in ELF is implemented with a combination of
+three new relocations and a set of software conventions. This document
+describes both.
+
+The extensions are implemented in such a way that mixed-mode
+programming is also possible, with the binary format explicitly
+exposing both segment-relative and absolute relocations.
+
+
+Requirements
+------------
+
+16-bit code relies on a combination of segment types:
+
+1. NEAR segments are addressed from a common segment base, and the
+ segment registers are generally kept at a fixed value. All NEAR
+ segments combined may not exceed 64K.
+
+2. FAR segments are addressed from a segment base specific to that
+ segment. Any one FAR segment may not exceed 64K.
+
+3. HUGE segments are addressed from a segment base specific to each
+ data item in the segment. HUGE segments have no size limit other
+ than the global address space limit of 1088K-16 bytes.
+
+4. A PUBLIC segment can be combined with other segments of the same
+ name using the same segment base.
+
+5. A PRIVATE segment has a separate segment base for each translation
+ unit.
+
+6. Multiple PUBLIC segments can be grouped together with a common
+ segment base. This is mainly used for NEAR segments; in particular,
+ the standard _DATA, _BSS and _STACK segments (and, in the "tiny"
+ memory model, the _TEXT segment) are usually combined in a group
+ called "DGROUP".
+
+Mixed-mode programming furthermore requires a way to reference any
+data item by flat linear address.
+
+
+New ELF relocations
+-------------------
+
+The following new relocations are added to the ELF i386 psABI:
+
+R_386_SEG16 45 word16 A + (S >> 4)
+R_386_SUB16 46 word16 A - S
+R_386_SUB32 47 word32 A - S
+
+In accordance with the ELF gABI specification, multiple relocations at
+the same address are cumulative. This is essential for the SUB
+relocations to work.
+
+These are the only extensions to the ELF format proper.
+
+
+Software conventions
+--------------------
+
+1. Sections
+-----------
+
+A PRIVATE or HUGE segment is represented by a section without any
+special attributes. A PRIVATE or HUGE segment section must have an
+alignment of 16 or higher.
+
+A PUBLIC segment is represented as a pair of sections:
+
+ section~b
+ section~p
+
+"section~b" will contain symbols but no data (see below). "section~p"
+carries the actual contents of the section. The "~b" section must
+have an alignment of 16 or higher, but the "~p" section MAY have any
+alignment.
+
+Segment groups are simply represented by a "~b" section without a
+corresponding "~p" section.
+
+These sections are named such that sorting the sections by name will
+put all the ~b sections immediately before all the ~p sections for
+the same segment.
+
+When using subsection variants intended to be merged into the same
+segment, e.g. for merged strings, any appended suffix should come
+*after* ~[bp] to guarantee that the sorting will be done correctly; for
+example:
+
+ _DATA~b.strings
+ _DATA~p.strings
+
+In the interest of robustness compilers/assemblers should:
+
+a. Emit group ~b sections immediately before the first grouped ~b section;
+b. Emit ~b sections before the corresponding ~p sections, preferably
+ immediately before.
+
+
+2. Symbols
+----------
+
+Symbols contain, as is normal in ELF, linear addresses, including the
+value of the segment base. Thus, a symbol located at 0x1234:0x5678
+will have a value of (0x1234 << 4) + 0x5678 = 0x179b8. This also means
+that flat 32-bit code can make direct use of this symbol in normal
+fashion.
+
+Each symbol is matched with an auxiliary symbol containing the
+preferred segment base as a linear address. The name of the auxiliary
+symbol associated with the symbol "foo" is "foo~b". Accordingly, for
+the example above, with foo at 0x1234:0x5678, we would have:
+
+ foo = 0x179b8
+ foo~b = 0x12340
+
+For a PRIVATE segment, these auxiliary symbols are simply placed at
+the beginning of the section by the compiler/assembler.
+
+For a PUBLIC segment, they are placed in the ~b section corresponding
+to the segment (however, the primary symbol is placed in the ~p
+section).
+
+For a grouped segment, they are put in the ~b section for the group.
+
+For a HUGE segment, the compiler/assembler should generate the ~b
+symbols so that:
+
+ symbol~b = symbol & ~0xf
+
+Undefined (external) references to these auxiliary symbols should be
+marked WEAK, so that a missing auxiliary symbol resolves to 0. If the
+auxiliary symbol would contain the absolute value 0, it therefore does
+not need to be emitted. This, again, simplifies mixed-mode programming.
+
+
+
+3. Use of relocations
+---------------------
+
+To access a symbol by its preferred segment base:
+
+ mov ax,SEG symbol
+ mov es,ax
+ mov ax,[es:symbol]
+
+ SEG symbol generates:
+
+ R_386_SEG16 symbol~b
+
+ [symbol] generates:
+
+ R_386_16 symbol
+ R_386_SUB16 symbol~b
+
+
+To access a symbol relative to a different segment base:
+
+ mov ax,[symbol wrt DGROUP]
+
+ R_386_16 symbol
+ R_386_SUB16 DGROUP~b
+
+To access a symbol relative to the segment base of a different symbol:
+
+ mov ax,[symbol wrt seg othersymbol]
+
+ R_386_16 symbol
+ R_386_SUB16 othersymbol~b
+
+To access the absolute linear address of a symbol:
+
+ mov eax,symbol wrt 0
+
+ R_386_32 symbol
+
+
+To access a symbol relative to a fixed (absolute) segment base:
+
+ mov ax,[video_rows wrt 40h]
+
+ R_386_16 video_rows-0x400
+
+
+
+4. Sample linker script
+-----------------------
+
+This linker script is applicable to the conventional DOS memory models
+except the tiny model.
+
+SECTIONS
+{
+ . = 0;
+
+ _TEXT_far : {
+ *(SORT_BY_NAME(SORT_BY_ALIGNMENT(?*_TEXT ?*_TEXT[~.]*)))
+ }
+ _DATA_far : {
+ *(SORT_BY_NAME(SORT_BY_ALIGNMENT(?*_DATA ?*_DATA[~.]*)))
+ }
+
+ _TEXT ALIGN(16) : {
+ *(_START~b* _TEXT~b*)
+ *(SORT_NONE(_START~p*))
+ *(SORT_BY_ALIGNMENT(_TEXT~p*))
+ }
+
+ /*
+ * Note: to unconditionally force references to _DATA,
+ * _BSS and _STACK to be relative to DGROUP, one can merge
+ * the corresponding ~b sections into DGROUP instead of the
+ * normal output sections.
+ */
+ DGROUP ALIGN(16) : {
+ *(DGROUP~b)
+ PROVIDE(___bss_start~b = .);
+ PROVIDE(___bss_end~b = .);
+ PROVIDE(___stack_base~b = .);
+ PROVIDE(___stack_top~b = .);
+ }
+
+ _DATA : {
+ *(_DATA~b*)
+ *(SORT_BY_ALIGNMENT(_DATA~p*))
+ }
+
+ PROVIDE(___filesize = .);
+
+ _BSS : {
+ PROVIDE(___bss_start = .);
+ *(_BSS~b*)
+ *(SORT_BY_ALIGNMENT(_BSS~p*))
+ *(COMMON)
+ PROVIDE(___bss_end = .);
+ }
+
+ . = ALIGN(16);
+ /* Default near stack/heap segment size, can be overridden */
+ PROVIDE(___stack_size = 65536 + ADDR(DGROUP) - .);
+ _STACK (NOLOAD) : {
+ PROVIDE(___stack_base = .);
+ . = . + ___stack_size;
+ PROVIDE(___stack_top = .);
+ }
+
+ _BSS_far ALIGN(16) : {
+ PROVIDE(___farbss_start = .);
+ *(SORT_BY_NAME(SORT_BY_ALIGNMENT(?*_BSS ?*_BSS[~.]*)))
+ . = ALIGN(16);
+ PROVIDE(___farbss_end = .);
+ }
+
+ PROVIDE(___end = .);
+}
+ENTRY(__start)
+
+
+This linker script is applicable to the tiny DOS memory model.
+
+SECTIONS
+{
+ . = 0;
+
+ DGROUP (NOLOAD) : {
+ *(*~b*)
+ PROVIDE(___bss_start~b = .);
+ PROVIDE(___bss_end~b = .);
+ PROVIDE(___end~b = .);
+ }
+
+ . = 0x100;
+
+ _TEXT : {
+ *(SORT_NONE(_START~p*))
+ *(SORT_BY_ALIGNMENT(_TEXT~p*))
+ }
+ _DATA : {
+ *(SORT_BY_ALIGNMENT(_DATA~p*))
+ }
+
+ PROVIDE(___filesize = .);
+
+ _BSS : {
+ PROVIDE(___bss_start = .);
+ *(SORT_BY_ALIGNMENT(_BSS~p*))
+ *(COMMON)
+ PROVIDE(___bss_end = .);
+ }
+
+ PROVIDE(___end = .);
+}
+ENTRY(__start)