README Oracc Home SEARCH DOCUMENTATION

Creative Commons License

ORACC Home


Introduction


xtf.rnc

Preamble

#-lines

Protocols

Comments

Notes

&-lines

Document Types

@-lines

Objects

Surfaces

Columns

Status

Headings

Milestones

Implied tags

$-lines

Seal

State

Rulings

Examples

Images

Text Lines

Advanced

Legacy

Line Numbers

Cells & Fields

Streams

Alignment

Zones

Composites

@composite

Structure

Locator

Variants


Resources


Links

Top

Tutorial

GDL Manual

Advanced

Composites

Lexical

Linkage

Protocols

XTF: XML Transliteration Format Version

(http://oracc.org/ns/xtf/1.0)

Steve Tinney
Version of 2014-07-27

Introduction

XTF is an XML format for describing the transliteration of cuneiform texts; it also encompasses facilities for other kinds of editions commonly used in cuneiform studies.

xtf.rnc

# XTF schema preamble: namespaces, included grammars and entry points.
# Unprefixed element names in this grammar are in the XTF namespace.
default namespace = "http://oracc.org/ns/xtf/1.0"
# Prefix used by the note:* elements and attributes.
namespace note = "http://oracc.org/ns/note/1.0"
# The GDL and XTR grammars are included by relative path.
include "../../gdl/1.0/gdl.rnc"
include "../../xtr/1.0/xtr.rnc"
# A document is either the xtf wrapper or a bare document type.
start = xtf | doctype
# Wrapper: optional outer protocols followed by exactly one document.
xtf = element xtf { proto.outer? , doctype }
# The admissible document types. "translation" is not defined in this
# listing (presumably supplied by the included xtr.rnc -- TODO confirm).
doctype = score | translation | transliteration | composite | atf
# Raw ATF carried as text content, identified by a required xml:id.
atf = element atf { attribute xml:id   { xsd:ID } , text }

# Protocols. Every individual protocol is a <protocol> element whose
# type attribute names it; the patterns below group them by position.

# Outer protocols: a <protocols> container with a scope attribute and
# exactly one basket protocol.
proto.outer = element protocols {
  attribute scope { text },
  proto.basket
}

# Start-of-text protocols: a <protocols> container with a scope
# attribute and any number of the listed protocol types, in any order.
proto.start = element protocols {
  attribute scope { text },
  (  proto.atf | proto.bib | proto.etcsl | proto.key | proto.lemmatizer 
     | proto.link | proto.project | proto.syntax | proto.version )*
}

# "After" position admits only the note protocol.
proto.after = proto.note

# Protocols admitted between lines; these are bare <protocol> elements,
# not wrapped in a <protocols> container.
proto.inter = proto.bib | proto.etcsl | proto.lem | proto.link 
            | proto.note | proto.psu | proto.var

# One pattern per protocol type; each is <protocol type="..."> with
# text content.
proto.atf    = element protocol { attribute type { "atf" }    , text }
proto.basket = element protocol { attribute type { "basket" } , text }
proto.bib    = element protocol { attribute type { "bib" }    , text }
proto.etcsl  = element protocol { attribute type { "etcsl" }  , text }
proto.key    = element protocol { attribute type { "key" }    , text }
proto.lem    = element protocol { attribute type { "lem" }    , text }
proto.psu    = element protocol { attribute type { "psu" }    , text }
proto.lemmatizer
             = element protocol { attribute type { "lemmatizer" }, text }
proto.link   = element protocol { attribute type { "link" }   , text }
proto.note   = element protocol { attribute type { "note" }   , text }
proto.project= element protocol { attribute type { "project" }, text }
proto.syntax = element protocol { attribute type { "syntax" } , text }
proto.var    = element protocol { attribute type { "var" }    , text }
proto.version= element protocol { attribute type { "version" }, text }

# Comments and notes.
# "comments" admits either a plain cmt element or a note:text element.
comments = cmt | notetext
# A comment is plain text.
cmt = element cmt { text }
# Attributes in the note namespace shared by the note elements below.
notelabel = attribute note:label { text }
notemark  = attribute note:mark { text }
noteauto  = attribute note:auto { text }
# note:ref is an IDREF, so it must match an ID in the same document.
noteref   = attribute note:ref { xsd:IDREF }
# A noteblock is either a link to a note or the note text itself.
noteblock = (notelink | notetext)
# note:link: optional label, required note:ref, text content.
notelink  = element note:link {
		notelabel?,
		noteref,
		text
	    }
# note:text: required xml:id and note:mark, optional note:auto and
# note:label; the content pattern htext is not defined in this listing.
notetext  = element note:text {
		attribute xml:id  { xsd:ID } ,
		notemark ,
		noteauto? ,
		notelabel? ,
		htext 
	    }

# A transliteration document: required xml:id, n and xml:lang; optional
# hand, project, implicit, haslinks and cols attributes; then optional
# start protocols followed by any mix of notes, objects, nonx elements,
# comments and sealings.
transliteration =
  element transliteration {
    attribute xml:id   { xsd:ID },
    attribute n        { text },
    attribute hand     { text }?,
    attribute xml:lang { xsd:NMTOKEN },
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    (proto.start? , (noteblock | object | nonobject | comments | sealing)*)
  }
# Generic n attribute, and a variant whose value must match [a-z].
n.attr = attribute n { text }
n.attr.lc = attribute n { xsd:string { pattern="[a-z]" }}
haslinks = attribute haslinks { xsd:boolean }
# Note: the pattern is called maxcells but the attribute name is "cols".
maxcells = attribute cols { xsd:nonNegativeInteger }
# Project value: a lowercase letter followed by one or more characters
# from a-z, 0-9 and "/".
project = attribute project { xsd:string { pattern="[a-z][a-z0-9/]+" } }

# A composite document: the composite-attlist attributes, any number of
# sigdef elements, the same optional attributes as a transliteration,
# optional start protocols, the composite content, and finally any
# number of referto elements each optionally followed by a comment.
composite =
  element composite {
    composite-attlist,
    sigdef*,
    attribute hand     { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    proto.start?,
    composite-content,
    (referto, comments?)*
  }

# &= combines by interleave with any other definition of this pattern.
# Unlike transliteration, xml:lang is optional here.
composite-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  attribute xml:lang { xsd:NMTOKEN }?

# A physical object. It either carries implicit="1" or both xml:id and
# label. Its type is one of the known object names, or "object"
# together with a required n attribute. status.flags is not defined in
# this listing.
object = 
  element object {
    (implicit 
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    ( attribute type { known.object }
     |(attribute type { user.object } , n.attr) 
    ) ,
    status.flags,
    (m.fragment | surface | sealing | comments | nonx | noteblock)*
  }
# Object types that need no further name.
known.object =    xsd:string { pattern="tablet|envelope|prism|bulla" }
# Generic object type; the object pattern pairs it with n.attr.
user.object =     xsd:string { pattern="object" }
# Content allowed directly in a transliteration that is not an object.
nonobject = nonx

# A surface of an object. It either carries implicit="1" or both xml:id
# and label. Content is any mix of inter-line protocols, columns, nonx
# elements, m milestones and comments, followed by noteblocks.
# The type attribute determines what is required of n:
#   known surface names  -> no n attribute
#   "face"               -> n required, a single lowercase letter
#   "edge"               -> n optional, a single lowercase letter
#   "surface", "docket", "seal" -> n required, free text
# status.flags is not defined in this listing.
surface =
  element surface { 
    (implicit 
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    (proto.inter | column | nonx | m | comments)* ,
    (  attribute type { known.surface }
     |(attribute type { face.surface } , n.attr.lc)
     |(attribute type { edge.surface } , n.attr.lc?)
     |(attribute type { user.surface | docket.surface | seal.surface } , n.attr)
     ),
    primes?,
    status.flags,
    noteblock*
  }

# NOTE(review): "surface" is matched both here and by user.surface, so
# type="surface" validates with or without an n attribute -- confirm
# that this overlap is intended.
known.surface =
  xsd:string {
    pattern="surface|obverse|reverse|left|right|top|bottom"
  }
face.surface = xsd:string { pattern="face" }
edge.surface = xsd:string { pattern="edge" }
user.surface = xsd:string { pattern="surface" }
docket.surface = xsd:string { pattern="docket" }
seal.surface = xsd:string { pattern="seal" }

# A sealing: required xml:id, label and n (an NMTOKEN), optional scid,
# then any mix of columns, nonx elements, milestones, comments and
# noteblocks. Unlike object/surface/column there is no implicit form.
sealing =
  element sealing {
    attribute xml:id { xsd:ID },
    attribute label    { text },
    attribute n { xsd:NMTOKEN },
    attribute scid { xsd:NMTOKEN }?,
    (column | nonx | milestone | comments | noteblock)*
  }

# A column. It either carries implicit="1" or both xml:id and label.
# n is required, o and primes are optional. Content is any mix of
# milestones, headings, line groups, lines, nonl/nonx elements,
# comments and inter-line protocols. status.flags is not defined in
# this listing.
column = 
  element column { 
    (implicit
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    (milestone | hdr | lg | l | nonl | nonx | comments | proto.inter)*,
    attribute n { text },
    attribute o { text }?,
    primes?,
    status.flags
  }

# The primes attribute holds one or more U+2032 (PRIME) characters;
# \x{2032} is the RELAX NG compact-syntax escape for that code point.
primes = 
  attribute primes { xsd:string { pattern="\x{2032}+" } }

# Heading: element h with a numeric level and a required xml:id; the
# content pattern htext is not defined in this listing.
hdr = element h {
  attribute level { xsd:nonNegativeInteger },
  attribute xml:id   { xsd:ID },
  htext
}

# A milestone is either a division/locator m or a discourse m.
milestone = m | m.discourse

# Division or locator milestone; subtype and n are optional free text.
m = element m { 
  attribute type { "division" | "locator" },
  attribute subtype { text }?,
  attribute n { text }?,
  text
}

# Discourse milestone: subtype is required and limited to the listed
# values; endflag, when present, is always "1".
m.discourse = element m {
  attribute type { "discourse" },
  attribute subtype { "body" | "catchline" | "colophon" | "date" | "linecount" 
                      | "witnesses" | "signature" | "summary" 
		      | "sealings" | "sealing" },
  attribute endflag { "1" }?,
  text
}

# Fragment locator: a locator m whose subtype, if given, is "fragment"
# and which takes no n attribute. Every element this matches is also
# matched by the m pattern above.
m.fragment = element m { 
  attribute type { "locator" },
  attribute subtype { "fragment" }?,
  text
}

# Marker attribute for implicit elements; the only permitted value is
# "1". object, surface and column take it instead of xml:id and label.
implicit = attribute implicit { "1" }

# Non-text elements. All three have text content and differ only in
# element name and attribute list. nong is not referenced elsewhere in
# this listing (presumably used by the included GDL grammar -- confirm).
nonx = element nonx { nonx-attlist, text }
nonl = element nonl { nonl-attlist, text }
nong = element nong { nong-attlist, text }

# Attributes of nonx: a required xml:id, an optional label+silent pair
# (both or neither), and exactly one of four attribute groups.
nonx-attlist =
  attribute xml:id { xsd:ID },
  (attribute label { text },
   attribute silent { "1" })?,
  (
   # Group 1, strict="1": either ref+scope, or extent+scope+state with
   # optional flags made up of the characters ! ? *.
   (attribute strict { "1" },
    ((attribute ref     { text },
      attribute scope   { text })
     |(attribute extent { text },
       attribute scope  { text },
       attribute state  { text },
       attribute flags  { xsd:string {
                              pattern="[!?*]+"
			  }}?)))
  |
   # Group 2, strict="0": every descriptive attribute is optional.
   (attribute strict { "0" },
    attribute extent { text }?,
    attribute ref    { text }?,
    attribute scope  { text }?,
    attribute state  { text }?)
  |
   # Group 3, strict="0" with ref="none" and type="empty".
   (attribute strict { "0" },
    attribute ref    { "none" },
    attribute type   { "empty" })
  |
   # Group 4, image: ref is P, Q or X plus digits, an at sign, digits
   # and optional lowercase letters; alt is required.
   (attribute type   { "image" },
    attribute strict { "0" },
    attribute ref    { xsd:string {
                          pattern="[PQX][0-9]+@[0-9]+[a-z]*" 
		       }},
    attribute alt    { text })
  )

# Attribute set shared by the three attlists below: a required type
# from the enumerated list, plus optional unit, extent, ref and xml:id.
non-x-attr-set =
  attribute type {
    "newline" | "broken" | "maybe-broken" | "traces"
    | "maybe-traces" | "blank" | "ruling" | "image"
    | "seal" | "docket" | "comment" | "bullet" | "other"
  },
  attribute unit { "self" | "quantity" | "ref" }?,
  attribute extent { text }?,
  attribute ref { text }?,
  attribute xml:id { xsd:ID }?
# noncolumn-attlist is not referenced elsewhere in this listing.
noncolumn-attlist &= non-x-attr-set
nonl-attlist &= non-x-attr-set
nong-attlist &= non-x-attr-set

# A text line: required xml:id and n; optional o, label and spanall
# (always "1" when present). Content is one of: one or more cells, one
# or more fields, or any mix of alignment groups and inline content.
l =
  element l {
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute o { text }?,
    attribute label { text }?,
    attribute spanall { "1" }?,
    (cell+ | f+ | (ag | l.inner)*) 
  }

# Inline line content. normword, words and glo are not defined in this
# listing (presumably supplied by the included GDL grammar -- confirm).
l.inner = (surro | normword | words | glo)*

# A cell (element c): optional span, then either one or more fields or
# inline line content. Fields never contain cells.
cell = element c { span? , (f+ | l.inner) }
span = attribute span { xsd:nonNegativeInteger }

# A field (element f): any mix of alignment groups and inline content.
f = element f { f-attlist, (ag | l.inner)* }
# type is the only required field attribute.
f-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute n { text }?,
  attribute type { xsd:NMTOKEN },
  attribute xml:lang { xsd:NMTOKEN }?

# Line group: a primary l followed by auxiliary lines, then inter-line
# protocols and variants. Every alternative starts with l and an
# optional gus line; what may follow is one of:
#   - an nts line
#   - an optional nts line, then one or more runs of bil lines, each
#     run optionally preceded by a link protocol
#   - an lgs line
#   - an nts line followed by an lgs line (nts always precedes lgs)
#   - any mix of e elements and comments
lg = element lg { 
  attribute xml:id { xsd:ID }?,
  maxcells?,
  attribute n { text }?,
  (   (l,gus?,nts)
    | (l,gus?,nts?,(proto.link?,bil+)+)
    | (l,gus?,lgs) 
    | (l,gus?,nts,lgs) 
    | (l,gus?, (e | comments)*)),
  proto.inter*,
  var*
}
# The auxiliary lines reuse the element name l and are distinguished
# only by their type attribute. grapheme is not defined in this listing.
bil = element l { attribute type { "bil" } , l.inner* }
nts = element l { attribute type { "nts" } , (ag | l.inner)* }
lgs = element l { attribute type { "lgs" } , grapheme* }
gus = element l { attribute type { "gus" } , l.inner* }
# Variant (element v): required varnum and label, optional ex_label;
# content is inline line content or one or more cells. The "*"
# alternative for varnum is already covered by text.
var = element v { 
  attribute varnum { text | "*" } ,
  attribute label  { text } ,
  attribute ex_label { text }? ,
  (l.inner | cell+)
}

# alignment groups
# ref is required and consists of lowercase letters only; form is
# optional free text.
ag = element ag { 
  attribute ref { xsd:string { pattern="[a-z]+" } },
  attribute form { text }?,
  l.inner*
}

# surro wraps inline line content.
surro = element surro { l.inner }
# |= adds an optional surro as a further choice to the words and word
# patterns, whose base definitions are not in this listing (presumably
# in the included GDL grammar -- confirm).
words |= surro?
word |= surro?

# Body of a composite or div: any mix of the listed items in any order.
# \include and \div are backslash-quoted because include and div are
# keywords of the RELAX NG compact syntax.
composite-content = 
  (noteblock? | milestone | \include | \div | variants | hdr | lg | l | comments | nonl | nonx | proto.inter)*

# include and referto are distinct elements with the same attributes.
\include = element include { increfAttr }

referto = element referto { increfAttr }

# Reference attributes: ref and n are required; from is optional, and
# to may only appear together with from.
increfAttr = 
  (attribute ref { text } , 
   attribute n { text } ,
   (attribute from { text },
    attribute to { text }?)?)

# A div nests composite content recursively.
\div =
  element div {
    div-attlist, 
    composite-content
  }
# type is the only required div attribute. Note that lang here is a
# plain, un-namespaced attribute, not xml:lang.
div-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute n { text }?,
  attribute type { xsd:NMTOKEN },
  attribute lang { text }?,
  attribute place { text }?,
  attribute subtype { text }?

# A variants element holds any number of variant elements.
variants = element variants { variant* }

# A variant takes no attributes; its content may nest variants again.
variant = 
  element variant {
    (\div | noteblock | variants | lg | l | comments | nonl | proto.inter | nonx)*
  }

# A score document: the score attributes, optional start protocols, any
# mix of sigdef elements and noteblocks, then any mix of objects,
# surfaces, columns, milestones, divs, line groups, comments and nonl
# elements.
score =
  element score {
    score-attlist, 
    proto.start? , (sigdef | noteblock?)* ,
    (object | surface | column | milestone | \div | lg | comments | nonl)*
  }
# xml:id, n, score-mode and score-type are required; the rest optional.
score-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  score_mode,
  score_type,
  score_word?,
  attribute xml:lang { xsd:NMTOKEN }?,
  attribute hand     { text }?,
  project?,
  implicit?,
  haslinks?,
  maxcells?

# Pattern names use underscores; the attribute names use hyphens.
score_mode = attribute score-mode { "parsed" | "unparsed" }
score_type = attribute score-type { "matrix" | "synopsis" }
# score-word, when present, is always "yes".
score_word = attribute score-word { "yes" }

# synopticon is defined here but is not reachable from start or doctype
# in this listing.
synopticon =
  element synopticon { synopticon-attlist, sigdef*, (eg | comments | nonl)* }
synopticon-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  attribute xml:lang { xsd:NMTOKEN }?
# sigdef is an empty element; all three of its attributes are required.
sigdef = element sigdef { sigdef-attlist, empty }
sigdef-attlist &=
  attribute xml:id { xsd:ID },
  attribute targ-id { xsd:NMTOKEN },
  attribute targ-n { text }
# eg groups any number of e elements.
eg = element eg { eg-attlist, e* }
eg-attlist &= attribute xml:id { xsd:ID }?
# e holds inline line content, one or more cells, or one or more fields.
e =
  element e {
    e-attlist,
    (l.inner
     | cell+
     | f+)
  }
# Every attribute of e is optional. sigref is an IDREF, so it must
# match an ID in the same document (presumably a sigdef -- confirm).
e-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute sigref { xsd:IDREF }?,
  attribute n { text }?,
  attribute l { text }?,
  attribute p { text }?,
  attribute hlid { text }?,
  attribute plid { text }?

Preamble

This document is a work in progress; the schema is correct and defines the XML output format produced by atf2xtf. Developer documentation is not yet included here, but the tutorial is essentially complete.

Most elements in an XTF file are in either the XTF or GDL namespaces, the latter being defined in the included GDL specification. The n namespace is used for normalized text as described below.

The macro structure of any XTF file produced by the ATF processor is always an outer container, the xtf element, followed by optional outer protocols and then zero or more transliterations and/or composite texts.

We allow transliteration and composite as start elements to simplify the ATF processor's internal validation of texts.

# XTF schema preamble: namespaces, included grammars and entry points.
# Unprefixed element names in this grammar are in the XTF namespace.
default namespace = "http://oracc.org/ns/xtf/1.0"
# Prefix used by the note:* elements and attributes.
namespace note = "http://oracc.org/ns/note/1.0"
# The GDL and XTR grammars are included by relative path.
include "../../gdl/1.0/gdl.rnc"
include "../../xtr/1.0/xtr.rnc"
# A document is either the xtf wrapper or a bare document type.
start = xtf | doctype
# Wrapper: optional outer protocols followed by exactly one document.
xtf = element xtf { proto.outer? , doctype }
# The admissible document types. "translation" is not defined in this
# listing (presumably supplied by the included xtr.rnc -- TODO confirm).
doctype = score | translation | transliteration | composite | atf
# Raw ATF carried as text content, identified by a required xml:id.
atf = element atf { attribute xml:id   { xsd:ID } , text }

#-lines

The other quite common type of line in an ATF file begins with the hash sign (#). There are two kinds of #-line: protocols and comments.

Protocols

Protocols are statements which are interpreted or stored by the ATF processor but are not part of the text edition proper. Protocols are all named and may trigger special processing within the ATF processor.

With the exception of #note:, protocols must occur on a single line; multiple protocols do not need blank lines between them except for multiple #note: protocols which behave like comments.

Protocols are divided into four classes:

outer
protocols which may only occur at the very beginning of the document; only #basket: may occur in this location.
start
protocols which may occur at the start of a text; only #atf:, #bib:, #link:, #note: and #version: may occur in this location.
after
protocols which may occur only after all other protocols have been given in a particular section; only #note: may occur in this location. Other protocols are not required before #note:, but if they are present they must precede it.
inter
protocols which may occur between lines of a text; only #bib:, #lem:, #note: and #var: may occur in this location.
#bib: MSL 14, 343

1. a
#lem: a[water]
#note: This can only occur after any protocols other than #note:.

Protocols which may be given explicitly by users in an ATF file are: atf; basket; bib; lem; lemmatizer; link; note; syntax; var; version.

Note that the #link: protocol handles only a subset of intertext linkage; link protocols in XTF may also originate from the || << >> operator set. See the link protocol documentation for further details. The #note: protocol does not generate a protocol node; it generates a note element.

# Protocols. Every individual protocol is a <protocol> element whose
# type attribute names it; the patterns below group them by position.

# Outer protocols: a <protocols> container with a scope attribute and
# exactly one basket protocol.
proto.outer = element protocols {
  attribute scope { text },
  proto.basket
}

# Start-of-text protocols: a <protocols> container with a scope
# attribute and any number of the listed protocol types, in any order.
proto.start = element protocols {
  attribute scope { text },
  (  proto.atf | proto.bib | proto.etcsl | proto.key | proto.lemmatizer 
     | proto.link | proto.project | proto.syntax | proto.version )*
}

# "After" position admits only the note protocol.
proto.after = proto.note

# Protocols admitted between lines; these are bare <protocol> elements,
# not wrapped in a <protocols> container.
proto.inter = proto.bib | proto.etcsl | proto.lem | proto.link 
            | proto.note | proto.psu | proto.var

# One pattern per protocol type; each is <protocol type="..."> with
# text content.
proto.atf    = element protocol { attribute type { "atf" }    , text }
proto.basket = element protocol { attribute type { "basket" } , text }
proto.bib    = element protocol { attribute type { "bib" }    , text }
proto.etcsl  = element protocol { attribute type { "etcsl" }  , text }
proto.key    = element protocol { attribute type { "key" }    , text }
proto.lem    = element protocol { attribute type { "lem" }    , text }
proto.psu    = element protocol { attribute type { "psu" }    , text }
proto.lemmatizer
             = element protocol { attribute type { "lemmatizer" }, text }
proto.link   = element protocol { attribute type { "link" }   , text }
proto.note   = element protocol { attribute type { "note" }   , text }
proto.project= element protocol { attribute type { "project" }, text }
proto.syntax = element protocol { attribute type { "syntax" } , text }
proto.var    = element protocol { attribute type { "var" }    , text }
proto.version= element protocol { attribute type { "version" }, text }

Comments

Comments are asides which are not part of the text edition or the annotation; they are useful for keeping odd bits of information in the file without it getting in the way of the text edition or annotation.

Comments look like protocols in that they begin with a hash-sign, but they may not begin with the sequence hash-name-colon. Comments may be included within text transliterations but not before the first text in a file. Comments must always follow any protocols which occur adjacent to them.

A sequence of lines beginning with hash-signs is a single multi-line comment. To attach several separate comments to the same line, put a blank line between them in the ATF file.

1. a
#a simple comment

2. a
#a longer comment which somewhat artificially extends
#over multiple lines

3. a
#one comment to line 3.

#another comment to line 3.

4. a
#Comments look a bit like protocols but there is no chance of
#confusion: the ATF processor's scanning rules take care of that.

5. a
#lem: a[water]
#note: If you want a comment to appear in the displayed text-edition 
#use the '#note:' protocol instead.

#and note that any comment must follow any other protocol, including
#'#note:'.

You can include note marks in the transliteration and after an #note: by putting the note mark between caret signs (e.g., ^1^). You can also specify that a note corresponds to the label of a text line (or a range) by using the @notelabel{...} notation, e.g., #note: @notelabel{i 1} A note to column 1 line 1.

Notes

Notes are implemented in this schema although they can actually occur either in transliterations--in which case they are attached to grapheme nodes of some kind--or in translations. In the latter case they are attached to word nodes.



# Comments and notes.
# "comments" admits either a plain cmt element or a note:text element.
comments = cmt | notetext
# A comment is plain text.
cmt = element cmt { text }
# Attributes in the note namespace shared by the note elements below.
notelabel = attribute note:label { text }
notemark  = attribute note:mark { text }
noteauto  = attribute note:auto { text }
# note:ref is an IDREF, so it must match an ID in the same document.
noteref   = attribute note:ref { xsd:IDREF }
# A noteblock is either a link to a note or the note text itself.
noteblock = (notelink | notetext)
# note:link: optional label, required note:ref, text content.
notelink  = element note:link {
		notelabel?,
		noteref,
		text
	    }
# note:text: required xml:id and note:mark, optional note:auto and
# note:label; the content pattern htext is not defined in this listing.
notetext  = element note:text {
		attribute xml:id  { xsd:ID } ,
		notemark ,
		noteauto? ,
		notelabel? ,
		htext 
	    }

&-lines

&-lines are used to introduce a new text and consist of two parts: the ID and the name.

For transliterations of exemplars, the ID is a 'P' followed by six digits, e.g., P123456. This ID is assigned by CDLI and is the reference ID of the object in the main CDLI catalog; to get IDs for objects not in the CDLI catalog send an e-mail to cdli@cdli.ucla.edu.

The name of the text should be identical with the 'Designation' field in the CDLI main catalog; the ATF processor detects mismatches and reports the correct name. This mechanism is designed to provide a check that the P-number in the ID actually references the text the transliterator intends.

Document Types

Transliterations are not the only data type which can be entered in ATF: composite texts, translations and scores are all understood by the ATF processor. Also, when the ATF processor is unable to parse a text, it outputs the literal input wrapped in atf tags, so that it can be displayed in some form if required.

# A transliteration document: required xml:id, n and xml:lang; optional
# hand, project, implicit, haslinks and cols attributes; then optional
# start protocols followed by any mix of notes, objects, nonx elements,
# comments and sealings.
transliteration =
  element transliteration {
    attribute xml:id   { xsd:ID },
    attribute n        { text },
    attribute hand     { text }?,
    attribute xml:lang { xsd:NMTOKEN },
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    (proto.start? , (noteblock | object | nonobject | comments | sealing)*)
  }
# Generic n attribute, and a variant whose value must match [a-z].
n.attr = attribute n { text }
n.attr.lc = attribute n { xsd:string { pattern="[a-z]" }}
haslinks = attribute haslinks { xsd:boolean }
# Note: the pattern is called maxcells but the attribute name is "cols".
maxcells = attribute cols { xsd:nonNegativeInteger }
# Project value: a lowercase letter followed by one or more characters
# from a-z, 0-9 and "/".
project = attribute project { xsd:string { pattern="[a-z][a-z0-9/]+" } }

# A composite document: the composite-attlist attributes, any number of
# sigdef elements, the same optional attributes as a transliteration,
# optional start protocols, the composite content, and finally any
# number of referto elements each optionally followed by a comment.
composite =
  element composite {
    composite-attlist,
    sigdef*,
    attribute hand     { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    proto.start?,
    composite-content,
    (referto, comments?)*
  }

# &= combines by interleave with any other definition of this pattern.
# Unlike transliteration, xml:lang is optional here.
composite-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  attribute xml:lang { xsd:NMTOKEN }?

@-lines

@-lines are used for structural tags. Several kinds of structure may be indicated using this mechanism: physical structure, e.g., objects, surfaces; manuscript structure, i.e., columns; and document structure, e.g., divisions and colophons. For clarity, we describe here only the structural features which are permitted in object transliterations, i.e., texts with an ID beginning with P. Documentation of structural conventions for composite texts is given in the composites manual.

Objects

The kind of object on which the inscription being transliterated is written is designated using one of the following tags:

@tablet
The default, and therefore optional; object is a tablet.
@envelope
Tablets and envelopes with the same P number can be transliterated separately using this tag.
@prism
Object is a prism.
@bulla
Object is a bulla.
@fragment
Object is a fragment, with a fragment name (e.g., a letter) following the tag; may be used more than once to transliterate multiple fragments of an object, e.g.:
&P212121 = Some Fragmentary Object
@fragment a
1. a
@fragment b
1. a
@object
The generic object tag which must be followed by the type of the object, e.g. @object Stone wig.

Seals

A transliteration of the text inscribed on a physical seal object should be handled using the @object tag:

&P333444 = Some Seal
@object seal
1. da-da
2. dumu du-du
# A physical object. It either carries implicit="1" or both xml:id and
# label. Its type is one of the known object names, or "object"
# together with a required n attribute. status.flags is not defined in
# this listing.
object = 
  element object {
    (implicit 
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    ( attribute type { known.object }
     |(attribute type { user.object } , n.attr) 
    ) ,
    status.flags,
    (m.fragment | surface | sealing | comments | nonx | noteblock)*
  }
# Object types that need no further name.
known.object =    xsd:string { pattern="tablet|envelope|prism|bulla" }
# Generic object type; the object pattern pairs it with n.attr.
user.object =     xsd:string { pattern="object" }
# Content allowed directly in a transliteration that is not an object.
nonobject = nonx

Surfaces

Surfaces are principally the physical surfaces:

@obverse, @reverse
Obverse and reverse.
@left, @right, @top, @bottom
Specifiable edges: left, right, top and bottom (as seen when looking at the obverse of the tablet).
@face
Conventional designation for surfaces of a prism; must be followed by single lowercase letter indicating the face, e.g.:
&P123321 = Some Prism
@prism
@face a
1. a
@face b
1. e
@surface
Generic surface tag which must be followed by name of surface, e.g.: @surface shoulder; @surface side a.
@edge
Generic edge tag; may be followed by single lowercase letter to name the edge similarly to @face.

Sealings

A transliteration of a sealing should be handled using the @seal tag included like a surface after the transliteration of the object on which the sealing occurs:

&P343434 = Some Sealed Tablet
1. a
$ seal 1

@seal 1
1. du-du

The use of $ seal anticipates the discussion of $-lines below; this mechanism can be used to indicate which sealings occur where on an object.

# A surface of an object. It either carries implicit="1" or both xml:id
# and label. Content is any mix of inter-line protocols, columns, nonx
# elements, m milestones and comments, followed by noteblocks.
# The type attribute determines what is required of n:
#   known surface names  -> no n attribute
#   "face"               -> n required, a single lowercase letter
#   "edge"               -> n optional, a single lowercase letter
#   "surface", "docket", "seal" -> n required, free text
# status.flags is not defined in this listing.
surface =
  element surface { 
    (implicit 
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    (proto.inter | column | nonx | m | comments)* ,
    (  attribute type { known.surface }
     |(attribute type { face.surface } , n.attr.lc)
     |(attribute type { edge.surface } , n.attr.lc?)
     |(attribute type { user.surface | docket.surface | seal.surface } , n.attr)
     ),
    primes?,
    status.flags,
    noteblock*
  }

# NOTE(review): "surface" is matched both here and by user.surface, so
# type="surface" validates with or without an n attribute -- confirm
# that this overlap is intended.
known.surface =
  xsd:string {
    pattern="surface|obverse|reverse|left|right|top|bottom"
  }
face.surface = xsd:string { pattern="face" }
edge.surface = xsd:string { pattern="edge" }
user.surface = xsd:string { pattern="surface" }
docket.surface = xsd:string { pattern="docket" }
seal.surface = xsd:string { pattern="seal" }

The scid attribute is intended for use in cross-referencing sealing instance transliterations to composite transliterations of sealings stored in an external database.

# A sealing: required xml:id, label and n (an NMTOKEN), optional scid,
# then any mix of columns, nonx elements, milestones, comments and
# noteblocks. Unlike object/surface/column there is no implicit form.
sealing =
  element sealing {
    attribute xml:id { xsd:ID },
    attribute label    { text },
    attribute n { xsd:NMTOKEN },
    attribute scid { xsd:NMTOKEN }?,
    (column | nonx | milestone | comments | noteblock)*
  }

Columns

Columns are indicated with the @column tag, which may be omitted for single-column texts. Column numbers must be given in arabic numerals:

&P545454 = Some Columnar Text
@column 1
1. a
@column 2
1. e
# A column. It either carries implicit="1" or both xml:id and label.
# n is required, o and primes are optional. Content is any mix of
# milestones, headings, line groups, lines, nonl/nonx elements,
# comments and inter-line protocols. status.flags is not defined in
# this listing.
column = 
  element column { 
    (implicit
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    (milestone | hdr | lg | l | nonl | nonx | comments | proto.inter)*,
    attribute n { text },
    attribute o { text }?,
    primes?,
    status.flags
  }

Status

The status of some of the features indicated with @-lines can be indicated in a manner similar to that of graphemes; the notation is intended to be natural and to follow Assyriological conventions:

@obverse?

Meaning: status of obverse/reverse uncertain

@reverse!*

Meaning: collated; reverse correct despite designation in publication

Primes can be used where this makes sense:

@face a'

@column 3'
# The primes attribute holds one or more U+2032 (PRIME) characters;
# \x{2032} is the RELAX NG compact-syntax escape for that code point.
primes = 
  attribute primes { xsd:string { pattern="\x{2032}+" } }

Headings

Transliterations and composites can both contain headings, which take the form @h<DIGIT>, where DIGIT is the outline-level of the heading, normally 1, 2 or 3.

Milestones

For technical reasons it is impossible to interweave physical structure (of the kind described above for transliterated objects) and document structure (e.g., paragraph divisions). This limitation is resolved by recourse to milestones.

Divisions

Documentary divisions in a transliterated object are given using the @m tag, with the milestone type given after an equals sign and the division type following; an optional division name or number may follow the division type:

@m=division paragraph 1

@m=division colophon

Discourse

Simple support for discourse elements in administrative and scholarly texts is provided using shorthands which are also implemented as milestones. These shorthands are:

  • @catchline
  • @colophon
  • @date
  • @signatures and @signature
  • @summary
  • @witnesses

These milestones must be specified between lines. If you need to mark a milestone in the middle of a line then you can split the line into two (labeled, e.g., a and b) at the milestone.

&P787878 = Some Administrative Text
1. 1(diš) udu
2. da-da
3. šu ba-ti
@date
4. u₄ 1-kam
@left
@summary
1. 1(diš) udu
&P908908 = A Scholarly Text
@colophon
1a. UNUG{ki} 
@date
1b. {iti}AB U₄ 1-KAM₂
2. MU 1.39@v-KAM₂ {m}an-ti-ʾi-ku-su LUGAL 
# Heading: element h with a numeric level and a required xml:id; the
# content pattern htext is not defined in this listing.
hdr = element h {
  attribute level { xsd:nonNegativeInteger },
  attribute xml:id   { xsd:ID },
  htext
}

# A milestone is either a division/locator m or a discourse m.
milestone = m | m.discourse

# Division or locator milestone; subtype and n are optional free text.
m = element m { 
  attribute type { "division" | "locator" },
  attribute subtype { text }?,
  attribute n { text }?,
  text
}

# Discourse milestone: subtype is required and limited to the listed
# values; endflag, when present, is always "1".
m.discourse = element m {
  attribute type { "discourse" },
  attribute subtype { "body" | "catchline" | "colophon" | "date" | "linecount" 
                      | "witnesses" | "signature" | "summary" 
		      | "sealings" | "sealing" },
  attribute endflag { "1" }?,
  text
}

# Fragment locator: a locator m whose subtype, if given, is "fragment"
# and which takes no n attribute. Every element this matches is also
# matched by the m pattern above.
m.fragment = element m { 
  attribute type { "locator" },
  attribute subtype { "fragment" }?,
  text
}

Implied tags

The ATF processor supplies structural elements where they are implied by the transliteration and this is indicated in the XTF tree by use of the implicit attribute. For example, given:

&P121212 = Some Sparse Data
1. a

The following (schematic) element structure is generated:

<transliteration>
  <object>
    <surface>
      <column>

All of these elements have implicit="1".

N.B.: Implicit elements are not addressable by label or xml:id attributes; explicit object, surface and column indicators must be given if addressability is a requirement.

# Marker attribute for implicit elements; the only permitted value is
# "1". object, surface and column take it instead of xml:id and label.
implicit = attribute implicit { "1" }

$-lines

$-lines are used to indicate information about the state of the text or object, or to describe features on the object which are not part of the transliteration proper. They come in two flavours: strict and loose.

Strict $-lines are subject to the restrictions in the table below; strict $-lines can be interpreted in their entirety by the ATF processor and the interpreted information can then be used by other programs. Strict $-lines are the best practice.

Loose $-lines are indicated by putting parentheses around the contents of the $-line. This is a facility provided to enable annotation of features which are not covered by the strict $-line specification. If the ATF processor detects that a loose $-line actually meets the criteria defined for strict $-lines it gives an advisory notice that the parentheses should be removed.

$-lines and comments are two quite different facilities, but experience has shown that transliterators can confuse the two. Comments are for information which does not belong in the transliteration and description of the text; comments are not displayed when the text is formatted for display or print. $-lines are for information which is integral to an understanding of the textual data; $-lines are included when the text is displayed or printed.

Seal

A particular use of $-lines is to indicate that a seal is used on an object; the form is:

$ seal <N>

Where N is a number indicating which seal is used; if a transliteration of the seal is also given using the @seal heading, the number following $ seal should correspond to the number following @seal. See the example above.

State

Most $-lines are used to give information about the state of the object being transliterated. The conventions for this can be summarized as follows:

Summary of Strict $-line Conventions for States
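Qualification | Extent [1]   | Scope       | State
--------------+--------------+-------------+-----------
at least      | n            | OBJECT [2]  | blank
at most       | several      | SURFACE [3] | broken
about         | some         | column      | effaced
              | NUMBER       | columns     | illegible
              | RANGE        | line        | missing
              | rest of      | lines       | traces
              | start of     | case        |
              | beginning of | cases       |
              | middle of    | surface     |
              | end of       |             |

(Each column lists the values permitted in that position; entries on the same row are not otherwise related.)

[1] The extent N may be a number such as 1 or 5; a RANGE gives two numbers separated by a hyphen, e.g., 3-5.
[2] OBJECT is any object specifier as described above, e.g., tablet, object etc.
[3] SURFACE is any surface specifier as described above, e.g., obverse, left etc.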

Rulings

$-lines are also used to indicate noteworthy rulings on the tablet; ordinary case- or line-ruling should not be indicated with a $-line, but where a scribe has used a ruling to give additional information about the document structure this should be noted as:

(single | double | triple)   ruling

Examples

Strict $-lines look like this:

$ 3 lines blank
$ rest of obverse missing

A loose $-line looks like this:

$ (head of statue broken)

A ruling $-line looks like this:

$ double ruling

Images

Inline images can be specified using the form:

$ (image N = <text>)

Where N is an image number consisting of digits followed by optional lowercase letters from a to z, and <text> is free text, giving a label for the image (which is copied through to the XHTML 'alt' attribute on the <img> tag).

$ (image 1 = numbered diagram of triangle)

At present, the implementation only works for XHTML which is produced within a project. The ATF processor constructs a file name consisting of the text ID and the image's N value, joined by an at sign (e.g., P123456@1). The XHTML producer then emits an <img> tag with the src attribute set to /<PROJECT>/<FILENAME>.png.

Thus, in the present implementation, there must exist an appropriately named file in the PNG graphics format residing in the project's images directory. The implementation is expected to support a more sophisticated locator mechanism in the future.

# Non-text elements. All three have text content and differ only in
# element name and attribute list. nong is not referenced elsewhere in
# this listing (presumably used by the included GDL grammar -- confirm).
nonx = element nonx { nonx-attlist, text }
nonl = element nonl { nonl-attlist, text }
nong = element nong { nong-attlist, text }

# Attributes of nonx: a required xml:id, an optional label+silent pair
# (both or neither), and exactly one of four attribute groups.
nonx-attlist =
  attribute xml:id { xsd:ID },
  (attribute label { text },
   attribute silent { "1" })?,
  (
   # Group 1, strict="1": either ref+scope, or extent+scope+state with
   # optional flags made up of the characters ! ? *.
   (attribute strict { "1" },
    ((attribute ref     { text },
      attribute scope   { text })
     |(attribute extent { text },
       attribute scope  { text },
       attribute state  { text },
       attribute flags  { xsd:string {
                              pattern="[!?*]+"
			  }}?)))
  |
   # Group 2, strict="0": every descriptive attribute is optional.
   (attribute strict { "0" },
    attribute extent { text }?,
    attribute ref    { text }?,
    attribute scope  { text }?,
    attribute state  { text }?)
  |
   # Group 3, strict="0" with ref="none" and type="empty".
   (attribute strict { "0" },
    attribute ref    { "none" },
    attribute type   { "empty" })
  |
   # Group 4, image: ref is P, Q or X plus digits, an at sign, digits
   # and optional lowercase letters; alt is required.
   (attribute type   { "image" },
    attribute strict { "0" },
    attribute ref    { xsd:string {
                          pattern="[PQX][0-9]+@[0-9]+[a-z]*" 
		       }},
    attribute alt    { text })
  )

# Attribute set shared by the three attlists below: a required type
# from the enumerated list, plus optional unit, extent, ref and xml:id.
non-x-attr-set =
  attribute type {
    "newline" | "broken" | "maybe-broken" | "traces"
    | "maybe-traces" | "blank" | "ruling" | "image"
    | "seal" | "docket" | "comment" | "bullet" | "other"
  },
  attribute unit { "self" | "quantity" | "ref" }?,
  attribute extent { text }?,
  attribute ref { text }?,
  attribute xml:id { xsd:ID }?
# noncolumn-attlist is not referenced elsewhere in this listing.
noncolumn-attlist &= non-x-attr-set
nonl-attlist &= non-x-attr-set
nong-attlist &= non-x-attr-set

Text Lines

Lines of transliterated text begin with a sequence of non-space characters followed by a period and a space (these are typically numbers, but that is not a requirement):

1.   a
a+1. e
2'.  i
# A text line: required xml:id and n; optional o, label and spanall
# (always "1" when present). Content is one of: one or more cells, one
# or more fields, or any mix of alignment groups and inline content.
l =
  element l {
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute o { text }?,
    attribute label { text }?,
    attribute spanall { "1" }?,
    (cell+ | f+ | (ag | l.inner)*) 
  }

# Inline line content. normword, words and glo are not defined in this
# listing (presumably supplied by the included GDL grammar -- confirm).
l.inner = (surro | normword | words | glo)*

Advanced

Legacy

To save the time and bother of converting legacy transliteration into ATF you can use:

#atf: use legacy

to get the processor to treat typographic features such as diacritics, half-brackets, and intra-sign square brackets as if they were valid ATF.

Line Numbers

By default the ATF processor renumbers lines, storing the original line number and generating a new one according to consistently defined rules. This procedure was adopted because of the lack of consistency in numbering administrative texts.

It is possible to suppress this behaviour and, indeed, it is necessary to suppress this behaviour if intertext linking is in use. The relevant protocol to achieve this is:

#atf: use mylines

Cells & Fields

Two mechanisms provide structural subdivisions of lines: cells and fields.

Cells are alignment units (like table cells); they can be of use to organize the data in a way that mimics the layout on the object. Fields are logical subdivisions in a line which are not necessarily laid out in a special way on the object. Cells can contain fields but fields cannot contain cells; fields are lower in the structural hierarchy than cells.

Fields can have a type specified so that higher order processors working with the XTF data can work intelligently with them.

# A cell is serialized as element c: an optional span attribute, then
# either one or more fields or inline line content.
cell =
  element c {
    span?,
    (
      f+ |
      l.inner
    )
  }

# The span attribute accepts any non-negative integer.
span =
  attribute span { xsd:nonNegativeInteger }

# A field holds alignment groups and inline line content; the cell
# pattern is not part of its content model.
f =
  element f {
    f-attlist,
    (ag | l.inner)*
  }

# Field attributes: only type is mandatory.
f-attlist &=
  attribute xml:id   { xsd:ID }?,
  attribute n        { text }?,
  attribute type     { xsd:NMTOKEN },
  attribute xml:lang { xsd:NMTOKEN }?

Streams

Streams are XTF's mechanism for entering data several times in several different ways; no automatic alignment is done between streams, but an alignment-group mechanism is provided for those occasions where alignment is a requirement. There are four kinds of stream in XTF:

MTS: Main Transliteration Stream
This is the default line-type and is the only one that is normally used. Lemmatization information is aligned with the MTS unless there is an NTS.
NTS: Normalized Transliteration Stream
This is a transliteration stream in which adjustments have been made to normalize the text; a normal-orthography version of an emesal text could be created using this mechanism, for example. Lemmatization information is aligned with the NTS if present. If NTS and LGS are both given, NTS must come before LGS.
LGS: Linearized Grapheme Stream
This is the sequence of graphemes exactly in order and linearized to the extent possible; this is mainly used in transliterations of ED texts where the presumed reading sequence and the actual grapheme sequence often diverge. No alignment is ever done with the LGS.
GUS: Gloss Underneath Stream
Implemented for compatibility with the SAA corpus, this stream allows glosses which appear on the tablet underneath the main text line to be given in their own line.
# The lg element groups an l line with the additional lines attached to it.
# xml:id, maxcells and n are all optional (maxcells is declared outside
# this section).
lg = element lg { 
  attribute xml:id { xsd:ID }?,
  maxcells?,
  attribute n { text }?,
  # Every alternative opens with l followed by an optional gus line;
  # the alternatives differ in what may follow:
  #   1. a required nts line
  #   2. an optional nts line, then one or more runs of
  #      (optional proto.link, one or more bil lines)
  #   3. an lgs line
  #   4. an nts line followed by an lgs line (nts always comes first)
  #   5. any number of e elements and comments, including none,
  #      so an l on its own is also accepted
  (   (l,gus?,nts)
    | (l,gus?,nts?,(proto.link?,bil+)+)
    | (l,gus?,lgs) 
    | (l,gus?,nts,lgs) 
    | (l,gus?, (e | comments)*)),
  # Any number of proto.inter protocols, then any number of v elements
  # (the var pattern), in that order.
  proto.inter*,
  var*
}
# The four typed variants of element l, told apart by their type attribute.

# type="bil": inline line content only.
bil =
  element l {
    attribute type { "bil" },
    l.inner*
  }

# type="nts": alignment groups and inline line content.
nts =
  element l {
    attribute type { "nts" },
    (ag | l.inner)*
  }

# type="lgs": a bare sequence of graphemes.
lgs =
  element l {
    attribute type { "lgs" },
    grapheme*
  }

# type="gus": inline line content only.
gus =
  element l {
    attribute type { "gus" },
    l.inner*
  }
# The v element. varnum and label are required, ex_label is optional.
# varnum lists "*" explicitly although text already admits any value.
# Content is either inline line content or one or more cells.
var =
  element v {
    attribute varnum   { text | "*" },
    attribute label    { text },
    attribute ex_label { text }?,
    (
      l.inner |
      cell+
    )
  }

Alignment

Alignment between MTS and NTS can be effected through the alignment-groups mechanism in which groups of words can be defined and labelled such that the groups in one stream correspond to the groups in the other stream.

If groups are used at all in a stream then every word in the stream must belong to a group.

# Alignment group: a labelled run of inline line content.
# ref is required and must consist of one or more lowercase letters a-z;
# form is optional.
ag =
  element ag {
    attribute ref {
      xsd:string { pattern = "[a-z]+" }
    },
    attribute form { text }?,
    l.inner*
  }

Zones

Zones are an experimental feature; at the schema level they are defined in the GDL, but it is convenient to discuss them here because they are another mechanism for grouping graphemes. The concept is that part of an inscription, e.g., a case, may exhibit ordering which may not be linear but is nevertheless based on some spatial relationship between signs. Transliterators can assign graphemes to zones and label the graphemes by zone.

See the GDL documentation under Presence for surrogates.

# The surro element wraps inline line content.
surro =
  element surro {
    l.inner
  }

# Add an optional surro as a further choice to the words and word
# patterns; their other definitions are not part of this section.
words |= surro?
word  |= surro?

Composites

@composite

Composite texts by convention have an ID beginning with Q and are declared by an @-line which immediately follows the &-line for the text:

&Q000002 = Archaic Lu A
@composite

To obtain an ID for a composite text e-mail stinney@sas.upenn.edu.

Structure

Most of the @-lines which are permitted in transliterations are not permitted in composites; this is because composites are organized around documentary structure rather than the structure of a physical object. The one exception is that milestones are allowed in composites.

Documentary divisions are indicated in ATF by use of the @div tag, which is followed by the name of the division (its type, e.g. part or colophon) and an optional label for that particular division (e.g. 1). The @div tag requires a closing @end tag, which must take as its single argument the name of its corresponding opening @div. @div's of different kinds may not be interwoven.

The @div tag maps to the DIV element in XTF. The first NMTOKEN which follows the @div is the name of the division and is stored in the @TYPE attribute. The remainder of the line is stored in the @N attribute.

 
@div part 1
...
@end part

@div colophon
...
@end colophon

In the liturgical corpus (including ETCSL editions of texts which could reasonably be considered liturgical), kirugu and other rubrics are used as logical structures, and they contain subdivisions giving the actual rubric; this is supported with the following syntax:

@div kirugu 1
1.  tur3-ra-na ...

@div rubric kirugu
10. ki-ru-gu2 1(disz)-a-kam
@end rubric

@end kirugu

@div giszgigal 1
11. u2-a a-u3-a u2-a-u2-a

@div rubric giszgigal
12. gisz-gi4-gal2-bi-im
@end rubric

@end giszgigal

Locator

A physical location may be given in a composite by using the locator milestone; the content after locator is a label. This is intended for use when the documentary structure of composites is being used to edit a text which is preserved only in one exemplar (the ePSD royal inscriptions corpus edits all royal inscriptions as composites):

1. a
@m=locator o 1

Variants

Variants are implemented to support the ETCSL corpus but may be used in any composite.

# Content model used for the div element: any number of the following,
# in any order.
composite-content =
  (
    noteblock? |
    milestone |
    \include |
    \div |
    variants |
    hdr |
    lg |
    l |
    comments |
    nonl |
    nonx |
    proto.inter
  )*

# include and referto carry the same reference attributes and have no
# other content. The backslash lets the keyword "include" serve as a
# pattern name.
\include =
  element include {
    increfAttr
  }

referto =
  element referto {
    increfAttr
  }

# Reference attributes: ref and n are required. from and to form an
# optional trailing group in which to may only appear together with from.
increfAttr =
  (
    attribute ref { text },
    attribute n   { text },
    (
      attribute from { text },
      attribute to   { text }?
    )?
  )

# The div element: its attribute list followed by composite content.
# Because composite-content itself admits div, divisions may nest.
\div =
  element div {
    div-attlist,
    composite-content
  }

# Division attributes: type is the only required one.
div-attlist &=
  attribute xml:id  { xsd:ID }?,
  attribute n       { text }?,
  attribute type    { xsd:NMTOKEN },
  attribute lang    { text }?,
  attribute place   { text }?,
  attribute subtype { text }?

# A variants element holds zero or more variant children.
variants =
  element variants {
    variant*
  }

# A variant holds any number of the following, in any order; nested
# variants elements are permitted.
variant =
  element variant {
    (
      \div |
      noteblock |
      variants |
      lg |
      l |
      comments |
      nonl |
      proto.inter |
      nonx
    )*
  }
# The score document type. After its attributes come an optional
# proto.start block, any mix of sigdef and noteblock, and then the body.
score =
  element score {
    score-attlist,
    proto.start?,
    (sigdef | noteblock?)*,
    (
      object |
      surface |
      column |
      milestone |
      \div |
      lg |
      comments |
      nonl
    )*
  }

# Score attributes. xml:id, n, score-mode and score-type are required;
# the remainder are optional. project, implicit, haslinks and maxcells
# are patterns declared outside this section.
score-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  score_mode,
  score_type,
  score_word?,
  attribute xml:lang { xsd:NMTOKEN }?,
  attribute hand     { text }?,
  project?,
  implicit?,
  haslinks?,
  maxcells?

# Enumerated attributes used by score-attlist.
score_mode =
  attribute score-mode {
    "parsed" |
    "unparsed"
  }
score_type =
  attribute score-type {
    "matrix" |
    "synopsis"
  }
# score-word has a single permitted value.
score_word =
  attribute score-word { "yes" }

# The synopticon element: attributes, then any number of sigdef
# declarations, then any mix of eg, comments and nonl.
synopticon =
  element synopticon {
    synopticon-attlist,
    sigdef*,
    (
      eg |
      comments |
      nonl
    )*
  }

# xml:id and n are required; xml:lang is optional.
synopticon-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?
# sigdef is an empty element; all three of its attributes are required.
sigdef =
  element sigdef {
    sigdef-attlist,
    empty
  }
sigdef-attlist &=
  attribute xml:id  { xsd:ID },
  attribute targ-id { xsd:NMTOKEN },
  attribute targ-n  { text }
# An eg element holds zero or more e elements; its xml:id is optional.
eg =
  element eg {
    eg-attlist,
    e*
  }
eg-attlist &=
  attribute xml:id { xsd:ID }?

# An e element carries its attributes and exactly one of: inline line
# content, one or more cells, or one or more fields.
e =
  element e {
    e-attlist,
    (
      l.inner |
      cell+ |
      f+
    )
  }

# Every e attribute is optional; sigref is typed as an IDREF.
e-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute sigref { xsd:IDREF }?,
  attribute n      { text }?,
  attribute l      { text }?,
  attribute p      { text }?,
  attribute hlid   { text }?,
  attribute plid   { text }?

Resources

Links

Top

Tutorial

GDL Manual

Advanced

Composites

Lexical

Linkage

Protocols


Questions about this document may be directed to the Oracc Steering Committee (osc at oracc dot org).