// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_trieval.go gen_ranges.go

// Package bidi contains functionality for bidirectional text support.
//
// See https://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"

// TODO
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.

import (
	"bytes"
)

// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.

// A Direction indicates the overall flow of text.
type Direction int

const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	//
	// LeftToRight is the zero value of Direction.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains neither left-to-right nor
	// right-to-left characters and that no default direction has been set.
	Neutral
)

// options collects the settings that Option functions can modify.
type options struct {
	// defaultDirection is the direction stored by the DefaultDirection
	// option. Its zero value is LeftToRight.
	defaultDirection Direction
}

// An Option is an option for Bidi processing.
type Option func(*options)

// ICU allows the user to define embedding levels. This may be used, for example,
// to use hierarchical structure of markup languages to define embeddings.
// The following option may be a way to expose this functionality in this API.
// // LevelFunc sets a function that associates nesting levels with the given text. // // The levels function will be called with monotonically increasing values for p. // func LevelFunc(levels func(p int) int) Option { // panic("unimplemented") // } // DefaultDirection sets the default direction for a Paragraph. The direction is // overridden if the text contains directional characters. func DefaultDirection(d Direction) Option { return func(opts *options) { opts.defaultDirection = d } } // A Paragraph holds a single Paragraph for Bidi processing. type Paragraph struct { p []byte o Ordering opts []Option types []Class pairTypes []bracketType pairValues []rune runes []rune options options } // Initialize the p.pairTypes, p.pairValues and p.types from the input previously // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph // separator (bidi class B). // // The function p.Order() needs these values to be set, so this preparation could be postponed. // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph // separator, the whole input needs to be processed anyway and should not be done twice. 
// // The function has the same return values as SetBytes() / SetString() func (p *Paragraph) prepareInput() (n int, err error) { p.runes = bytes.Runes(p.p) bytecount := 0 // clear slices from previous SetString or SetBytes p.pairTypes = nil p.pairValues = nil p.types = nil for _, r := range p.runes { props, i := LookupRune(r) bytecount += i cls := props.Class() if cls == B { return bytecount, nil } p.types = append(p.types, cls) if props.IsOpeningBracket() { p.pairTypes = append(p.pairTypes, bpOpen) p.pairValues = append(p.pairValues, r) } else if props.IsBracket() { // this must be a closing bracket, // since IsOpeningBracket is not true p.pairTypes = append(p.pairTypes, bpClose) p.pairValues = append(p.pairValues, r) } else { p.pairTypes = append(p.pairTypes, bpNone) p.pairValues = append(p.pairValues, 0) } } return bytecount, nil } // SetBytes configures p for the given paragraph text. It replaces text // previously set by SetBytes or SetString. If b contains a paragraph separator // it will only process the first paragraph and report the number of bytes // consumed from b including this separator. Error may be non-nil if options are // given. func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { p.p = b p.opts = opts return p.prepareInput() } // SetString configures s for the given paragraph text. It replaces text // previously set by SetBytes or SetString. If s contains a paragraph separator // it will only process the first paragraph and report the number of bytes // consumed from s including this separator. Error may be non-nil if options are // given. func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { p.p = []byte(s) p.opts = opts return p.prepareInput() } // IsLeftToRight reports whether the principle direction of rendering for this // paragraphs is left-to-right. If this returns false, the principle direction // of rendering is right-to-left. 
func (p *Paragraph) IsLeftToRight() bool {
	return p.Direction() == LeftToRight
}

// Direction returns the direction of the text of this paragraph.
//
// The value is taken from the ordering stored by the most recent call to
// Order that computed one.
func (p *Paragraph) Direction() Direction {
	return p.o.Direction()
}

// TODO: what happens if the position is > len(input)? This should return an error.

// RunAt reports the Run at the given position of the input text.
//
// pos is measured in runes and is matched against the runs of the ordering
// stored by Order: the result is the first run whose end lies beyond pos. If
// pos lies at or beyond the end of the text, the first run is returned.
//
// This method can be used for computing line breaks on paragraphs.
func (p *Paragraph) RunAt(pos int) Run {
	end := 0
	runNumber := 0
	for i, r := range p.o.runes {
		end += len(r)
		if pos < end {
			runNumber = i
			// Stop at the first run that contains pos. Every later run
			// also satisfies pos < end, so without the break the last run
			// would always be selected.
			break
		}
	}
	return p.o.Run(runNumber)
}

// calculateOrdering splits runes into runs of equal direction. levels holds
// one embedding level per rune: even levels are left-to-right, odd levels are
// right-to-left. A new run starts wherever the direction changes, and the
// final run extends to the end of runes.
func calculateOrdering(levels []level, runes []rune) Ordering {
	var curDir Direction

	prevDir := Neutral
	prevI := 0

	o := Ordering{}
	// lvl = 0,2,4,...: left to right
	// lvl = 1,3,5,...: right to left
	for i, lvl := range levels {
		if lvl%2 == 0 {
			curDir = LeftToRight
		} else {
			curDir = RightToLeft
		}
		if curDir != prevDir {
			// The direction changed: close the run that ended just before
			// i. At i == 0 there is no previous run to close.
			if i > 0 {
				o.runes = append(o.runes, runes[prevI:i])
				o.directions = append(o.directions, prevDir)
				o.startpos = append(o.startpos, prevI)
			}
			prevI = i
			prevDir = curDir
		}
	}
	// Close the last run.
	o.runes = append(o.runes, runes[prevI:])
	o.directions = append(o.directions, prevDir)
	o.startpos = append(o.startpos, prevI)
	return o
}

// Order computes the visual ordering of all the runs in a Paragraph.
//
// For a paragraph without any processed runes it returns an empty Ordering.
// Otherwise the computed Ordering is also stored in p, where Direction and
// RunAt read it. An error from newParagraph is returned unchanged.
func (p *Paragraph) Order() (Ordering, error) {
	if len(p.types) == 0 {
		return Ordering{}, nil
	}

	// Rebuild the option state from its zero value so that options handed to
	// an earlier SetBytes or SetString call do not leak into this paragraph.
	p.options = options{}
	for _, fn := range p.opts {
		fn(&p.options)
	}

	// NOTE(review): -1 presumably asks newParagraph to derive the paragraph
	// level from the text itself; confirm against newParagraph.
	lvl := level(-1)
	if p.options.defaultDirection == RightToLeft {
		lvl = 1
	}
	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
	if err != nil {
		return Ordering{}, err
	}

	levels := para.getLevels([]int{len(p.types)})

	p.o = calculateOrdering(levels, p.runes)
	return p.o, nil
}

// Line computes the visual ordering of runs for a single line starting and
// ending at the given positions in the original text.
func (p *Paragraph) Line(start, end int) (Ordering, error) { lineTypes := p.types[start:end] para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) if err != nil { return Ordering{}, err } levels := para.getLevels([]int{len(lineTypes)}) o := calculateOrdering(levels, p.runes[start:end]) return o, nil } // An Ordering holds the computed visual order of runs of a Paragraph. Calling // SetBytes or SetString on the originating Paragraph invalidates an Ordering. // The methods of an Ordering should only be called by one goroutine at a time. type Ordering struct { runes [][]rune directions []Direction startpos []int } // Direction reports the directionality of the runs. // // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. func (o *Ordering) Direction() Direction { return o.directions[0] } // NumRuns returns the number of runs. func (o *Ordering) NumRuns() int { return len(o.runes) } // Run returns the ith run within the ordering. func (o *Ordering) Run(i int) Run { r := Run{ runes: o.runes[i], direction: o.directions[i], startpos: o.startpos[i], } return r } // TODO: perhaps with options. // // Reorder creates a reader that reads the runes in visual order per character. // // Modifiers remain after the runes they modify. // func (l *Runs) Reorder() io.Reader { // panic("unimplemented") // } // A Run is a continuous sequence of characters of a single direction. type Run struct { runes []rune direction Direction startpos int } // String returns the text of the run in its original order. func (r *Run) String() string { return string(r.runes) } // Bytes returns the text of the run in its original order. func (r *Run) Bytes() []byte { return []byte(r.String()) } // TODO: methods for // - Display order // - headers and footers // - bracket replacement. // Direction reports the direction of the run. 
func (r *Run) Direction() Direction {
	return r.direction
}

// Pos returns the position of the Run within the text passed to SetBytes or SetString of the
// originating Paragraph value.
//
// Both values are rune indices. end is the index of the last rune of the run,
// so it is inclusive.
func (r *Run) Pos() (start, end int) {
	return r.startpos, r.startpos + len(r.runes) - 1
}

// AppendReverse reverses the order of characters of in, appends them to out,
// and returns the result. Brackets are replaced with their counterparts.
//
// The result is always a newly allocated slice; out itself is not modified.
// Bytes of in that are not valid UTF-8 appear as U+FFFD in the result.
//
// NOTE(review): earlier documentation promised that modifiers still follow the
// runes they modify, but this function reverses rune by rune. Confirm the
// intended handling of combining characters.
func AppendReverse(out, in []byte) []byte {
	ret := make([]byte, len(out), len(out)+len(in))
	copy(ret, out)

	inRunes := bytes.Runes(in)
	for i, r := range inRunes {
		prop, _ := LookupRune(r)
		if prop.IsBracket() {
			inRunes[i] = prop.reverseBracket(r)
		}
	}

	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
		inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
	}

	// Append instead of copying into a buffer of len(in) bytes: the reversed
	// text can encode to more bytes than in, for example when invalid bytes
	// were decoded as U+FFFD, and a fixed-size copy would cut it short.
	return append(ret, string(inRunes)...)
}

// ReverseString reverses the order of characters in s and returns a new string.
// Brackets are replaced with their counterparts.
//
// NOTE(review): earlier documentation promised that modifiers still follow the
// runes they modify, but this function reverses rune by rune. Confirm the
// intended handling of combining characters.
func ReverseString(s string) string {
	input := []rune(s)
	li := len(input)
	ret := make([]rune, li)
	for i, r := range input {
		prop, _ := LookupRune(r)
		if prop.IsBracket() {
			ret[li-i-1] = prop.reverseBracket(r)
		} else {
			ret[li-i-1] = r
		}
	}
	return string(ret)
}