1
0
mirror of https://github.com/jesseduffield/lazygit.git synced 2025-08-06 11:02:41 +03:00

refactor to only have one context per view

This commit is contained in:
Jesse Duffield
2022-06-13 11:01:26 +10:00
parent 6dfef08efc
commit 524bf83a4a
372 changed files with 28866 additions and 6902 deletions

View File

@@ -1,8 +1,13 @@
<a href="https://stand-with-ukraine.pp.ua">
<img src="https://upload.wikimedia.org/wikipedia/commons/d/d2/Flag_of_Ukraine.png" height="20px" width="100%"/>
</a>
# ![Tcell](logos/tcell.png)
<img src="logos/tcell.png" style="float: right"/>
Please see [here](UKRAINE.md) for an important message for the people of Russia.
# Tcell
_Tcell_ is a _Go_ package that provides a cell based view for text terminals, like _XTerm_.
It was inspired by _termbox_, but includes many additional improvements.

View File

@@ -1,6 +1,7 @@
//go:build windows
// +build windows
// Copyright 2021 The TCell Authors
// Copyright 2022 The TCell Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use file except in compliance with the License.
@@ -114,22 +115,23 @@ var (
// characters (Unicode) are in use. The documentation refers to them
// without this suffix, as the resolution is made via preprocessor.
var (
procReadConsoleInput = k32.NewProc("ReadConsoleInputW")
procWaitForMultipleObjects = k32.NewProc("WaitForMultipleObjects")
procCreateEvent = k32.NewProc("CreateEventW")
procSetEvent = k32.NewProc("SetEvent")
procGetConsoleCursorInfo = k32.NewProc("GetConsoleCursorInfo")
procSetConsoleCursorInfo = k32.NewProc("SetConsoleCursorInfo")
procSetConsoleCursorPosition = k32.NewProc("SetConsoleCursorPosition")
procSetConsoleMode = k32.NewProc("SetConsoleMode")
procGetConsoleMode = k32.NewProc("GetConsoleMode")
procGetConsoleScreenBufferInfo = k32.NewProc("GetConsoleScreenBufferInfo")
procFillConsoleOutputAttribute = k32.NewProc("FillConsoleOutputAttribute")
procFillConsoleOutputCharacter = k32.NewProc("FillConsoleOutputCharacterW")
procSetConsoleWindowInfo = k32.NewProc("SetConsoleWindowInfo")
procSetConsoleScreenBufferSize = k32.NewProc("SetConsoleScreenBufferSize")
procSetConsoleTextAttribute = k32.NewProc("SetConsoleTextAttribute")
procMessageBeep = u32.NewProc("MessageBeep")
procReadConsoleInput = k32.NewProc("ReadConsoleInputW")
procWaitForMultipleObjects = k32.NewProc("WaitForMultipleObjects")
procCreateEvent = k32.NewProc("CreateEventW")
procSetEvent = k32.NewProc("SetEvent")
procGetConsoleCursorInfo = k32.NewProc("GetConsoleCursorInfo")
procSetConsoleCursorInfo = k32.NewProc("SetConsoleCursorInfo")
procSetConsoleCursorPosition = k32.NewProc("SetConsoleCursorPosition")
procSetConsoleMode = k32.NewProc("SetConsoleMode")
procGetConsoleMode = k32.NewProc("GetConsoleMode")
procGetConsoleScreenBufferInfo = k32.NewProc("GetConsoleScreenBufferInfo")
procFillConsoleOutputAttribute = k32.NewProc("FillConsoleOutputAttribute")
procFillConsoleOutputCharacter = k32.NewProc("FillConsoleOutputCharacterW")
procSetConsoleWindowInfo = k32.NewProc("SetConsoleWindowInfo")
procSetConsoleScreenBufferSize = k32.NewProc("SetConsoleScreenBufferSize")
procSetConsoleTextAttribute = k32.NewProc("SetConsoleTextAttribute")
procGetLargestConsoleWindowSize = k32.NewProc("GetLargestConsoleWindowSize")
procMessageBeep = u32.NewProc("MessageBeep")
)
const (
@@ -189,7 +191,7 @@ func (s *cScreen) Init() error {
s.in = in
out, e := syscall.Open("CONOUT$", syscall.O_RDWR, 0)
if e != nil {
syscall.Close(s.in)
_ = syscall.Close(s.in)
return e
}
s.out = out
@@ -224,15 +226,15 @@ func (s *cScreen) Init() error {
s.resize()
s.fini = false
s.setInMode(modeResizeEn | modeExtndFlg)
s.setInMode(modeResizeEn | modeExtendFlg)
// 24-bit color is opt-in for now, because we can't figure out
// how to make it work consistently.
if s.truecolor {
s.setOutMode(modeVtOutput | modeNoAutoNL | modeCookedOut)
var omode uint32
s.getOutMode(&omode)
if omode&modeVtOutput == modeVtOutput {
var om uint32
s.getOutMode(&om)
if om&modeVtOutput == modeVtOutput {
s.vten = true
} else {
s.truecolor = false
@@ -268,9 +270,9 @@ func (s *cScreen) DisableMouse() {
func (s *cScreen) enableMouse(on bool) {
if on {
s.setInMode(modeResizeEn | modeMouseEn | modeExtndFlg)
s.setInMode(modeResizeEn | modeMouseEn | modeExtendFlg)
} else {
s.setInMode(modeResizeEn | modeExtndFlg)
s.setInMode(modeResizeEn | modeExtendFlg)
}
}
@@ -292,7 +294,7 @@ func (s *cScreen) disengage() {
}
s.running = false
stopQ := s.stopQ
procSetEvent.Call(uintptr(s.cancelflag))
_, _, _ = procSetEvent.Call(uintptr(s.cancelflag))
close(stopQ)
s.Unlock()
@@ -307,7 +309,7 @@ func (s *cScreen) disengage() {
s.clearScreen(StyleDefault, false)
s.setCursorPos(0, 0, false)
s.setCursorInfo(&s.ocursor)
procSetConsoleTextAttribute.Call(
_, _, _ = procSetConsoleTextAttribute.Call(
uintptr(s.out),
uintptr(s.mapStyle(StyleDefault)))
}
@@ -421,7 +423,7 @@ type rect struct {
func (s *cScreen) emitVtString(vs string) {
esc := utf16.Encode([]rune(vs))
syscall.WriteConsole(s.out, &esc[0], uint32(len(esc)), nil, nil)
_ = syscall.WriteConsole(s.out, &esc[0], uint32(len(esc)), nil, nil)
}
func (s *cScreen) showCursor() {
@@ -487,8 +489,8 @@ const (
keyEvent uint16 = 1
mouseEvent uint16 = 2
resizeEvent uint16 = 4
menuEvent uint16 = 8 // don't use
focusEvent uint16 = 16 // don't use
// menuEvent uint16 = 8 // don't use
// focusEvent uint16 = 16 // don't use
)
type mouseRecord struct {
@@ -500,10 +502,10 @@ type mouseRecord struct {
}
const (
mouseDoubleClick uint32 = 0x2
mouseHWheeled uint32 = 0x8
mouseVWheeled uint32 = 0x4
mouseMoved uint32 = 0x1
mouseHWheeled uint32 = 0x8
mouseVWheeled uint32 = 0x4
// mouseDoubleClick uint32 = 0x2
// mouseMoved uint32 = 0x1
)
type resizeRecord struct {
@@ -590,6 +592,8 @@ var vkKeys = map[uint16]Key{
vkInsert: KeyInsert,
vkDelete: KeyDelete,
vkHelp: KeyHelp,
vkEscape: KeyEscape,
vkSpace: ' ',
vkF1: KeyF1,
vkF2: KeyF2,
vkF3: KeyF3,
@@ -806,11 +810,11 @@ func (s *cScreen) scanInput(stopQ chan struct{}) {
}
}
// Windows console can display 8 characters, in either low or high intensity
func (s *cScreen) Colors() int {
if s.vten {
return 1 << 24
}
// Windows console can display 8 colors, in either low or high intensity
return 16
}
@@ -868,10 +872,10 @@ func (s *cScreen) mapStyle(style Style) uint16 {
// views.
if a&AttrReverse != 0 {
attr = ba
attr |= (fa << 4)
attr |= fa << 4
} else {
attr = fa
attr |= (ba << 4)
attr |= ba << 4
}
if a&AttrBold != 0 {
attr |= 0x8
@@ -895,19 +899,19 @@ func (s *cScreen) SetCell(x, y int, style Style, ch ...rune) {
}
}
func (s *cScreen) SetContent(x, y int, mainc rune, combc []rune, style Style) {
func (s *cScreen) SetContent(x, y int, primary rune, combining []rune, style Style) {
s.Lock()
if !s.fini {
s.cells.SetContent(x, y, mainc, combc, style)
s.cells.SetContent(x, y, primary, combining, style)
}
s.Unlock()
}
func (s *cScreen) GetContent(x, y int) (rune, []rune, Style, int) {
s.Lock()
mainc, combc, style, width := s.cells.GetContent(x, y)
primary, combining, style, width := s.cells.GetContent(x, y)
s.Unlock()
return mainc, combc, style, width
return primary, combining, style, width
}
func (s *cScreen) sendVtStyle(style Style) {
@@ -931,15 +935,15 @@ func (s *cScreen) sendVtStyle(style Style) {
}
if fg.IsRGB() {
r, g, b := fg.RGB()
fmt.Fprintf(esc, vtSetFgRGB, r, g, b)
_, _ = fmt.Fprintf(esc, vtSetFgRGB, r, g, b)
} else if fg.Valid() {
fmt.Fprintf(esc, vtSetFg, fg&0xff)
_, _ = fmt.Fprintf(esc, vtSetFg, fg&0xff)
}
if bg.IsRGB() {
r, g, b := bg.RGB()
fmt.Fprintf(esc, vtSetBgRGB, r, g, b)
_, _ = fmt.Fprintf(esc, vtSetBgRGB, r, g, b)
} else if bg.Valid() {
fmt.Fprintf(esc, vtSetBg, bg&0xff)
_, _ = fmt.Fprintf(esc, vtSetBg, bg&0xff)
}
s.emitVtString(esc.String())
}
@@ -954,16 +958,16 @@ func (s *cScreen) writeString(x, y int, style Style, ch []uint16) {
if s.vten {
s.sendVtStyle(style)
} else {
procSetConsoleTextAttribute.Call(
_, _, _ = procSetConsoleTextAttribute.Call(
uintptr(s.out),
uintptr(s.mapStyle(style)))
}
syscall.WriteConsole(s.out, &ch[0], uint32(len(ch)), nil, nil)
_ = syscall.WriteConsole(s.out, &ch[0], uint32(len(ch)), nil, nil)
}
func (s *cScreen) draw() {
// allocate a scratch line big enough for no combining chars.
// if you have combining characters, you may pay for extra allocs.
// if you have combining characters, you may pay for extra allocations.
if s.clear {
s.clearScreen(s.style, s.vten)
s.clear = false
@@ -1053,19 +1057,19 @@ type consoleInfo struct {
}
func (s *cScreen) getConsoleInfo(info *consoleInfo) {
procGetConsoleScreenBufferInfo.Call(
_, _, _ = procGetConsoleScreenBufferInfo.Call(
uintptr(s.out),
uintptr(unsafe.Pointer(info)))
}
func (s *cScreen) getCursorInfo(info *cursorInfo) {
procGetConsoleCursorInfo.Call(
_, _, _ = procGetConsoleCursorInfo.Call(
uintptr(s.out),
uintptr(unsafe.Pointer(info)))
}
func (s *cScreen) setCursorInfo(info *cursorInfo) {
procSetConsoleCursorInfo.Call(
_, _, _ = procSetConsoleCursorInfo.Call(
uintptr(s.out),
uintptr(unsafe.Pointer(info)))
@@ -1076,14 +1080,14 @@ func (s *cScreen) setCursorPos(x, y int, vtEnable bool) {
// Note that the string is Y first. Origin is 1,1.
s.emitVtString(fmt.Sprintf(vtCursorPos, y+1, x+1))
} else {
procSetConsoleCursorPosition.Call(
_, _, _ = procSetConsoleCursorPosition.Call(
uintptr(s.out),
coord{int16(x), int16(y)}.uintptr())
}
}
func (s *cScreen) setBufferSize(x, y int) {
procSetConsoleScreenBufferSize.Call(
_, _, _ = procSetConsoleScreenBufferSize.Call(
uintptr(s.out),
coord{int16(x), int16(y)}.uintptr())
}
@@ -1096,6 +1100,37 @@ func (s *cScreen) Size() (int, int) {
return w, h
}
// SetSize asks the console to resize its window to w columns by h rows.
//
// It first queries the largest window the console reports it can show. If
// that query yields a zero dimension, or a width of 500 or more, the call
// returns without changing anything. Otherwise the screen buffer is set to
// the reported maximum, the window rectangle is set to (0,0)-(w-1,h-1), and
// resize() is called to pick up the new geometry.
func (s *cScreen) SetSize(w, h int) {
	// The result packs both dimensions into one word: width in the low
	// 16 bits, height in the bits above.
	packed, _, _ := procGetLargestConsoleWindowSize.Call(uintptr(s.out))
	maxCols := int(packed & 0xffff)
	maxRows := int(packed >> 16)

	if maxCols == 0 || maxRows == 0 {
		return
	}

	// Hacky workaround for Windows Terminal (Windows 11), which does not
	// support application initiated resizing. It is detected by an
	// implausibly large maximum width: 500 columns or more is almost
	// certainly Windows Terminal. (The legacy console does support
	// application resizing.)
	if maxCols >= 500 {
		return
	}

	s.setBufferSize(maxCols, maxRows)

	window := rect{0, 0, int16(w - 1), int16(h - 1)}
	_, _, _ = procSetConsoleWindowInfo.Call(
		uintptr(s.out),
		uintptr(1),
		uintptr(unsafe.Pointer(&window)))

	s.resize()
}
func (s *cScreen) resize() {
info := consoleInfo{}
s.getConsoleInfo(&info)
@@ -1114,11 +1149,11 @@ func (s *cScreen) resize() {
s.setBufferSize(w, h)
r := rect{0, 0, int16(w - 1), int16(h - 1)}
procSetConsoleWindowInfo.Call(
_, _, _ = procSetConsoleWindowInfo.Call(
uintptr(s.out),
uintptr(1),
uintptr(unsafe.Pointer(&r)))
s.PostEvent(NewEventResize(w, h))
_ = s.PostEvent(NewEventResize(w, h))
}
func (s *cScreen) Clear() {
@@ -1151,13 +1186,13 @@ func (s *cScreen) clearScreen(style Style, vtEnable bool) {
scratch := uint32(0)
count := uint32(x * y)
procFillConsoleOutputAttribute.Call(
_, _, _ = procFillConsoleOutputAttribute.Call(
uintptr(s.out),
uintptr(attr),
uintptr(count),
pos.uintptr(),
uintptr(unsafe.Pointer(&scratch)))
procFillConsoleOutputCharacter.Call(
_, _, _ = procFillConsoleOutputCharacter.Call(
uintptr(s.out),
uintptr(' '),
uintptr(count),
@@ -1168,47 +1203,39 @@ func (s *cScreen) clearScreen(style Style, vtEnable bool) {
const (
// Input modes
modeExtndFlg uint32 = 0x0080
modeMouseEn = 0x0010
modeResizeEn = 0x0008
modeCooked = 0x0001
modeVtInput = 0x0200
modeExtendFlg uint32 = 0x0080
modeMouseEn = 0x0010
modeResizeEn = 0x0008
// modeCooked = 0x0001
// modeVtInput = 0x0200
// Output modes
modeCookedOut uint32 = 0x0001
modeWrapEOL = 0x0002
modeVtOutput = 0x0004
modeNoAutoNL = 0x0008
// modeWrapEOL = 0x0002
)
func (s *cScreen) setInMode(mode uint32) error {
rv, _, err := procSetConsoleMode.Call(
func (s *cScreen) setInMode(mode uint32) {
_, _, _ = procSetConsoleMode.Call(
uintptr(s.in),
uintptr(mode))
if rv == 0 {
return err
}
return nil
}
func (s *cScreen) setOutMode(mode uint32) error {
rv, _, err := procSetConsoleMode.Call(
func (s *cScreen) setOutMode(mode uint32) {
_, _, _ = procSetConsoleMode.Call(
uintptr(s.out),
uintptr(mode))
if rv == 0 {
return err
}
return nil
}
func (s *cScreen) getInMode(v *uint32) {
procGetConsoleMode.Call(
_, _, _ = procGetConsoleMode.Call(
uintptr(s.in),
uintptr(unsafe.Pointer(v)))
}
func (s *cScreen) getOutMode(v *uint32) {
procGetConsoleMode.Call(
_, _, _ = procGetConsoleMode.Call(
uintptr(s.out),
uintptr(unsafe.Pointer(v)))
}
@@ -1221,15 +1248,15 @@ func (s *cScreen) SetStyle(style Style) {
// No fallback rune support, since we have Unicode. Yay!
func (s *cScreen) RegisterRuneFallback(r rune, subst string) {
func (s *cScreen) RegisterRuneFallback(_ rune, _ string) {
}
func (s *cScreen) UnregisterRuneFallback(r rune) {
func (s *cScreen) UnregisterRuneFallback(_ rune) {
}
func (s *cScreen) CanDisplay(r rune, checkFallbacks bool) bool {
func (s *cScreen) CanDisplay(_ rune, _ bool) bool {
// We presume we can display anything -- we're Unicode.
// (Sadly this not precisely true. Combinings are especially
// (Sadly this is not precisely true. Combining characters are especially
// poorly supported under Windows.)
return true
}

View File

@@ -27,7 +27,7 @@ type EventPaste struct {
t time.Time
}
// When returns the time when this EventMouse was created.
// When returns the time when this EventPaste was created.
func (ev *EventPaste) When() time.Time {
return ev.t
}

View File

@@ -1,4 +1,4 @@
// Copyright 2021 The TCell Authors
// Copyright 2022 The TCell Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use file except in compliance with the License.
@@ -43,7 +43,7 @@ type Screen interface {
// be displayed if Show() or Sync() is called. The width is the width
// in screen cells; most often this will be 1, but some East Asian
// characters require two cells.
GetContent(x, y int) (mainc rune, combc []rune, style Style, width int)
GetContent(x, y int) (primary rune, combining []rune, style Style, width int)
// SetContent sets the contents of the given cell location. If
// the coordinates are out of range, then the operation is ignored.
@@ -52,13 +52,13 @@ type Screen interface {
// that follows is a possible list of combining characters to append,
// and will usually be nil (no combining characters.)
//
// The results are not displayd until Show() or Sync() is called.
// The results are not displayed until Show() or Sync() is called.
//
// Note that wide (East Asian full width) runes occupy two cells,
// and attempts to place character at next cell to the right will have
// undefined effects. Wide runes that are printed in the
// last column will be replaced with a single width space on output.
SetContent(x int, y int, mainc rune, combc []rune, style Style)
SetContent(x int, y int, primary rune, combining []rune, style Style)
// SetStyle sets the default style to use when clearing the screen
// or when StyleDefault is specified. If it is also StyleDefault,
@@ -70,7 +70,7 @@ type Screen interface {
// dimensions of the screen, the cursor will be hidden.
ShowCursor(x int, y int)
// HideCursor is used to hide the cursor. Its an alias for
// HideCursor is used to hide the cursor. It's an alias for
// ShowCursor(-1, -1).
HideCursor()
@@ -139,7 +139,7 @@ type Screen interface {
DisablePaste()
// HasMouse returns true if the terminal (apparently) supports a
// mouse. Note that the a return value of true doesn't guarantee that
// mouse. Note that the return value of true doesn't guarantee that
// a mouse/pointing device is present; a false return definitely
// indicates no mouse support is available.
HasMouse() bool
@@ -161,8 +161,8 @@ type Screen interface {
// internal model. This may be both expensive and visually jarring,
// so it should only be used when believed to actually be necessary.
//
// Typically this is called as a result of a user-requested redraw
// (e.g. to clear up on screen corruption caused by some other program),
// Typically, this is called as a result of a user-requested redraw
// (e.g. to clear up on-screen corruption caused by some other program),
// or during a resize event.
Sync()
@@ -178,13 +178,13 @@ type Screen interface {
// o as a fallback for ø. This should be done cautiously for
// characters that might be displayed ordinarily in language
// specific text -- characters that could change the meaning of
// of written text would be dangerous. The intention here is to
// written text would be dangerous. The intention here is to
// facilitate fallback characters in pseudo-graphical applications.
//
// If the terminal has fallbacks already in place via an alternate
// character set, those are used in preference. Also, standard
// fallbacks for graphical characters in the ACSC terminfo string
// are registered implicitly.
// fallbacks for graphical characters in the alternate character set
// terminfo string are registered implicitly.
//
// The display string should be the same width as original rune.
// This makes it possible to register two character replacements
@@ -203,7 +203,7 @@ type Screen interface {
UnregisterRuneFallback(r rune)
// CanDisplay returns true if the given rune can be displayed on
// this screen. Note that this is a best guess effort -- whether
// this screen. Note that this is a best-guess effort -- whether
// your fonts support the character or not may be questionable.
// Mostly this is for folks who work outside of Unicode.
//
@@ -213,7 +213,7 @@ type Screen interface {
// one that is visually indistinguishable from the one requested.
CanDisplay(r rune, checkFallbacks bool) bool
// Resize does nothing, since its generally not possible to
// Resize does nothing, since it's generally not possible to
// ask a screen to resize, but it allows the Screen to implement
// the View interface.
Resize(int, int, int, int)
@@ -239,6 +239,15 @@ type Screen interface {
// Beep attempts to sound an OS-dependent audible alert and returns an error
// when unsuccessful.
Beep() error
// SetSize attempts to resize the window. It also invalidates the cells and
// calls the resize function. Note that if the window size is changed, it will
// not be restored upon application exit.
//
// Many terminals cannot support this. Perversely, the "modern" Windows Terminal
// does not support application-initiated resizing, whereas the legacy terminal does.
// Also, some emulators can support this but may have it disabled by default.
SetSize(int, int)
}
// NewScreen returns a default Screen suitable for the user's terminal
@@ -255,7 +264,7 @@ func NewScreen() (Screen, error) {
}
// MouseFlags are options to modify the handling of mouse events.
// Actual events can be or'd together.
// Actual events can be ORed together.
type MouseFlags int
const (
@@ -265,7 +274,7 @@ const (
)
// CursorStyle represents a given cursor style, which can include the shape and
// whether the cursor blinks or is solid. Support for changing these is not universal.
// whether the cursor blinks or is solid. Support for changing this is not universal.
type CursorStyle int
const (
@@ -276,4 +285,4 @@ const (
CursorStyleSteadyUnderline
CursorStyleBlinkingBar
CursorStyleSteadyBar
)
)

View File

@@ -1,4 +1,4 @@
// Copyright 2021 The TCell Authors
// Copyright 2022 The TCell Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use file except in compliance with the License.
@@ -49,13 +49,6 @@ type SimulationScreen interface {
// InjectMouse injects a mouse event.
InjectMouse(x, y int, buttons ButtonMask, mod ModMask)
// SetSize resizes the underlying physical screen. It also causes
// a resize event to be injected during the next Show() or Sync().
// A new physical contents array will be allocated (with data from
// the old copied), so any prior value obtained with GetContents
// won't be used anymore
SetSize(width, height int)
// GetContents returns screen contents as an array of
// cells, along with the physical width & height. Note that the
// physical contents will be used until the next time SetSize()

View File

@@ -1,4 +1,4 @@
// Copyright 2020 The TCell Authors
// Copyright 2022 The TCell Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use file except in compliance with the License.
@@ -26,6 +26,7 @@ type Style struct {
fg Color
bg Color
attrs AttrMask
url string
}
// StyleDefault represents a default style, based upon the context.
@@ -42,6 +43,7 @@ func (s Style) Foreground(c Color) Style {
fg: c,
bg: s.bg,
attrs: s.attrs,
url: s.url,
}
}
@@ -52,11 +54,12 @@ func (s Style) Background(c Color) Style {
fg: s.fg,
bg: c,
attrs: s.attrs,
url: s.url,
}
}
// Decompose breaks a style up, returning the foreground, background,
// and other attributes.
// and other attributes. The URL, if set, is not included.
func (s Style) Decompose() (fg Color, bg Color, attr AttrMask) {
return s.fg, s.bg, s.attrs
}
@@ -67,12 +70,14 @@ func (s Style) setAttrs(attrs AttrMask, on bool) Style {
fg: s.fg,
bg: s.bg,
attrs: s.attrs | attrs,
url: s.url,
}
}
return Style{
fg: s.fg,
bg: s.bg,
attrs: s.attrs &^ attrs,
url: s.url,
}
}
@@ -133,5 +138,18 @@ func (s Style) Attributes(attrs AttrMask) Style {
fg: s.fg,
bg: s.bg,
attrs: attrs,
url: s.url,
}
}
// Url returns a copy of the style with its URL replaced by url; the
// foreground, background and attributes are carried over unchanged.
// When url is non-empty and the terminal supports it, text drawn with the
// style will typically be marked up as a clickable link to that URL. An
// empty url turns this mode off.
func (s Style) Url(url string) Style {
	// s is a value receiver, so this modifies a private copy only.
	linked := s
	linked.url = url
	return linked
}

View File

@@ -1,4 +1,4 @@
// Copyright 2021 The TCell Authors
// Copyright 2022 The TCell Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use file except in compliance with the License.
@@ -227,6 +227,9 @@ type Terminfo struct {
CursorSteadyUnderline string
CursorBlinkingBar string
CursorSteadyBar string
EnterUrl string
ExitUrl string
SetWindowSize string
}
const (
@@ -234,93 +237,75 @@ const (
ModifiersXTerm = 1
)
type stackElem struct {
s string
i int
isStr bool
isInt bool
// stack is a last-in, first-out list of loosely typed values; the top of
// the stack is the final element of the slice.
type stack []interface{}

// Push returns the stack with v appended as the new top element.
func (st stack) Push(v interface{}) stack {
	grown := append(st, v)
	return grown
}
type stack []stackElem
func (st stack) Push(v string) stack {
e := stackElem{
s: v,
isStr: true,
}
return append(st, e)
}
func (st stack) Pop() (string, stack) {
v := ""
func (st stack) Pop() (interface{}, stack) {
if len(st) > 0 {
e := st[len(st)-1]
st = st[:len(st)-1]
if e.isStr {
v = e.s
} else {
v = strconv.Itoa(e.i)
}
return e, st[:len(st)-1]
}
return v, st
return 0, st
}
// PopString removes the top element and returns it rendered as a string,
// together with the shortened stack. An int is formatted in decimal, a bool
// as "true" or "false", and a string is returned as is; any other dynamic
// type yields "". Popping an empty stack returns "" and the stack unchanged.
func (st stack) PopString() (string, stack) {
	n := len(st)
	if n == 0 {
		return "", st
	}

	rest := st[:n-1]
	switch top := st[n-1].(type) {
	case string:
		return top, rest
	case int:
		return strconv.Itoa(top), rest
	case bool:
		return strconv.FormatBool(top), rest
	}
	return "", rest
}
func (st stack) PopInt() (int, stack) {
if len(st) > 0 {
e := st[len(st)-1]
st = st[:len(st)-1]
if e.isInt {
return e.i, st
} else if e.isStr {
// If the string that was pushed was the representation
// of a number e.g. '123', then return the number. If the
// conversion doesn't work, assume the string pushed was
// intended to return, as an int, the ascii representation
// of the (one and only) character.
i, err := strconv.Atoi(e.s)
if err == nil {
return i, st
} else if len(e.s) >= 1 {
return int(e.s[0]), st
var i int
switch v := e.(type) {
case int:
i = v
case bool:
if v {
i = 1
} else {
i = 0
}
case string:
i, _ = strconv.Atoi(v)
}
return i, st[:len(st)-1]
}
return 0, st
}
func (st stack) PopBool() (bool, stack) {
var b bool
if len(st) > 0 {
e := st[len(st)-1]
st = st[:len(st)-1]
if e.isStr {
if e.s == "1" {
return true, st
}
return false, st
} else if e.i == 1 {
return true, st
} else {
return false, st
switch v := e.(type) {
case int:
b = v != 0
case bool:
b = v
case string:
b = v != "" && v != "false"
}
return b, st[:len(st)-1]
}
return false, st
}
func (st stack) PushInt(i int) stack {
e := stackElem{
i: i,
isInt: true,
}
return append(st, e)
}
func (st stack) PushBool(i bool) stack {
if i {
return st.PushInt(1)
}
return st.PushInt(0)
}
// static vars
var svars [26]string
@@ -372,13 +357,13 @@ var pb = &paramsBuffer{}
// TParm takes a terminfo parameterized string, such as setaf or cup, and
// evaluates the string, and returns the result with the parameter
// applied.
func (t *Terminfo) TParm(s string, p ...int) string {
func (t *Terminfo) TParm(s string, p ...interface{}) string {
var stk stack
var a, b string
var ai, bi int
var ab bool
var dvars [26]string
var params [9]int
var params [9]interface{}
pb.Start(s)
@@ -413,14 +398,18 @@ func (t *Terminfo) TParm(s string, p ...int) string {
pb.PutCh(ch)
case 'i': // increment both parameters (ANSI cup support)
params[0]++
params[1]++
if i, ok := params[0].(int); ok {
params[0] = i + 1
}
if i, ok := params[1].(int); ok {
params[1] = i + 1
}
case 'c', 's':
// NB: these, and 'd' below are special cased for
// efficiency. They could be handled by the richer
// format support below, less efficiently.
a, stk = stk.Pop()
a, stk = stk.PopString()
pb.PutString(a)
case 'd':
@@ -431,7 +420,7 @@ func (t *Terminfo) TParm(s string, p ...int) string {
// This is pretty suboptimal, but this is rarely used.
// None of the mainstream terminals use any of this,
// and it would surprise me if this code is ever
// executed outside of test cases.
// executed outside test cases.
f := "%"
if ch == ':' {
ch, _ = pb.NextCh()
@@ -450,7 +439,7 @@ func (t *Terminfo) TParm(s string, p ...int) string {
ai, stk = stk.PopInt()
pb.PutString(fmt.Sprintf(f, ai))
case 'c', 's':
a, stk = stk.Pop()
a, stk = stk.PopString()
pb.PutString(fmt.Sprintf(f, a))
}
@@ -458,17 +447,17 @@ func (t *Terminfo) TParm(s string, p ...int) string {
ch, _ = pb.NextCh()
ai = int(ch - '1')
if ai >= 0 && ai < len(params) {
stk = stk.PushInt(params[ai])
stk = stk.Push(params[ai])
} else {
stk = stk.PushInt(0)
stk = stk.Push(0)
}
case 'P': // pop & store variable
ch, _ = pb.NextCh()
if ch >= 'A' && ch <= 'Z' {
svars[int(ch-'A')], stk = stk.Pop()
svars[int(ch-'A')], stk = stk.PopString()
} else if ch >= 'a' && ch <= 'z' {
dvars[int(ch-'a')], stk = stk.Pop()
dvars[int(ch-'a')], stk = stk.PopString()
}
case 'g': // recall & push variable
@@ -481,7 +470,7 @@ func (t *Terminfo) TParm(s string, p ...int) string {
case '\'': // push(char)
ch, _ = pb.NextCh()
pb.NextCh() // must be ' but we don't check
_, _ = pb.NextCh() // must be ' but we don't check
stk = stk.Push(string(ch))
case '{': // push(int)
@@ -493,82 +482,82 @@ func (t *Terminfo) TParm(s string, p ...int) string {
ch, _ = pb.NextCh()
}
// ch must be '}' but no verification
stk = stk.PushInt(ai)
stk = stk.Push(ai)
case 'l': // push(strlen(pop))
a, stk = stk.Pop()
stk = stk.PushInt(len(a))
a, stk = stk.PopString()
stk = stk.Push(len(a))
case '+':
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushInt(ai + bi)
stk = stk.Push(ai + bi)
case '-':
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushInt(ai - bi)
stk = stk.Push(ai - bi)
case '*':
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushInt(ai * bi)
stk = stk.Push(ai * bi)
case '/':
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
if bi != 0 {
stk = stk.PushInt(ai / bi)
stk = stk.Push(ai / bi)
} else {
stk = stk.PushInt(0)
stk = stk.Push(0)
}
case 'm': // push(pop mod pop)
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
if bi != 0 {
stk = stk.PushInt(ai % bi)
stk = stk.Push(ai % bi)
} else {
stk = stk.PushInt(0)
stk = stk.Push(0)
}
case '&': // AND
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushInt(ai & bi)
stk = stk.Push(ai & bi)
case '|': // OR
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushInt(ai | bi)
stk = stk.Push(ai | bi)
case '^': // XOR
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushInt(ai ^ bi)
stk = stk.Push(ai ^ bi)
case '~': // bit complement
ai, stk = stk.PopInt()
stk = stk.PushInt(ai ^ -1)
stk = stk.Push(ai ^ -1)
case '!': // logical NOT
ai, stk = stk.PopInt()
stk = stk.PushBool(ai != 0)
stk = stk.Push(ai != 0)
case '=': // numeric compare or string compare
b, stk = stk.Pop()
a, stk = stk.Pop()
stk = stk.PushBool(a == b)
b, stk = stk.PopString()
a, stk = stk.PopString()
stk = stk.Push(a == b)
case '>': // greater than, numeric
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushBool(ai > bi)
stk = stk.Push(ai > bi)
case '<': // less than, numeric
bi, stk = stk.PopInt()
ai, stk = stk.PopInt()
stk = stk.PushBool(ai < bi)
stk = stk.Push(ai < bi)
case '?': // start conditional
@@ -650,15 +639,15 @@ func (t *Terminfo) TPuts(w io.Writer, s string) {
beg := strings.Index(s, "$<")
if beg < 0 {
// Most strings don't need padding, which is good news!
io.WriteString(w, s)
_, _ = io.WriteString(w, s)
return
}
io.WriteString(w, s[:beg])
_, _ = io.WriteString(w, s[:beg])
s = s[beg+2:]
end := strings.Index(s, ">")
if end < 0 {
// unterminated.. just emit bytes unadulterated
io.WriteString(w, "$<"+s)
_, _ = io.WriteString(w, "$<"+s)
return
}
val := s[:end]
@@ -729,7 +718,6 @@ func (t *Terminfo) TColor(fi, bi int) string {
var (
dblock sync.Mutex
terminfos = make(map[string]*Terminfo)
aliases = make(map[string]string)
)
// AddTerminfo can be called to register a new Terminfo entry.

View File

@@ -148,6 +148,9 @@ type tScreen struct {
finiOnce sync.Once
enablePaste string
disablePaste string
enterUrl string
exitUrl string
setWinSize string
cursorStyles map[CursorStyle]string
cursorStyle CursorStyle
saved *term.State
@@ -334,6 +337,26 @@ func (t *tScreen) prepareBracketedPaste() {
}
}
// prepareExtendedOSC selects the escape sequences used for hyperlinks and
// for window resizing. Values from the terminfo entry win when present;
// otherwise, if the entry has a mouse capability, built-in default
// sequences are used. With neither, the corresponding feature stays
// disabled (empty string).
func (t *tScreen) prepareExtendedOSC() {
	// More stuff that terminfo usually does not describe. Terminals that
	// don't understand these sequences will generally ignore them, and the
	// presence of mouse support is used as the hint that the defaults are
	// worth sending.
	ti := t.ti
	hasMouse := ti.Mouse != ""

	// Hyperlink enter/exit sequences.
	switch {
	case ti.EnterUrl != "":
		t.enterUrl, t.exitUrl = ti.EnterUrl, ti.ExitUrl
	case hasMouse:
		t.enterUrl = "\x1b]8;;%p1%s\x1b\\"
		t.exitUrl = "\x1b]8;;\x1b\\"
	}

	// Window resize sequence.
	switch {
	case ti.SetWindowSize != "":
		t.setWinSize = ti.SetWindowSize
	case hasMouse:
		t.setWinSize = "\x1b[8;%p1%p2%d;%dt"
	}
}
func (t *tScreen) prepareCursorStyles() {
// Another workaround for lack of reporting in terminfo.
// We assume if the terminal has a mouse entry, that it
@@ -502,6 +525,7 @@ func (t *tScreen) prepareKeys() {
t.prepareXtermModifiers()
t.prepareBracketedPaste()
t.prepareCursorStyles()
t.prepareExtendedOSC()
outer:
// Add key mappings for control keys.
@@ -623,11 +647,27 @@ func (t *tScreen) encodeRune(r rune, buf []byte) []byte {
return buf
}
func (t *tScreen) sendFgBg(fg Color, bg Color) {
func (t *tScreen) sendFgBg(fg Color, bg Color, attr AttrMask) AttrMask {
ti := t.ti
if ti.Colors == 0 {
return
// foreground vs background, we calculate luminance
// and possibly do a reverse video
if !fg.Valid() {
return attr
}
v, ok := t.colors[fg]
if !ok {
v = FindColor(fg, []Color{ColorBlack, ColorWhite})
t.colors[fg] = v
}
switch v {
case ColorWhite:
return attr
case ColorBlack:
return attr ^ AttrReverse
}
}
if fg == ColorReset || bg == ColorReset {
t.TPuts(ti.ResetFgBg)
}
@@ -638,7 +678,7 @@ func (t *tScreen) sendFgBg(fg Color, bg Color) {
t.TPuts(ti.TParm(ti.SetFgBgRGB,
int(r1), int(g1), int(b1),
int(r2), int(g2), int(b2)))
return
return attr
}
if fg.IsRGB() && ti.SetFgRGB != "" {
@@ -685,6 +725,7 @@ func (t *tScreen) sendFgBg(fg Color, bg Color) {
t.TPuts(ti.TParm(ti.SetBg, int(bg&0xff)))
}
}
return attr
}
func (t *tScreen) drawCell(x, y int) int {
@@ -727,7 +768,7 @@ func (t *tScreen) drawCell(x, y int) int {
t.TPuts(ti.AttrOff)
t.sendFgBg(fg, bg)
attrs = t.sendFgBg(fg, bg, attrs)
if attrs&AttrBold != 0 {
t.TPuts(ti.Bold)
}
@@ -749,8 +790,19 @@ func (t *tScreen) drawCell(x, y int) int {
if attrs&AttrStrikeThrough != 0 {
t.TPuts(ti.StrikeThrough)
}
// URL string can be long, so don't send it unless we really need to
if t.enterUrl != "" && t.curstyle != style {
if style.url != "" {
t.TPuts(ti.TParm(t.enterUrl, style.url))
} else {
t.TPuts(t.exitUrl)
}
}
t.curstyle = style
}
// now emit runes - taking care to not overrun width with a
// wide character, and to ensure that we emit exactly one regular
// character followed up by any residual combing characters
@@ -859,8 +911,9 @@ func (t *tScreen) Show() {
func (t *tScreen) clearScreen() {
t.TPuts(t.ti.AttrOff)
t.TPuts(t.exitUrl)
fg, bg, _ := t.style.Decompose()
t.sendFgBg(fg, bg)
_ = t.sendFgBg(fg, bg, AttrNone)
t.TPuts(t.ti.Clear)
t.clear = false
}
@@ -1716,6 +1769,14 @@ func (t *tScreen) HasKey(k Key) bool {
return t.keyexist[k]
}
// SetSize emits the terminal's set-window-size sequence, parameterized with w
// and h, when such a sequence is configured. Regardless of whether a sequence
// was sent, the cell buffer is invalidated and resize is run afterwards.
func (t *tScreen) SetSize(w, h int) {
	// An empty sequence means there is nothing to send for this terminal.
	if seq := t.setWinSize; seq != "" {
		resizeCmd := t.ti.TParm(seq, w, h)
		t.TPuts(resizeCmd)
	}

	t.cells.Invalidate()
	t.resize()
}
// Resize is a no-op for tScreen: all four arguments are ignored.
func (t *tScreen) Resize(int, int, int, int) {}
func (t *tScreen) Suspend() error {

View File

@@ -78,16 +78,13 @@ type ViewMouseBinding struct {
// the view that is clicked
ViewName string
// the context we are in when the click occurs. Not necessarily the context
// of the view we're clicking. If this is blank then it is a global binding.
FromContext string
// the context assigned to the clicked view. If blank, then we don't care
// what context is assigned
ToContext string
// the view that has focus when the click occurs.
FocusedView string
Handler func(ViewMouseBindingOpts) error
Modifier Modifier
// must be a mouse key
Key Key
}
@@ -191,8 +188,6 @@ type Gui struct {
screen tcell.Screen
suspendedMutex sync.Mutex
suspended bool
currentContext string
}
// NewGui returns a new Gui object with a given output mode.
@@ -268,10 +263,6 @@ func (g *Gui) Size() (x, y int) {
return g.maxX, g.maxY
}
func (g *Gui) SetCurrentContext(context string) {
g.currentContext = context
}
// SetRune writes a rune at the given point, relative to the top-left
// corner of the terminal. It checks if the position is valid and applies
// the given colors.
@@ -370,6 +361,45 @@ func (g *Gui) SetViewOnBottom(name string) (*View, error) {
return nil, errors.Wrap(ErrUnknownView, 0)
}
// SetViewOnTopOf repositions the view named toMove so that it sits directly
// after the view named other in g.views. Later entries in g.views are drawn
// higher, so "directly after" means "immediately on top of".
//
// It does nothing when both names are equal or when toMove is already later in
// the list than other. It returns a wrapped ErrUnknownView when either name
// does not match any view.
func (g *Gui) SetViewOnTopOf(toMove string, other string) error {
	g.Mutexes.ViewsMutex.Lock()
	defer g.Mutexes.ViewsMutex.Unlock()

	if toMove == other {
		return nil
	}

	// Locate both views. The names differ here, so a view can match at most
	// one of the two cases.
	src, dst := -1, -1
	for idx, view := range g.views {
		switch view.name {
		case toMove:
			src = idx
		case other:
			dst = idx
		}
	}
	if src < 0 || dst < 0 {
		return errors.Wrap(ErrUnknownView, 0)
	}

	// toMove is already above other: nothing to do.
	if src > dst {
		return nil
	}

	// Slide everything between the two positions (including other) down by
	// one slot, then drop the moved view into the slot right after other.
	moved := g.views[src]
	copy(g.views[src:dst], g.views[src+1:dst+1])
	g.views[dst] = moved

	return nil
}
// Views returns all the views in the GUI.
func (g *Gui) Views() []*View {
return g.views
@@ -470,7 +500,7 @@ func (g *Gui) CurrentView() *View {
// It behaves differently on different platforms. Somewhere it doesn't register Alt key press,
// on others it might report Ctrl as Alt. It's not consistent and therefore it's not recommended
// to use with mouse keys.
func (g *Gui) SetKeybinding(viewname string, contexts []string, key interface{}, mod Modifier, handler func(*Gui, *View) error) error {
func (g *Gui) SetKeybinding(viewname string, key interface{}, mod Modifier, handler func(*Gui, *View) error) error {
var kb *keybinding
k, ch, err := getKey(key)
@@ -482,7 +512,7 @@ func (g *Gui) SetKeybinding(viewname string, contexts []string, key interface{},
return ErrBlacklisted
}
kb = newKeybinding(viewname, contexts, k, ch, mod, handler)
kb = newKeybinding(viewname, k, ch, mod, handler)
g.keybindings = append(g.keybindings, kb)
return nil
}
@@ -1181,7 +1211,7 @@ func (g *Gui) onKey(ev *GocuiEvent) error {
return err
}
if ev.Mod == ModNone && IsMouseKey(ev.Key) {
if IsMouseKey(ev.Key) {
opts := ViewMouseBindingOpts{X: newCx + v.ox, Y: newCy + v.oy}
matched, err := g.execMouseKeybindings(v, ev, opts)
if err != nil {
@@ -1202,18 +1232,20 @@ func (g *Gui) onKey(ev *GocuiEvent) error {
func (g *Gui) execMouseKeybindings(view *View, ev *GocuiEvent, opts ViewMouseBindingOpts) (bool, error) {
isMatch := func(binding *ViewMouseBinding) bool {
return binding.ViewName == view.Name() && ev.Key == binding.Key && (binding.ToContext == "" || binding.ToContext == view.Context)
return binding.ViewName == view.Name() &&
ev.Key == binding.Key &&
ev.Mod == binding.Modifier
}
// first pass looks for ones that match both the view and the from context
// first pass looks for ones that match the focused view
for _, binding := range g.viewMouseBindings {
if isMatch(binding) && binding.FromContext != "" && binding.FromContext == g.currentContext {
if isMatch(binding) && binding.FocusedView != "" && binding.FocusedView == g.currentView.Name() {
return true, binding.Handler(opts)
}
}
for _, binding := range g.viewMouseBindings {
if isMatch(binding) && binding.FromContext == "" {
if isMatch(binding) && binding.FocusedView == "" {
return true, binding.Handler(opts)
}
}
@@ -1485,14 +1517,5 @@ func (g *Gui) matchView(v *View, kb *keybinding) bool {
if kb.viewName != v.name {
return false
}
// if the keybinding doesn't specify contexts, it applies for all contexts
if len(kb.contexts) == 0 {
return true
}
for _, context := range kb.contexts {
if context == v.Context {
return true
}
}
return false
return true
}

View File

@@ -20,7 +20,6 @@ type Modifier tcell.ModMask
// Keybidings are used to link a given key-press event with a handler.
type keybinding struct {
viewName string
contexts []string
key Key
ch rune
mod Modifier
@@ -93,10 +92,9 @@ func MustParseAll(input []string) map[interface{}]Modifier {
}
// newKeybinding returns a new Keybinding object.
func newKeybinding(viewname string, contexts []string, key Key, ch rune, mod Modifier, handler func(*Gui, *View) error) (kb *keybinding) {
func newKeybinding(viewname string, key Key, ch rune, mod Modifier, handler func(*Gui, *View) error) (kb *keybinding) {
kb = &keybinding{
viewName: viewname,
contexts: contexts,
key: key,
ch: ch,
mod: mod,

View File

@@ -146,8 +146,6 @@ type View struct {
// ParentView is the view which catches events bubbled up from the given view if there's no matching handler
ParentView *View
Context string // this is for assigning keybindings to a view only in certain contexts
searcher *searcher
// KeybindOnEdit should be set to true when you want to execute keybindings even when the view is editable

View File

@@ -1,14 +1,14 @@
# Unicode Text Segmentation for Go
[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg)
[![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0).
At this point, only the determination of grapheme cluster boundaries is implemented.
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/) and Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0).
## Background
### Grapheme Clusters
In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters|
@@ -17,7 +17,19 @@ In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings).
|🏳️‍🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`|
|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`|
This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
This package provides tools to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
### Word Boundaries
Word boundaries are used in a number of different contexts. The most familiar ones are selection (double-click mouse selection), cursor movement ("move to next word" control-arrow keys), and the dialog option "Whole Word Search" for search and replace. They are also used in database queries, to determine whether elements are within a certain number of words of one another. Searching may also use word boundaries in determining matching items. This package provides tools to determine word boundaries within strings.
### Sentence Boundaries
Sentence boundaries are often used for triple-click or some other method of selecting or iterating through blocks of text that are larger than single words. They are also used to determine whether words occur within the same sentence in database queries. This package provides tools to determine sentence boundaries within strings.
### Line Breaking
Line breaking, also known as word wrapping, is the process of breaking a section of text into lines such that it will fit in the available width of a page, window or other display area. This package provides tools to determine where a string may or may not be broken and where it must be broken (for example after newline characters).
## Installation
@@ -25,38 +37,102 @@ This package provides a tool to iterate over these grapheme clusters. This may b
go get github.com/rivo/uniseg
```
## Basic Example
## Examples
### Counting Characters in a String
```go
package uniseg
import (
"fmt"
"github.com/rivo/uniseg"
)
func main() {
gr := uniseg.NewGraphemes("👍🏼!")
for gr.Next() {
fmt.Printf("%x ", gr.Runes())
}
// Output: [1f44d 1f3fc] [21]
}
n := uniseg.GraphemeClusterCount("🇩🇪🏳️‍🌈")
fmt.Println(n)
// 2
```
### Using the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) Class
This is the most convenient method of iterating over grapheme clusters:
```go
gr := uniseg.NewGraphemes("👍🏼!")
for gr.Next() {
fmt.Printf("%x ", gr.Runes())
}
// [1f44d 1f3fc] [21]
```
### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function
This is orders of magnitude faster than the `Graphemes` class, but it requires the handling of states and boundaries:
```go
str := "🇩🇪🏳️‍🌈"
state := -1
var c string
for len(str) > 0 {
c, str, _, state = uniseg.StepString(str, state)
fmt.Printf("%x ", []rune(c))
}
// [1f1e9 1f1ea] [1f3f3 fe0f 200d 1f308]
```
### Advanced Examples
Breaking into grapheme clusters and evaluating line breaks:
```go
str := "First line.\nSecond line."
state := -1
var (
c string
boundaries int
)
for len(str) > 0 {
c, str, boundaries, state = uniseg.StepString(str, state)
fmt.Print(c)
if boundaries&uniseg.MaskLine == uniseg.LineCanBreak {
fmt.Print("|")
} else if boundaries&uniseg.MaskLine == uniseg.LineMustBreak {
fmt.Print("‖")
}
}
// First |line.
// ‖Second |line.‖
```
If you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
```go
str := "Hello, world!"
state := -1
var c string
for len(str) > 0 {
c, str, state = uniseg.FirstWordInString(str, state)
fmt.Printf("(%s)\n", c)
}
// (Hello)
// (,)
// ( )
// (world)
// (!)
```
Similarly, use
- [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString) for grapheme cluster determination only,
- [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and
- [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries).
## Documentation
Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation.
Refer to https://pkg.go.dev/github.com/rivo/uniseg for the package's documentation.
## Dependencies
This package does not depend on any packages outside the standard library.
## Sponsor this Project
[Become a Sponsor on GitHub](https://github.com/sponsors/rivo?metadata_source=uniseg_readme) to support this project!
## Your Feedback
Add your issue here on GitHub. Feel free to get in touch if you have any questions.
## Version
Version tags will be introduced once Golang modules are official. Consider this version 0.1.
Add your issue here on GitHub, preferably before submitting any PR's. Feel free to get in touch if you have any questions.

53
vendor/github.com/rivo/uniseg/doc.go generated vendored
View File

@@ -1,8 +1,53 @@
/*
Package uniseg implements Unicode Text Segmentation according to Unicode
Standard Annex #29 (http://unicode.org/reports/tr29/).
Package uniseg implements Unicode Text Segmentation and Unicode Line Breaking.
Unicode Text Segmentation conforms to Unicode Standard Annex #29
(https://unicode.org/reports/tr29/) and Unicode Line Breaking conforms to
Unicode Standard Annex #14 (https://unicode.org/reports/tr14/).
In short, using this package, you can split a string into grapheme clusters
(what people would usually refer to as a "character"), into words, and into
sentences. Or, in its simplest case, this package allows you to count the number
of characters in a string, especially when it contains complex characters such
as emojis, combining characters, or characters from Asian, Arabic, Hebrew, or
other languages. Additionally, you can use it to implement line breaking (or
"word wrapping"), that is, to determine where text can be broken over to the
next line when the width of the line is not big enough to fit the entire text.
Grapheme Clusters
Consider the rainbow flag emoji: 🏳️‍🌈. On most modern systems, it appears as one
character. But its string representation actually has 14 bytes, so counting
bytes (or using len("🏳️‍🌈")) will not work as expected. Counting runes won't,
either: The flag has 4 Unicode code points, thus 4 runes. The stdlib function
utf8.RuneCountInString("🏳️‍🌈") and len([]rune("🏳️‍🌈")) will both return 4.
The uniseg.GraphemeClusterCount(str) function will return 1 for the rainbow flag
emoji. The Graphemes class and a variety of functions in this package will allow
you to split strings into its grapheme clusters.
Word Boundaries
Word boundaries are used in a number of different contexts. The most familiar
ones are selection (double-click mouse selection), cursor movement ("move to
next word" control-arrow keys), and the dialog option "Whole Word Search" for
search and replace. This package provides methods for determining word
boundaries.
Sentence Boundaries
Sentence boundaries are often used for triple-click or some other method of
selecting or iterating through blocks of text that are larger than single words.
They are also used to determine whether words occur within the same sentence in
database queries. This package provides methods for determining sentence
boundaries.
Line Breaking
Line breaking, also known as word wrapping, is the process of breaking a section
of text into lines such that it will fit in the available width of a page,
window or other display area. This package provides methods to determine the
positions in a string where a line must be broken, may be broken, or must not be
broken.
At this point, only the determination of grapheme cluster boundaries is
implemented.
*/
package uniseg

2553
vendor/github.com/rivo/uniseg/eastasianwidth.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

213
vendor/github.com/rivo/uniseg/gen_breaktest.go generated vendored Normal file
View File

@@ -0,0 +1,213 @@
//go:build generate
// This program generates a Go file containing a slice of test cases based on the
// Unicode Character Database auxiliary data files. The command line arguments
// are as follows:
//
// 1. The name of the Unicode data file (just the filename, without extension).
// 2. The name of the locally generated Go file.
// 3. The name of the slice containing the test cases.
// 4. The name of the generator, for logging purposes.
//
//go:generate go run gen_breaktest.go GraphemeBreakTest graphemebreak_test.go graphemeBreakTestCases graphemes
//go:generate go run gen_breaktest.go WordBreakTest wordbreak_test.go wordBreakTestCases words
//go:generate go run gen_breaktest.go SentenceBreakTest sentencebreak_test.go sentenceBreakTestCases sentences
//go:generate go run gen_breaktest.go LineBreakTest linebreak_test.go lineBreakTestCases lines
package main
import (
"bufio"
"bytes"
"errors"
"fmt"
"go/format"
"io/ioutil"
"log"
"net/http"
"os"
"time"
)
// We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests.
const (
testCaseURL = `https://www.unicode.org/Public/14.0.0/ucd/auxiliary/%s.txt`
)
// main validates the command line, downloads and converts the requested break
// test file, formats the generated Go source, and writes it to the target
// file. Any failure is fatal.
func main() {
	if len(os.Args) < 5 {
		fmt.Println("Not enough arguments, see code for details")
		os.Exit(1)
	}
	dataFile, target, generator := os.Args[1], os.Args[2], os.Args[4]

	log.SetPrefix("gen_breaktest (" + generator + "): ")
	log.SetFlags(0)

	// Fetch the test cases and turn them into (unformatted) Go source.
	src, err := parse(fmt.Sprintf(testCaseURL, dataFile))
	if err != nil {
		log.Fatal(err)
	}

	// Run the result through gofmt's formatter.
	formatted, err := format.Source(src)
	if err != nil {
		log.Fatalln("gofmt:", err)
	}

	// Persist the formatted source.
	log.Print("Writing to ", target)
	if err = ioutil.WriteFile(target, formatted, 0644); err != nil {
		log.Fatal(err)
	}
}
// parse downloads the break test file at the given URL and converts it into
// Go source code declaring a slice of test cases. The slice is named after
// os.Args[3].
//
// Empty lines and lines starting with '#' are skipped. Every other line is
// split at the first '#' into a rune/boundary sequence (handed to
// parseRuneSequence) and a trailing comment. The last line of the file must be
// exactly "# EOF"; this guards against truncated downloads.
//
// It returns an error if the download fails, the server does not answer with
// 200 OK, a line cannot be parsed, or the "# EOF" marker is missing.
func parse(url string) ([]byte, error) {
	log.Printf("Parsing %s", url)
	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	body := res.Body
	defer body.Close()

	// http.Get does not treat non-2xx answers as errors; an error page must
	// not be parsed as test data.
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching %s: unexpected status %s", url, res.Status)
	}

	buf := new(bytes.Buffer)
	buf.Grow(120 << 10)
	buf.WriteString(`package uniseg
// Code generated via go generate from gen_breaktest.go. DO NOT EDIT.
// ` + os.Args[3] + ` are Grapheme testcases taken from
// ` + url + `
// on ` + time.Now().Format("January 2, 2006") + `. See
// https://www.unicode.org/license.html for the Unicode license agreement.
var ` + os.Args[3] + ` = []testCase {
`)

	sc := bufio.NewScanner(body)
	num := 0 // 1-based number of the line currently being processed
	var last []byte
	// Scratch buffers handed to parseRuneSequence and taken back afterwards so
	// that any growth is kept for the following lines.
	original := make([]byte, 0, 64)
	expected := make([]byte, 0, 64)
	for sc.Scan() {
		num++
		line := sc.Bytes()
		// Keep a private copy of the raw line: the slice returned by Bytes may
		// be overwritten by the next call to Scan.
		last = append(last[:0], line...)
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		var comment []byte
		if i := bytes.IndexByte(line, '#'); i >= 0 {
			comment = bytes.TrimSpace(line[i+1:])
			line = bytes.TrimSpace(line[:i])
		}
		original, expected, err = parseRuneSequence(line, original[:0], expected[:0])
		if err != nil {
			return nil, fmt.Errorf(`line %d: %v: %q`, num, err, line)
		}
		fmt.Fprintf(buf, "\t{original: \"%s\", expected: %s}, // %s\n", original, expected, comment)
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}

	// Check for final "# EOF", useful check if we're streaming via HTTP
	if !bytes.Equal(last, []byte("# EOF")) {
		return nil, fmt.Errorf(`line %d: expected "# EOF" as final line, got %q`, num, last)
	}
	buf.WriteString("}\n")
	return buf.Bytes(), nil
}
// Used by parseRuneSequence to match input via bytes.HasPrefix.
var (
	prefixBreak     = []byte("÷ ")
	prefixDontBreak = []byte("× ")
	breakOk         = []byte("÷")
	breakNo         = []byte("×")
)

// parseRuneSequence parses a rune + breaking opportunity sequence from b
// and appends the Go code for testcase.original to orig
// and appends the Go code for testcase.expected to exp.
// It returns the new orig and exp slices.
//
// E.g. for the input b="÷ 0020 × 0308 ÷ 1F1E6 ÷"
// it will append
//
//	"\u0020\u0308\U0001F1E6"
//
// and
//
//	"[][]rune{{0x0020,0x0308},{0x1F1E6},}"
//
// to orig and exp respectively.
//
// Code points must be written as 4 or 5 hexadecimal digits; anything else is
// rejected with an error, in which case both returned slices are nil.
//
// The formatting of exp is expected to be cleaned up by gofmt or format.Source.
// Note we explicitly require the sequence to start with ÷ and we implicitly
// require it to end with ÷.
func parseRuneSequence(b, orig, exp []byte) ([]byte, []byte, error) {
	// Check for and remove first ÷ or ×.
	if !bytes.HasPrefix(b, prefixBreak) && !bytes.HasPrefix(b, prefixDontBreak) {
		return nil, nil, errors.New("expected ÷ or × as first character")
	}
	if bytes.HasPrefix(b, prefixBreak) {
		b = b[len(prefixBreak):]
	} else {
		b = b[len(prefixDontBreak):]
	}

	// boundary records whether the previous separator was ÷, i.e. whether the
	// next code point opens a new inner slice.
	boundary := true
	exp = append(exp, "[][]rune{"...)
	for len(b) > 0 {
		if boundary {
			exp = append(exp, '{')
		}
		exp = append(exp, "0x"...)

		// Find end of hex digits. Each byte up to the next space must lie
		// inside one of the three hex digit ranges.
		var i int
		for i = 0; i < len(b) && b[i] != ' '; i++ {
			d := b[i]
			if ('0' <= d && d <= '9') ||
				('A' <= d && d <= 'F') ||
				('a' <= d && d <= 'f') {
				continue
			}
			return nil, nil, errors.New("bad hex digit")
		}

		// Pad to the fixed widths required by Go's \u (4 digits) and \U
		// (8 digits) escapes.
		switch i {
		case 4:
			orig = append(orig, "\\u"...)
		case 5:
			orig = append(orig, "\\U000"...)
		default:
			return nil, nil, errors.New("unsupport code point hex length")
		}
		orig = append(orig, b[:i]...)
		exp = append(exp, b[:i]...)
		b = b[i:]

		// Check for space between hex and ÷ or ×.
		if len(b) < 1 || b[0] != ' ' {
			return nil, nil, errors.New("bad input")
		}
		b = b[1:]

		// Check for next boundary.
		switch {
		case bytes.HasPrefix(b, breakOk):
			boundary = true
			b = b[len(breakOk):]
		case bytes.HasPrefix(b, breakNo):
			boundary = false
			b = b[len(breakNo):]
		default:
			return nil, nil, errors.New("missing ÷ or ×")
		}
		if boundary {
			exp = append(exp, '}')
		}
		exp = append(exp, ',')
		if len(b) > 0 && b[0] == ' ' {
			b = b[1:]
		}
	}
	exp = append(exp, '}')
	return orig, exp, nil
}

240
vendor/github.com/rivo/uniseg/gen_properties.go generated vendored Normal file
View File

@@ -0,0 +1,240 @@
//go:build generate
// This program generates a Go property file from Unicode Character
// Database auxiliary data files. The command line arguments are as follows:
//
// 1. The name of the Unicode data file (just the filename, without extension).
// 2. The name of the locally generated Go file.
// 3. The name of the slice mapping code points to properties.
// 4. The name of the generator, for logging purposes.
// 5. (Optional) Flags, comma-separated. The following flags are available:
// - "emojis": include emoji properties (Extended Pictographic only).
// - "gencat": include general category properties.
//
//go:generate go run gen_properties.go auxiliary/GraphemeBreakProperty graphemeproperties.go graphemeCodePoints graphemes emojis
//go:generate go run gen_properties.go auxiliary/WordBreakProperty wordproperties.go workBreakCodePoints words emojis
//go:generate go run gen_properties.go auxiliary/SentenceBreakProperty sentenceproperties.go sentenceBreakCodePoints sentences
//go:generate go run gen_properties.go LineBreak lineproperties.go lineBreakCodePoints lines gencat
//go:generate go run gen_properties.go EastAsianWidth eastasianwidth.go eastAsianWidth eastasianwidth
package main
import (
"bufio"
"bytes"
"errors"
"fmt"
"go/format"
"io/ioutil"
"log"
"net/http"
"os"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests.
const (
gbpURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt`
emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt`
)
// The regular expression for a line containing a code point range property.
var propertyPattern = regexp.MustCompile(`^([0-9A-F]{4,6})(\.\.([0-9A-F]{4,6}))?\s*;\s*([A-Za-z0-9_]+)\s*#\s(.+)$`)
// main validates the command line, evaluates the optional comma-separated
// flags in os.Args[5], downloads and converts the requested property file(s),
// formats the generated Go source, and writes it to the target file named by
// os.Args[2]. Any failure is fatal.
func main() {
	if len(os.Args) < 5 {
		fmt.Println("Not enough arguments, see code for details")
		os.Exit(1)
	}

	log.SetPrefix("gen_properties (" + os.Args[4] + "): ")
	log.SetFlags(0)

	// Parse flags.
	flags := make(map[string]struct{})
	if len(os.Args) >= 6 {
		for _, flag := range strings.Split(os.Args[5], ",") {
			flags[flag] = struct{}{}
		}
	}

	// Parse the text file and generate Go source code from it. An empty emoji
	// URL tells parse to skip the emoji data file.
	var emojis string
	if _, ok := flags["emojis"]; ok {
		emojis = emojiURL
	}
	_, includeGeneralCategory := flags["gencat"]
	src, err := parse(fmt.Sprintf(gbpURL, os.Args[1]), emojis, includeGeneralCategory)
	if err != nil {
		log.Fatal(err)
	}

	// Format the Go code.
	formatted, err := format.Source([]byte(src))
	if err != nil {
		// Fatalln (unlike Fatal) always separates its operands with a space.
		log.Fatalln("gofmt:", err)
	}

	// Save it to the (local) target file.
	log.Print("Writing to ", os.Args[2])
	if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
		log.Fatal(err)
	}
}
// parse parses the Unicode Properties text files located at the given URLs and
// returns their equivalent Go source code to be used in the uniseg package. If
// "emojiURL" is an empty string, no emoji code points will be included. If
// "includeGeneralCategory" is true, the Unicode General Category property will
// be extracted from the comments and included in the output.
//
// The generated slice is named after os.Args[3]. An error is returned if a
// download fails, a server does not answer with 200 OK, a line does not match
// the expected property format, or (with "includeGeneralCategory") a comment
// is too short to contain a general category.
func parse(gbpURL, emojiURL string, includeGeneralCategory bool) (string, error) {
	// Temporary buffer to hold properties.
	var properties [][4]string

	// Open the first URL.
	log.Printf("Parsing %s", gbpURL)
	res, err := http.Get(gbpURL)
	if err != nil {
		return "", err
	}
	in1 := res.Body
	defer in1.Close()

	// http.Get does not treat non-2xx answers as errors; an error page must
	// not be parsed as property data.
	if res.StatusCode != http.StatusOK {
		return "", fmt.Errorf("fetching %s: unexpected status %s", gbpURL, res.Status)
	}

	// Parse it.
	scanner := bufio.NewScanner(in1)
	num := 0
	for scanner.Scan() {
		num++
		line := strings.TrimSpace(scanner.Text())

		// Skip comments and empty lines.
		if strings.HasPrefix(line, "#") || line == "" {
			continue
		}

		// Everything else must be a code point range, a property and a comment.
		from, to, property, comment, err := parseProperty(line)
		if err != nil {
			return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err)
		}
		properties = append(properties, [4]string{from, to, property, comment})
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}

	// Open the second URL.
	if emojiURL != "" {
		log.Printf("Parsing %s", emojiURL)
		res, err = http.Get(emojiURL)
		if err != nil {
			return "", err
		}
		in2 := res.Body
		defer in2.Close()
		if res.StatusCode != http.StatusOK {
			return "", fmt.Errorf("fetching %s: unexpected status %s", emojiURL, res.Status)
		}

		// Parse it.
		scanner = bufio.NewScanner(in2)
		num = 0
		for scanner.Scan() {
			num++
			line := scanner.Text()

			// Skip comments, empty lines, and everything not containing
			// "Extended_Pictographic".
			if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, "Extended_Pictographic") {
				continue
			}

			// Everything else must be a code point range, a property and a comment.
			from, to, property, comment, err := parseProperty(line)
			if err != nil {
				return "", fmt.Errorf("emojis line %d: %v", num, err)
			}
			properties = append(properties, [4]string{from, to, property, comment})
		}
		if err := scanner.Err(); err != nil {
			return "", err
		}
	}

	// Sort properties by the start of their code point range. The sort is
	// stable so that entries sharing a start keep their input order and the
	// generated file is reproducible.
	sort.SliceStable(properties, func(i, j int) bool {
		left, _ := strconv.ParseUint(properties[i][0], 16, 64)
		right, _ := strconv.ParseUint(properties[j][0], 16, 64)
		return left < right
	})

	// Header.
	var (
		buf          bytes.Buffer
		emojiComment string
	)
	columns := 3
	if includeGeneralCategory {
		columns = 4
	}
	if emojiURL != "" {
		emojiComment = `
// and
// ` + emojiURL + `
// ("Extended_Pictographic" only)`
	}
	buf.WriteString(`package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
// ` + os.Args[3] + ` are taken from
// ` + gbpURL + emojiComment + `
// on ` + time.Now().Format("January 2, 2006") + `. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var ` + os.Args[3] + ` = [][` + strconv.Itoa(columns) + `]int{
`)

	// Properties.
	for _, prop := range properties {
		if includeGeneralCategory {
			// The comment is sliced as: two bytes of general category, one
			// separator byte, then the remaining text. Reject anything too
			// short for that instead of panicking on the slice expressions.
			if len(prop[3]) < 3 {
				return "", fmt.Errorf("%s: comment %q of range %s..%s is too short to hold a general category", os.Args[4], prop[3], prop[0], prop[1])
			}
			generalCategory := "gc" + prop[3][:2]
			if generalCategory == "gcL&" {
				generalCategory = "gcLC"
			}
			prop[3] = prop[3][3:]
			fmt.Fprintf(&buf, "{0x%s,0x%s,%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), generalCategory, prop[3])
		} else {
			fmt.Fprintf(&buf, "{0x%s,0x%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), prop[3])
		}
	}

	// Tail.
	buf.WriteString("}")

	return buf.String(), nil
}
// parseProperty extracts the code point range, the property name, and the
// trailing comment from one data line of a Unicode properties file by matching
// it against propertyPattern. For a line naming a single code point, "to" is
// set to the same value as "from". If the line does not match the pattern, all
// strings are empty and err is non-nil.
func parseProperty(line string) (from, to, property, comment string, err error) {
	match := propertyPattern.FindStringSubmatch(line)
	if match == nil {
		return "", "", "", "", errors.New("no property found")
	}

	// Group 1 is the range start, group 3 the optional range end.
	first, last := match[1], match[3]
	if last == "" {
		last = first
	}
	return first, last, match[4], match[5], nil
}
// translateProperty turns a property name from the Unicode data file into the
// identifier used in the generated Go code: every underscore is removed and
// the given prefix is put in front of the result.
func translateProperty(prefix, property string) string {
	stripped := strings.Replace(property, "_", "", -1)
	return prefix + stripped
}

View File

@@ -2,267 +2,246 @@ package uniseg
import "unicode/utf8"
// The states of the grapheme cluster parser.
const (
grAny = iota
grCR
grControlLF
grL
grLVV
grLVTT
grPrepend
grExtendedPictographic
grExtendedPictographicZWJ
grRIOdd
grRIEven
)
// The grapheme cluster parser's breaking instructions.
const (
grNoBoundary = iota
grBoundary
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
// Graphemes implements an iterator over Unicode grapheme clusters, or
// user-perceived characters. While iterating, it also provides information
// about word boundaries, sentence boundaries, and line breaks.
//
// This map is queried as follows:
// After constructing the class via [NewGraphemes] for a given string "str",
// [Next] is called for every grapheme cluster in a loop until it returns false.
// Inside the loop, information about the grapheme cluster as well as boundary
// information is available via the various methods (see examples below).
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state and breaking instruction from
// the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
var grTransitions = map[[2]int][3]int{
// Each key is {current state, property of the next code point}; each value is
// {new state, breaking instruction, rule number}.
// GB5: boundary before CR, LF, and Control.
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
// GB4: boundary after CR, LF, and Control.
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
// GB3: no boundary between CR and LF.
{grCR, prLF}: {grAny, grNoBoundary, 30},
// GB6: no boundary between L and a following L, V, LV, or LVT.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB7: no boundary between LV or V and a following V or T.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB8: no boundary between LVT or T and a following T.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB9: no boundary before Extend or ZWJ.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9a: no boundary before SpacingMark.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9b: no boundary after Prepend.
{grAny, prPreprend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB11: no boundary inside an Extended_Pictographic + Extend* + ZWJ +
// Extended_Pictographic sequence.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB12 / GB13: regional indicators are kept together in pairs; a boundary is
// only signalled before the first indicator of a new pair.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
}
// Graphemes implements an iterator over Unicode extended grapheme clusters,
// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to
// "user-perceived characters". These characters often consist of multiple
// code points (e.g. the "woman kissing woman" emoji consists of 8 code points:
// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ +
// woman) and the rules described in Annex #29 must be applied to group those
// code points into clusters perceived by the user as one character.
//
// Using this class to iterate over a string is convenient but it is much slower
// than using this package's [Step] or [StepString] functions or any of the
// other specialized functions starting with "First".
type Graphemes struct {
// The code points over which this class iterates.
codePoints []rune
// The original string.
original string
// The (byte-based) indices of the code points into the original string plus
// len(original string). Thus, len(indices) = len(codePoints) + 1.
indices []int
// The remaining string to be parsed.
remaining string
// The current grapheme cluster to be returned. These are indices into
// codePoints/indices. If start == end, we either haven't started iterating
// yet (0) or the iteration has already completed (1).
start, end int
// The current grapheme cluster.
cluster string
// The index of the next code point to be parsed.
pos int
// The byte offset of the current grapheme cluster relative to the original
// string.
offset int
// The current boundary information of the Step() parser.
boundaries int
// The current state of the Step() parser. Negative values are sentinels:
// -1 means iteration has not started yet, -2 means the iterator is past the
// end.
state int
}
// NewGraphemes returns a new grapheme cluster iterator.
func NewGraphemes(s string) *Graphemes {
l := utf8.RuneCountInString(s)
codePoints := make([]rune, l)
indices := make([]int, l+1)
i := 0
for pos, r := range s {
codePoints[i] = r
indices[i] = pos
i++
return &Graphemes{
original: s,
remaining: s,
state: -1,
}
indices[l] = len(s)
g := &Graphemes{
codePoints: codePoints,
indices: indices,
}
g.Next() // Parse ahead.
return g
}
// Next advances the iterator by one grapheme cluster and returns false if no
// clusters are left. This function must be called before the first cluster is
// accessed.
func (g *Graphemes) Next() bool {
g.start = g.end
// The state transition gives us a boundary instruction BEFORE the next code
// point so we always need to stay ahead by one code point.
// Parse the next code point.
for g.pos <= len(g.codePoints) {
// GB2.
if g.pos == len(g.codePoints) {
g.end = g.pos
g.pos++
break
}
// Determine the property of the next character.
nextProperty := property(g.codePoints[g.pos])
g.pos++
// Find the applicable transition.
var boundary bool
transition, ok := grTransitions[[2]int{g.state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
g.state = transition[0]
boundary = transition[1] == grBoundary
} else {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}]
transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}]
if okAnyProp && okAnyState {
// Both apply. We'll use a mix (see comments for grTransitions).
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
if transAnyProp[2] < transAnyState[2] {
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
}
} else if okAnyProp {
// We only have a specific state.
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
// We only have a specific property.
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
} else {
// No known transition. GB999: Any x Any.
g.state = grAny
boundary = true
}
}
// If we found a cluster boundary, let's stop here. The current cluster will
// be the one that just ended.
if g.pos-1 == 0 /* GB1 */ || boundary {
g.end = g.pos - 1
break
}
if len(g.remaining) == 0 {
// We're already past the end.
g.state = -2
g.cluster = ""
return false
}
return g.start != g.end
g.offset += len(g.cluster)
g.cluster, g.remaining, g.boundaries, g.state = StepString(g.remaining, g.state)
return true
}
// Runes returns a slice of runes (code points) which corresponds to the current
// grapheme cluster. If the iterator is already past the end or Next() has not
// grapheme cluster. If the iterator is already past the end or [Next] has not
// yet been called, nil is returned.
func (g *Graphemes) Runes() []rune {
if g.start == g.end {
if g.state < 0 {
return nil
}
return g.codePoints[g.start:g.end]
return []rune(g.cluster)
}
// Str returns a substring of the original string which corresponds to the
// current grapheme cluster. If the iterator is already past the end or [Next]
// has not yet been called, an empty string is returned.
func (g *Graphemes) Str() string {
	// The cluster is kept empty before the first call to Next and after the
	// iteration has finished, so no extra state check is needed here.
	return g.cluster
}
// Bytes returns a byte slice which corresponds to the current grapheme cluster.
// If the iterator is already past the end or Next() has not yet been called,
// If the iterator is already past the end or [Next] has not yet been called,
// nil is returned.
func (g *Graphemes) Bytes() []byte {
if g.start == g.end {
if g.state < 0 {
return nil
}
return []byte(string(g.codePoints[g.start:g.end]))
return []byte(g.cluster)
}
// Positions returns the interval of the current grapheme cluster as byte
// positions into the original string. The first returned value "from" indexes
// the first byte and the second returned value "to" indexes the first byte that
// is not included anymore, i.e. str[from:to] is the current grapheme cluster of
// the original string "str". If [Next] has not yet been called, both values are
// 0. If the iterator is already past the end, both values are 1.
func (g *Graphemes) Positions() (int, int) {
	switch g.state {
	case -1: // Not started yet.
		return 0, 0
	case -2: // Past the end.
		return 1, 1
	}
	return g.offset, g.offset + len(g.cluster)
}
// IsWordBoundary reports whether a word ends after the current grapheme
// cluster. While the iterator is in one of its negative sentinel states, it
// always reports true.
func (g *Graphemes) IsWordBoundary() bool {
	return g.state < 0 || g.boundaries&MaskWord != 0
}
// IsSentenceBoundary reports whether a sentence ends after the current
// grapheme cluster. While the iterator is in one of its negative sentinel
// states, it always reports true.
func (g *Graphemes) IsSentenceBoundary() bool {
	return g.state < 0 || g.boundaries&MaskSentence != 0
}
// LineBreak tells whether the line can be broken after the current grapheme
// cluster: [LineDontBreak] means the line may not be broken, [LineMustBreak]
// means it must be broken, and [LineCanBreak] means it may or may not be
// broken. In state -1 the result is [LineDontBreak], in state -2 it is
// [LineMustBreak].
func (g *Graphemes) LineBreak() int {
	switch g.state {
	case -1:
		return LineDontBreak
	case -2:
		return LineMustBreak
	default:
		return g.boundaries & MaskLine
	}
}
// Reset puts the iterator into its initial state such that the next call to
// [Next] sets it to the first grapheme cluster again.
func (g *Graphemes) Reset() {
	g.state = -1
	g.offset = 0
	g.cluster = ""
	g.remaining = g.original
}
// GraphemeClusterCount returns the number of user-perceived characters
// (grapheme clusters) for the given string. To calculate this number, it
// iterates through the string using the Graphemes iterator.
// (grapheme clusters) for the given string.
func GraphemeClusterCount(s string) (n int) {
g := NewGraphemes(s)
for g.Next() {
state := -1
for len(s) > 0 {
_, s, _, state = FirstGraphemeClusterInString(s, state)
n++
}
return
}
// FirstGraphemeCluster splits off the first grapheme cluster of the byte slice
// "b" according to the rules of Unicode Standard Annex #29, Grapheme Cluster
// Boundaries. Call it repeatedly to walk through all grapheme clusters of a
// byte slice.
//
// Pass -1 as "state" when the current state is not known, for example on the
// first call. On subsequent calls, pass the state and the "rest" slice returned
// by the previous call.
//
// "cluster" is the sub-slice of "b" holding the identified grapheme cluster and
// "rest" is the sub-slice of "b" that follows it. Once "rest" has length 0, all
// of "b" has been processed. For an empty "b", nil values are returned.
//
// This function is slightly less convenient than the Graphemes class but has
// much better performance and makes no allocations, which makes it a good fit
// for large byte slices.
//
// The "reserved" return value is a placeholder for future functionality and may
// be ignored for the time being.
func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, reserved, newState int) {
	// Nothing to do for empty input.
	if len(b) == 0 {
		return nil, nil, 0, 0
	}

	// Decode the leading rune. If it spans the whole input, we are done.
	first, consumed := utf8.DecodeRune(b)
	if consumed >= len(b) {
		return b, nil, 0, grAny
	}

	// Derive the state from the leading rune if the caller does not know it.
	if state < 0 {
		state, _ = transitionGraphemeState(state, first)
	}

	// Feed rune after rune into the parser until it reports a boundary.
	for consumed < len(b) {
		next, size := utf8.DecodeRune(b[consumed:])
		var isBoundary bool
		state, isBoundary = transitionGraphemeState(state, next)
		if isBoundary {
			return b[:consumed], b[consumed:], 0, state
		}
		consumed += size
	}

	// No boundary before the end of the input: everything is one cluster.
	return b, nil, 0, grAny
}
// FirstGraphemeClusterInString is like [FirstGraphemeCluster] but its input and
// outputs are strings. For an empty string, empty strings and zero values are
// returned.
func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, reserved, newState int) {
	// Nothing to do for empty input.
	if len(str) == 0 {
		return "", "", 0, 0
	}

	// Decode the leading rune. If it spans the whole input, we are done.
	first, consumed := utf8.DecodeRuneInString(str)
	if consumed >= len(str) {
		return str, "", 0, grAny
	}

	// Derive the state from the leading rune if the caller does not know it.
	if state < 0 {
		state, _ = transitionGraphemeState(state, first)
	}

	// Feed rune after rune into the parser until it reports a boundary.
	for consumed < len(str) {
		next, size := utf8.DecodeRuneInString(str[consumed:])
		var isBoundary bool
		state, isBoundary = transitionGraphemeState(state, next)
		if isBoundary {
			return str[:consumed], str[consumed:], 0, state
		}
		consumed += size
	}

	// No boundary before the end of the input: everything is one cluster.
	return str, "", 0, grAny
}

1891
vendor/github.com/rivo/uniseg/graphemeproperties.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

137
vendor/github.com/rivo/uniseg/graphemerules.go generated vendored Normal file
View File

@@ -0,0 +1,137 @@
package uniseg
// The states of the grapheme cluster parser. They are used as the first key
// element and as the first value element of the grTransitions entries.
const (
grAny = iota // No specific state. Also serves as the "any state" wildcard in lookups.
grCR // Last code point was a CR.
grControlLF // Last code point was an LF or a Control.
grL // Last code point was a Hangul L.
grLVV // Last code point was a Hangul LV or V.
grLVTT // Last code point was a Hangul LVT or T.
grPrepend // Last code point was a Prepend.
grExtendedPictographic // Inside an Extended_Pictographic sequence (possibly followed by Extend).
grExtendedPictographicZWJ // Extended_Pictographic sequence followed by a ZWJ.
grRIOdd // An odd number of regional indicators has been seen.
grRIEven // An even number of regional indicators has been seen.
)
// The grapheme cluster parser's breaking instructions. They are stored as the
// second value element of the grTransitions entries.
const (
grNoBoundary = iota // No cluster boundary before the next code point.
grBoundary // Cluster boundary before the next code point.
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
//
// This map is queried as follows:
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state from (3) and breaking instruction
// from the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
//
// Unicode version 14.0.0.
var grTransitions = map[[2]int][3]int{
// GB5: boundary before CR, LF, and Control.
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
// GB4: boundary after CR, LF, and Control.
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
// GB3: no boundary between CR and LF.
{grCR, prLF}: {grAny, grNoBoundary, 30},
// GB6: no boundary between L and a following L, V, LV, or LVT.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB7: no boundary between LV or V and a following V or T.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB8: no boundary between LVT or T and a following T.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB9: no boundary before Extend or ZWJ.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9a: no boundary before SpacingMark.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9b: no boundary after Prepend.
{grAny, prPrepend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB11: no boundary inside an Extended_Pictographic + Extend* + ZWJ +
// Extended_Pictographic sequence.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB12 / GB13: regional indicators are kept together in pairs; a boundary is
// only signalled before the first indicator of a new pair.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
}
// transitionGraphemeState computes the grapheme cluster parser's next state
// from the current state and the next code point "r". The second return value
// reports whether a cluster boundary lies before "r".
func transitionGraphemeState(state int, r rune) (newState int, boundary bool) {
	// Look up the property of the upcoming code point.
	prop := property(graphemeCodePoints, r)

	// An exact (state, property) match always wins.
	if exact, found := grTransitions[[2]int{state, prop}]; found {
		return exact[0], exact[1] == grBoundary
	}

	// Otherwise fall back to the wildcard entries.
	byState, hasByState := grTransitions[[2]int{state, prAny}]
	byProp, hasByProp := grTransitions[[2]int{grAny, prop}]
	switch {
	case hasByState && hasByProp:
		// Both wildcards apply: the new state comes from the property entry,
		// the breaking instruction from the entry with the lower rule number
		// (the property entry on a tie).
		instruction := byProp[1]
		if byState[2] < byProp[2] {
			instruction = byState[1]
		}
		return byProp[0], instruction == grBoundary
	case hasByState:
		// Only the state is known. With the current transition map, a property
		// entry probably always exists, so this case is kept for future
		// modifications of the map.
		return byState[0], byState[1] == grBoundary
	case hasByProp:
		// Only the property is known.
		return byProp[0], byProp[1] == grBoundary
	default:
		// No known transition. GB999: Any ÷ Any.
		return grAny, true
	}
}

129
vendor/github.com/rivo/uniseg/line.go generated vendored Normal file
View File

@@ -0,0 +1,129 @@
package uniseg
import "unicode/utf8"
// FirstLineSegment splits off the prefix of the byte slice "b" after which a
// decision to break the text over to the next line can or must be made,
// following the rules of Unicode Standard Annex #14. It is the building block
// for line breaking (word wrapping), i.e. for fitting a section of text into
// the available width of a page, window or other display area.
//
// The returned "segment" should not be split any further unless there is no
// other breaking opportunity, in which case it may be broken at grapheme
// cluster boundaries (see the FirstGraphemeCluster() function).
//
// "mustBreak" is true if the line MUST be broken after the segment (for example
// after newline characters) and false if it MAY be broken there.
//
// Call this function repeatedly to extract all segments of a byte slice. Pass
// -1 as "state" when the current state is not known, for example on the first
// call. On subsequent calls, pass the state and the "rest" slice returned by
// the previous call.
//
// "segment" is the sub-slice of "b" holding the identified line segment and
// "rest" is the sub-slice of "b" that follows it. Once "rest" has length 0, all
// of "b" has been processed. For an empty "b", nil values are returned.
//
// In accordance with UAX #14 LB3, the final segment is returned with
// "mustBreak" set to true. To ignore this, check whether "rest" has length 0
// and call [HasTrailingLineBreak] or [HasTrailingLineBreakInString] on the last
// rune.
//
// This algorithm may break within grapheme clusters, see Section 8.2 Example 6
// of UAX #14. Use the Step() function instead to avoid this.
func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool, newState int) {
	// Nothing to do for empty input.
	if len(b) == 0 {
		return nil, nil, false, 0
	}

	// Decode the leading rune. If it spans the whole input, we are done.
	first, consumed := utf8.DecodeRune(b)
	if consumed >= len(b) {
		return b, nil, true, lbAny // LB3.
	}

	// Derive the state from the leading rune if the caller does not know it.
	if state < 0 {
		state, _ = transitionLineBreakState(state, first, b[consumed:], "")
	}

	// Feed rune after rune into the parser until it allows or demands a break.
	for consumed < len(b) {
		next, size := utf8.DecodeRune(b[consumed:])
		var breakKind int
		state, breakKind = transitionLineBreakState(state, next, b[consumed+size:], "")
		if breakKind != LineDontBreak {
			return b[:consumed], b[consumed:], breakKind == LineMustBreak, state
		}
		consumed += size
	}

	// No break opportunity before the end of the input.
	return b, nil, true, lbAny // LB3.
}
// FirstLineSegmentInString is like FirstLineSegment() but its input and outputs
// are strings. For an empty string, empty strings and zero values are returned.
func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) {
	// Nothing to do for empty input.
	if len(str) == 0 {
		return "", "", false, 0
	}

	// Decode the leading rune. If it spans the whole input, we are done.
	first, consumed := utf8.DecodeRuneInString(str)
	if consumed >= len(str) {
		return str, "", true, lbAny // LB3.
	}

	// Derive the state from the leading rune if the caller does not know it.
	if state < 0 {
		state, _ = transitionLineBreakState(state, first, nil, str[consumed:])
	}

	// Feed rune after rune into the parser until it allows or demands a break.
	for consumed < len(str) {
		next, size := utf8.DecodeRuneInString(str[consumed:])
		var breakKind int
		state, breakKind = transitionLineBreakState(state, next, nil, str[consumed+size:])
		if breakKind != LineDontBreak {
			return str[:consumed], str[consumed:], breakKind == LineMustBreak, state
		}
		consumed += size
	}

	// No break opportunity before the end of the input.
	return str, "", true, lbAny // LB3.
}
// HasTrailingLineBreak returns true if the last rune in the given byte slice is
// one of the hard line break code points as defined in LB4 and LB5 of UAX #14.
func HasTrailingLineBreak(b []byte) bool {
	r, _ := utf8.DecodeLastRune(b)
	// propertyWithGenCat yields a line break property (pr*), so it must be
	// compared against the property constants, not against the parser states
	// (lb*), which form a separate enumeration.
	lineProperty, _ := propertyWithGenCat(lineBreakCodePoints, r)
	return lineProperty == prBK || lineProperty == prCR || lineProperty == prLF || lineProperty == prNL
}
// HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string.
func HasTrailingLineBreakInString(str string) bool {
	r, _ := utf8.DecodeLastRuneInString(str)
	// propertyWithGenCat yields a line break property (pr*), so it must be
	// compared against the property constants, not against the parser states
	// (lb*), which form a separate enumeration.
	lineProperty, _ := propertyWithGenCat(lineBreakCodePoints, r)
	return lineProperty == prBK || lineProperty == prCR || lineProperty == prLF || lineProperty == prNL
}

3510
vendor/github.com/rivo/uniseg/lineproperties.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

470
vendor/github.com/rivo/uniseg/linerules.go generated vendored Normal file
View File

@@ -0,0 +1,470 @@
package uniseg
import "unicode/utf8"
// The states of the line break parser. The last two constants are not states
// but flag bits which transitionLineBreakState combines with a state value
// using bitwise OR and strips again on the next call.
const (
lbAny = iota
lbBK
lbCR
lbLF
lbNL
lbSP
lbZW
lbWJ
lbGL
lbBA
lbHY
lbCL
lbCP
lbEX
lbIS
lbSY
lbOP
lbQU
lbQUSP
lbNS
lbCLCPSP
lbB2
lbB2SP
lbCB
lbBB
lbLB21a
lbHL
lbAL
lbNU
lbPR
lbEB
lbIDEM
lbNUNU
lbNUSY
lbNUIS
lbNUCL
lbNUCP
lbPO
lbJL
lbJV
lbJT
lbH2
lbH3
lbOddRI
lbEvenRI
lbExtPicCn
// Flag: the previous code point was a zero-width joiner (used for LB8a).
lbZWJBit = 64
// Flag: the state is CP and its East Asian Width is not F, W, or H (used
// for LB30).
lbCPeaFWHBit = 128
)
// These constants define whether a given text may be broken into the next line.
// If the break is optional (LineCanBreak), you may choose to break or not based
// on your own criteria, for example, if the text has reached the available
// width.
const (
LineDontBreak = iota // You may not break the line here.
LineCanBreak // You may or may not break the line here.
LineMustBreak // You must break the line here.
)
// The line break parser's state transitions. It's analogous to grTransitions:
// each entry maps {state, property of the next code point} to
// {new state, breaking instruction, rule number}. Unicode version 14.0.0.
var lbTransitions = map[[2]int][3]int{
// LB4.
{lbAny, prBK}: {lbBK, LineCanBreak, 310},
{lbBK, prAny}: {lbAny, LineMustBreak, 40},
// LB5.
{lbAny, prCR}: {lbCR, LineCanBreak, 310},
{lbAny, prLF}: {lbLF, LineCanBreak, 310},
{lbAny, prNL}: {lbNL, LineCanBreak, 310},
{lbCR, prLF}: {lbLF, LineDontBreak, 50},
{lbCR, prAny}: {lbAny, LineMustBreak, 50},
{lbLF, prAny}: {lbAny, LineMustBreak, 50},
{lbNL, prAny}: {lbAny, LineMustBreak, 50},
// LB6.
// NOTE(review): the four keys below repeat keys already listed under LB4 and
// LB5 above. Presumably these later entries are the ones that take effect,
// which would make the earlier duplicates dead entries - confirm.
{lbAny, prBK}: {lbBK, LineDontBreak, 60},
{lbAny, prCR}: {lbCR, LineDontBreak, 60},
{lbAny, prLF}: {lbLF, LineDontBreak, 60},
{lbAny, prNL}: {lbNL, LineDontBreak, 60},
// LB7.
{lbAny, prSP}: {lbSP, LineDontBreak, 70},
{lbAny, prZW}: {lbZW, LineDontBreak, 70},
// LB8.
{lbZW, prSP}: {lbZW, LineDontBreak, 70},
{lbZW, prAny}: {lbAny, LineCanBreak, 80},
// LB11.
{lbAny, prWJ}: {lbWJ, LineDontBreak, 110},
{lbWJ, prAny}: {lbAny, LineDontBreak, 110},
// LB12.
{lbAny, prGL}: {lbGL, LineCanBreak, 310},
{lbGL, prAny}: {lbAny, LineDontBreak, 120},
// LB13 (simple transitions).
{lbAny, prCL}: {lbCL, LineCanBreak, 310},
{lbAny, prCP}: {lbCP, LineCanBreak, 310},
{lbAny, prEX}: {lbEX, LineDontBreak, 130},
{lbAny, prIS}: {lbIS, LineCanBreak, 310},
{lbAny, prSY}: {lbSY, LineCanBreak, 310},
// LB14.
{lbAny, prOP}: {lbOP, LineCanBreak, 310},
{lbOP, prSP}: {lbOP, LineDontBreak, 70},
{lbOP, prAny}: {lbAny, LineDontBreak, 140},
// LB15.
{lbQU, prSP}: {lbQUSP, LineDontBreak, 70},
{lbQU, prOP}: {lbOP, LineDontBreak, 150},
{lbQUSP, prOP}: {lbOP, LineDontBreak, 150},
// LB16.
{lbCL, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbNUCL, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbCP, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbNUCP, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbCL, prNS}: {lbNS, LineDontBreak, 160},
{lbNUCL, prNS}: {lbNS, LineDontBreak, 160},
{lbCP, prNS}: {lbNS, LineDontBreak, 160},
{lbNUCP, prNS}: {lbNS, LineDontBreak, 160},
{lbCLCPSP, prNS}: {lbNS, LineDontBreak, 160},
// LB17.
{lbAny, prB2}: {lbB2, LineCanBreak, 310},
{lbB2, prSP}: {lbB2SP, LineDontBreak, 70},
{lbB2, prB2}: {lbB2, LineDontBreak, 170},
{lbB2SP, prB2}: {lbB2, LineDontBreak, 170},
// LB18.
{lbSP, prAny}: {lbAny, LineCanBreak, 180},
{lbQUSP, prAny}: {lbAny, LineCanBreak, 180},
{lbCLCPSP, prAny}: {lbAny, LineCanBreak, 180},
{lbB2SP, prAny}: {lbAny, LineCanBreak, 180},
// LB19.
{lbAny, prQU}: {lbQU, LineDontBreak, 190},
{lbQU, prAny}: {lbAny, LineDontBreak, 190},
// LB20.
{lbAny, prCB}: {lbCB, LineCanBreak, 200},
{lbCB, prAny}: {lbAny, LineCanBreak, 200},
// LB21.
{lbAny, prBA}: {lbBA, LineDontBreak, 210},
{lbAny, prHY}: {lbHY, LineDontBreak, 210},
{lbAny, prNS}: {lbNS, LineDontBreak, 210},
{lbAny, prBB}: {lbBB, LineCanBreak, 310},
{lbBB, prAny}: {lbAny, LineDontBreak, 210},
// LB21a.
{lbAny, prHL}: {lbHL, LineCanBreak, 310},
{lbHL, prHY}: {lbLB21a, LineDontBreak, 210},
{lbHL, prBA}: {lbLB21a, LineDontBreak, 210},
{lbLB21a, prAny}: {lbAny, LineDontBreak, 211},
// LB21b.
{lbSY, prHL}: {lbHL, LineDontBreak, 212},
{lbNUSY, prHL}: {lbHL, LineDontBreak, 212},
// LB22.
{lbAny, prIN}: {lbAny, LineDontBreak, 220},
// LB23.
{lbAny, prAL}: {lbAL, LineCanBreak, 310},
{lbAny, prNU}: {lbNU, LineCanBreak, 310},
{lbAL, prNU}: {lbNU, LineDontBreak, 230},
{lbHL, prNU}: {lbNU, LineDontBreak, 230},
{lbNU, prAL}: {lbAL, LineDontBreak, 230},
{lbNU, prHL}: {lbHL, LineDontBreak, 230},
{lbNUNU, prAL}: {lbAL, LineDontBreak, 230},
{lbNUNU, prHL}: {lbHL, LineDontBreak, 230},
// LB23a.
{lbAny, prPR}: {lbPR, LineCanBreak, 310},
{lbAny, prID}: {lbIDEM, LineCanBreak, 310},
{lbAny, prEB}: {lbEB, LineCanBreak, 310},
{lbAny, prEM}: {lbIDEM, LineCanBreak, 310},
{lbPR, prID}: {lbIDEM, LineDontBreak, 231},
{lbPR, prEB}: {lbEB, LineDontBreak, 231},
{lbPR, prEM}: {lbIDEM, LineDontBreak, 231},
{lbIDEM, prPO}: {lbPO, LineDontBreak, 231},
{lbEB, prPO}: {lbPO, LineDontBreak, 231},
// LB24.
{lbAny, prPO}: {lbPO, LineCanBreak, 310},
{lbPR, prAL}: {lbAL, LineDontBreak, 240},
{lbPR, prHL}: {lbHL, LineDontBreak, 240},
{lbPO, prAL}: {lbAL, LineDontBreak, 240},
{lbPO, prHL}: {lbHL, LineDontBreak, 240},
{lbAL, prPR}: {lbPR, LineDontBreak, 240},
{lbAL, prPO}: {lbPO, LineDontBreak, 240},
{lbHL, prPR}: {lbPR, LineDontBreak, 240},
{lbHL, prPO}: {lbPO, LineDontBreak, 240},
// LB25 (simple transitions).
{lbPR, prNU}: {lbNU, LineDontBreak, 250},
{lbPO, prNU}: {lbNU, LineDontBreak, 250},
{lbOP, prNU}: {lbNU, LineDontBreak, 250},
{lbHY, prNU}: {lbNU, LineDontBreak, 250},
{lbNU, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNU, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNU, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNUNU, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNUNU, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNUNU, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNUSY, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNUSY, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNUSY, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNUIS, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNUIS, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNUIS, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNU, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNU, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNUNU, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNUNU, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNUSY, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNUSY, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNUIS, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNUIS, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNU, prPO}: {lbPO, LineDontBreak, 250},
{lbNUNU, prPO}: {lbPO, LineDontBreak, 250},
{lbNUSY, prPO}: {lbPO, LineDontBreak, 250},
{lbNUIS, prPO}: {lbPO, LineDontBreak, 250},
{lbNUCL, prPO}: {lbPO, LineDontBreak, 250},
{lbNUCP, prPO}: {lbPO, LineDontBreak, 250},
{lbNU, prPR}: {lbPR, LineDontBreak, 250},
{lbNUNU, prPR}: {lbPR, LineDontBreak, 250},
{lbNUSY, prPR}: {lbPR, LineDontBreak, 250},
{lbNUIS, prPR}: {lbPR, LineDontBreak, 250},
{lbNUCL, prPR}: {lbPR, LineDontBreak, 250},
{lbNUCP, prPR}: {lbPR, LineDontBreak, 250},
// LB26.
{lbAny, prJL}: {lbJL, LineCanBreak, 310},
{lbAny, prJV}: {lbJV, LineCanBreak, 310},
{lbAny, prJT}: {lbJT, LineCanBreak, 310},
{lbAny, prH2}: {lbH2, LineCanBreak, 310},
{lbAny, prH3}: {lbH3, LineCanBreak, 310},
{lbJL, prJL}: {lbJL, LineDontBreak, 260},
{lbJL, prJV}: {lbJV, LineDontBreak, 260},
{lbJL, prH2}: {lbH2, LineDontBreak, 260},
{lbJL, prH3}: {lbH3, LineDontBreak, 260},
{lbJV, prJV}: {lbJV, LineDontBreak, 260},
{lbJV, prJT}: {lbJT, LineDontBreak, 260},
{lbH2, prJV}: {lbJV, LineDontBreak, 260},
{lbH2, prJT}: {lbJT, LineDontBreak, 260},
{lbJT, prJT}: {lbJT, LineDontBreak, 260},
{lbH3, prJT}: {lbJT, LineDontBreak, 260},
// LB27.
{lbJL, prPO}: {lbPO, LineDontBreak, 270},
{lbJV, prPO}: {lbPO, LineDontBreak, 270},
{lbJT, prPO}: {lbPO, LineDontBreak, 270},
{lbH2, prPO}: {lbPO, LineDontBreak, 270},
{lbH3, prPO}: {lbPO, LineDontBreak, 270},
{lbPR, prJL}: {lbJL, LineDontBreak, 270},
{lbPR, prJV}: {lbJV, LineDontBreak, 270},
{lbPR, prJT}: {lbJT, LineDontBreak, 270},
{lbPR, prH2}: {lbH2, LineDontBreak, 270},
{lbPR, prH3}: {lbH3, LineDontBreak, 270},
// LB28.
{lbAL, prAL}: {lbAL, LineDontBreak, 280},
{lbAL, prHL}: {lbHL, LineDontBreak, 280},
{lbHL, prAL}: {lbAL, LineDontBreak, 280},
{lbHL, prHL}: {lbHL, LineDontBreak, 280},
// LB29.
{lbIS, prAL}: {lbAL, LineDontBreak, 290},
{lbIS, prHL}: {lbHL, LineDontBreak, 290},
{lbNUIS, prAL}: {lbAL, LineDontBreak, 290},
{lbNUIS, prHL}: {lbHL, LineDontBreak, 290},
}
// transitionLineBreakState determines the new state of the line break parser
// given the current state and the next code point. It also returns the type of
// line break: LineDontBreak, LineCanBreak, or LineMustBreak. If more than one
// code point is needed to determine the new state, the byte slice or the string
// starting after rune "r" can be used (whichever is not nil or empty) for
// further lookups.
//
// The state passed in and the state returned may carry the lbZWJBit and
// lbCPeaFWHBit flags in addition to one of the lb* state values. A negative
// state means that the current state is not known.
func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
	// Determine the property of the next character.
	nextProperty, generalCategory := propertyWithGenCat(lineBreakCodePoints, r)

	// Prepare: strip the flag bits which a previous call attached to the state.
	var forceNoBreak, isCPeaFWH bool
	if state >= 0 && state&lbCPeaFWHBit != 0 {
		isCPeaFWH = true // LB30: CP but ea is not F, W, or H.
		state = state &^ lbCPeaFWHBit
	}
	if state >= 0 && state&lbZWJBit != 0 {
		state = state &^ lbZWJBit // Extract zero-width joiner bit.
		forceNoBreak = true       // LB8a.
	}

	// The deferred function post-processes the named results on every return
	// path below.
	defer func() {
		// Transition into LB30.
		if newState == lbCP || newState == lbNUCP {
			ea := property(eastAsianWidth, r)
			if ea != prF && ea != prW && ea != prH {
				newState |= lbCPeaFWHBit
			}
		}

		// Override break.
		if forceNoBreak {
			lineBreak = LineDontBreak
		}
	}()

	// LB1.
	if nextProperty == prAI || nextProperty == prSG || nextProperty == prXX {
		nextProperty = prAL
	} else if nextProperty == prSA {
		if generalCategory == gcMn || generalCategory == gcMc {
			nextProperty = prCM
		} else {
			nextProperty = prAL
		}
	} else if nextProperty == prCJ {
		nextProperty = prNS
	}

	// Combining marks.
	if nextProperty == prZWJ || nextProperty == prCM {
		var bit int
		if nextProperty == prZWJ {
			bit = lbZWJBit
		}
		mustBreakState := state < 0 || state == lbBK || state == lbCR || state == lbLF || state == lbNL
		if !mustBreakState && state != lbSP && state != lbZW && state != lbQUSP && state != lbCLCPSP && state != lbB2SP {
			// LB9.
			return state | bit, LineDontBreak
		}
		// LB10.
		if mustBreakState {
			return lbAL | bit, LineMustBreak
		}
		return lbAL | bit, LineCanBreak
	}

	// Find the applicable transition in the table.
	var rule int
	transition, ok := lbTransitions[[2]int{state, nextProperty}]
	if ok {
		// We have a specific transition. We'll use it.
		newState, lineBreak, rule = transition[0], transition[1], transition[2]
	} else {
		// No specific transition found. Try the less specific ones.
		transAnyProp, okAnyProp := lbTransitions[[2]int{state, prAny}]
		transAnyState, okAnyState := lbTransitions[[2]int{lbAny, nextProperty}]
		if okAnyProp && okAnyState {
			// Both apply. We'll use a mix (see comments for grTransitions).
			newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2]
			if transAnyProp[2] < transAnyState[2] {
				lineBreak, rule = transAnyProp[1], transAnyProp[2]
			}
		} else if okAnyProp {
			// We only have a specific state.
			// This branch will probably never be reached because okAnyState will
			// always be true given the current transition map. But we keep it here
			// for future modifications to the transition map where this may not be
			// true anymore.
			newState, lineBreak, rule = transAnyProp[0], transAnyProp[1], transAnyProp[2]
		} else if okAnyState {
			// We only have a specific property.
			newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2]
		} else {
			// No known transition. LB31: ALL ÷ ALL.
			newState, lineBreak, rule = lbAny, LineCanBreak, 310
		}
	}

	// LB12a.
	if rule > 121 &&
		nextProperty == prGL &&
		(state != lbSP && state != lbBA && state != lbHY && state != lbLB21a && state != lbQUSP && state != lbCLCPSP && state != lbB2SP) {
		return lbGL, LineDontBreak
	}

	// LB13.
	if rule > 130 && state != lbNU && state != lbNUNU {
		switch nextProperty {
		case prCL:
			return lbCL, LineDontBreak
		case prCP:
			return lbCP, LineDontBreak
		case prIS:
			return lbIS, LineDontBreak
		case prSY:
			return lbSY, LineDontBreak
		}
	}

	// LB25 (look ahead): (PR | PO) × (OP | HY) NU. The parentheses around the
	// OP/HY alternative are required: "&&" binds tighter than "||", so without
	// them any HY followed by a number would skip the rule and state checks and
	// suppress breaks that earlier rules demand (e.g. after SP or a hard line
	// break).
	if rule > 250 &&
		(state == lbPR || state == lbPO) &&
		(nextProperty == prOP || nextProperty == prHY) {
		var next rune
		if b != nil { // Byte slice version.
			next, _ = utf8.DecodeRune(b)
		} else { // String version.
			next, _ = utf8.DecodeRuneInString(str)
		}
		if next != utf8.RuneError {
			pr, _ := propertyWithGenCat(lineBreakCodePoints, next)
			if pr == prNU {
				return lbNU, LineDontBreak
			}
		}
	}

	// LB30 (part one).
	if rule > 300 {
		if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
			ea := property(eastAsianWidth, r)
			if ea != prF && ea != prW && ea != prH {
				return lbOP, LineDontBreak
			}
		} else if isCPeaFWH {
			switch nextProperty {
			case prAL:
				return lbAL, LineDontBreak
			case prHL:
				return lbHL, LineDontBreak
			case prNU:
				return lbNU, LineDontBreak
			}
		}
	}

	// LB30a.
	if newState == lbAny && nextProperty == prRI {
		if state != lbOddRI && state != lbEvenRI { // Includes state == -1.
			// Transition into the first RI.
			return lbOddRI, lineBreak
		}
		if state == lbOddRI {
			// Don't break pairs of Regional Indicators.
			return lbEvenRI, LineDontBreak
		}
		return lbOddRI, lineBreak
	}

	// LB30b.
	if rule > 302 {
		if nextProperty == prEM {
			if state == lbEB || state == lbExtPicCn {
				// The first result is a parser state, so the state wildcard
				// lbAny is returned here, not the property wildcard prAny.
				// NOTE(review): the table maps EM to lbIDEM elsewhere; confirm
				// whether lbIDEM would be the more appropriate state here.
				return lbAny, LineDontBreak
			}
		}
		graphemeProperty := property(graphemeCodePoints, r)
		if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
			return lbExtPicCn, LineCanBreak
		}
	}

	return
}

File diff suppressed because it is too large Load Diff

88
vendor/github.com/rivo/uniseg/sentence.go generated vendored Normal file
View File

@@ -0,0 +1,88 @@
package uniseg
import "unicode/utf8"
// FirstSentence splits off the leading sentence of the byte slice "b", using
// the sentence boundary rules of Unicode Standard Annex #29. Feeding the
// returned "rest" and "newState" back into the function walks through all
// sentences of the input one by one.
//
// Pass -1 as "state" on the first call (or whenever the parser state is
// unknown). On subsequent calls, pass the state returned by the previous call
// together with the previous "rest" slice.
//
// Both "sentence" and "rest" are sub-slices of "b": "sentence" holds the
// identified sentence and "rest" starts right after its last byte. An empty
// "rest" means that all of "b" has been consumed.
//
// For an empty "b", all return values are zero values.
func FirstSentence(b []byte, state int) (sentence, rest []byte, newState int) {
	// Nothing to do for empty input.
	if len(b) == 0 {
		return nil, nil, 0
	}

	// Decode the first rune. If it is the only one, it is the whole sentence.
	first, pos := utf8.DecodeRune(b)
	if pos >= len(b) {
		return b, nil, sbAny
	}

	// Without a known state, the first rune determines the initial state.
	if state < 0 {
		state, _ = transitionSentenceBreakState(state, first, b[pos:], "")
	}

	// Feed the parser rune by rune until it reports a boundary.
	for pos < len(b) {
		next, size := utf8.DecodeRune(b[pos:])
		var atBoundary bool
		state, atBoundary = transitionSentenceBreakState(state, next, b[pos+size:], "")
		if atBoundary {
			return b[:pos], b[pos:], state
		}
		pos += size
	}

	// The input ended before a boundary was found.
	return b, nil, sbAny
}
// FirstSentenceInString works like [FirstSentence] but takes a string and
// returns the sentence and the rest as strings.
func FirstSentenceInString(str string, state int) (sentence, rest string, newState int) {
	// Nothing to do for empty input.
	if len(str) == 0 {
		return "", "", 0
	}

	// Decode the first rune. If it is the only one, it is the whole sentence.
	first, pos := utf8.DecodeRuneInString(str)
	if pos >= len(str) {
		return str, "", sbAny
	}

	// Without a known state, the first rune determines the initial state.
	if state < 0 {
		state, _ = transitionSentenceBreakState(state, first, nil, str[pos:])
	}

	// Feed the parser rune by rune until it reports a boundary.
	for pos < len(str) {
		next, size := utf8.DecodeRuneInString(str[pos:])
		var atBoundary bool
		state, atBoundary = transitionSentenceBreakState(state, next, nil, str[pos+size:])
		if atBoundary {
			return str[:pos], str[pos:], state
		}
		pos += size
	}

	// The input ended before a boundary was found.
	return str, "", sbAny
}

2812
vendor/github.com/rivo/uniseg/sentenceproperties.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

205
vendor/github.com/rivo/uniseg/sentencerules.go generated vendored Normal file
View File

@@ -0,0 +1,205 @@
package uniseg
import "unicode/utf8"
// The states of the sentence break parser. The descriptions below summarize
// how sbTransitions enters each state. Step() and StepString() pack these
// values into a field extracted with maskSentenceState, so they must fit into
// that mask.
const (
sbAny = iota // Default state. Also used as the "any state" wildcard in sbTransitions keys.
sbCR // After CR.
sbParaSep // After Sep or LF (including CR LF).
sbATerm // After ATerm.
sbUpper // After Upper.
sbLower // After Lower.
sbSB7 // After (Upper | Lower) ATerm.
sbSB8Close // After ATerm Close*, with at least one Close.
sbSB8Sp // After ATerm Close* Sp*, with at least one Sp.
sbSTerm // After STerm.
sbSB8aClose // After STerm Close*, with at least one Close.
sbSB8aSp // After STerm Close* Sp*, with at least one Sp.
)
// The sentence break parser's breaking instructions. They are stored as the
// second element of the sbTransitions values; transitionSentenceBreakState()
// turns them into a boolean by comparing with sbBreak.
const (
sbDontBreak = iota // No sentence boundary.
sbBreak // Sentence boundary.
)
// The sentence break parser's state transitions. It's analogous to
// grTransitions, see comments there for details. Unicode version 14.0.0.
//
// Each key is a {state, property} pair and each value is a {new state,
// breaking instruction, rule number} triple, which is how
// transitionSentenceBreakState() reads the entries. The state sbAny and the
// property prAny act as wildcards there: when no exact key exists, the
// {state, prAny} and {sbAny, property} entries are consulted and the breaking
// instruction of the one with the lower rule number is used.
var sbTransitions = map[[2]int][3]int{
// SB3.
{sbAny, prCR}: {sbCR, sbDontBreak, 9990},
{sbCR, prLF}: {sbParaSep, sbDontBreak, 30},
// SB4.
{sbAny, prSep}: {sbParaSep, sbDontBreak, 9990},
{sbAny, prLF}: {sbParaSep, sbDontBreak, 9990},
{sbParaSep, prAny}: {sbAny, sbBreak, 40},
{sbCR, prAny}: {sbAny, sbBreak, 40},
// SB6.
{sbAny, prATerm}: {sbATerm, sbDontBreak, 9990},
{sbATerm, prNumeric}: {sbAny, sbDontBreak, 60},
{sbSB7, prNumeric}: {sbAny, sbDontBreak, 60}, // Because ATerm also appears in SB7.
// SB7.
{sbAny, prUpper}: {sbUpper, sbDontBreak, 9990},
{sbAny, prLower}: {sbLower, sbDontBreak, 9990},
{sbUpper, prATerm}: {sbSB7, sbDontBreak, 70},
{sbLower, prATerm}: {sbSB7, sbDontBreak, 70},
{sbSB7, prUpper}: {sbUpper, sbDontBreak, 70},
// SB8a.
{sbAny, prSTerm}: {sbSTerm, sbDontBreak, 9990},
{sbATerm, prSContinue}: {sbAny, sbDontBreak, 81},
{sbATerm, prATerm}: {sbATerm, sbDontBreak, 81},
{sbATerm, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB7, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB7, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB7, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8Close, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8Close, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8Close, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8Sp, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8Sp, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8Sp, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSTerm, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSTerm, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSTerm, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8aClose, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8aClose, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8aClose, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8aSp, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8aSp, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8aSp, prSTerm}: {sbSTerm, sbDontBreak, 81},
// SB9.
{sbATerm, prClose}: {sbSB8Close, sbDontBreak, 90},
{sbSB7, prClose}: {sbSB8Close, sbDontBreak, 90},
{sbSB8Close, prClose}: {sbSB8Close, sbDontBreak, 90},
{sbATerm, prSp}: {sbSB8Sp, sbDontBreak, 90},
{sbSB7, prSp}: {sbSB8Sp, sbDontBreak, 90},
{sbSB8Close, prSp}: {sbSB8Sp, sbDontBreak, 90},
{sbSTerm, prClose}: {sbSB8aClose, sbDontBreak, 90},
{sbSB8aClose, prClose}: {sbSB8aClose, sbDontBreak, 90},
{sbSTerm, prSp}: {sbSB8aSp, sbDontBreak, 90},
{sbSB8aClose, prSp}: {sbSB8aSp, sbDontBreak, 90},
{sbATerm, prSep}: {sbParaSep, sbDontBreak, 90},
{sbATerm, prCR}: {sbParaSep, sbDontBreak, 90},
{sbATerm, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSB7, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSB7, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSB7, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSB8Close, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSB8Close, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSB8Close, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSTerm, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSTerm, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSTerm, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSB8aClose, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSB8aClose, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSB8aClose, prLF}: {sbParaSep, sbDontBreak, 90},
// SB10.
{sbSB8Sp, prSp}: {sbSB8Sp, sbDontBreak, 100},
{sbSB8aSp, prSp}: {sbSB8aSp, sbDontBreak, 100},
{sbSB8Sp, prSep}: {sbParaSep, sbDontBreak, 100},
{sbSB8Sp, prCR}: {sbParaSep, sbDontBreak, 100},
{sbSB8Sp, prLF}: {sbParaSep, sbDontBreak, 100},
// SB11.
{sbATerm, prAny}: {sbAny, sbBreak, 110},
{sbSB7, prAny}: {sbAny, sbBreak, 110},
{sbSB8Close, prAny}: {sbAny, sbBreak, 110},
{sbSB8Sp, prAny}: {sbAny, sbBreak, 110},
{sbSTerm, prAny}: {sbAny, sbBreak, 110},
{sbSB8aClose, prAny}: {sbAny, sbBreak, 110},
{sbSB8aSp, prAny}: {sbAny, sbBreak, 110},
// We'll always break after ParaSep due to SB4.
}
// transitionSentenceBreakState advances the sentence break parser by one code
// point. Given the current parser state and the next rune "r", it returns the
// state to continue with and whether there is a sentence boundary in front of
// "r". Rules that depend on code points beyond "r" read ahead in the text
// following "r", which is taken from "b" if it is not nil and from "str"
// otherwise.
func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newState int, sentenceBreak bool) {
	prop := property(sentenceBreakCodePoints, r)

	// SB5 (Replacing Ignore Rules): Extend and Format characters keep the
	// current state and don't cause a break, except in the cases below.
	if prop == prExtend || prop == prFormat {
		switch {
		case state == sbParaSep || state == sbCR:
			// SB5 must not override SB3 or SB4.
			return sbAny, true
		case state < 0:
			// SB1.
			return sbAny, true
		}
		return state, false
	}

	// Look up the transition, preferring an exact {state, property} match.
	var rule int
	if exact, found := sbTransitions[[2]int{state, prop}]; found {
		newState, sentenceBreak, rule = exact[0], exact[1] == sbBreak, exact[2]
	} else {
		// Fall back to the wildcard entries.
		byState, hasByState := sbTransitions[[2]int{state, prAny}]
		byProp, hasByProp := sbTransitions[[2]int{sbAny, prop}]
		switch {
		case hasByState && hasByProp:
			// Both apply: the new state comes from the property entry, the
			// breaking instruction from whichever rule has the lower number
			// (see comments for grTransitions).
			newState, sentenceBreak, rule = byProp[0], byProp[1] == sbBreak, byProp[2]
			if byState[2] < byProp[2] {
				sentenceBreak, rule = byState[1] == sbBreak, byState[2]
			}
		case hasByState:
			// Only the state-specific entry exists. With the current
			// transition map, a property entry probably always exists too, but
			// the case is kept for future changes to the map.
			newState, sentenceBreak, rule = byState[0], byState[1] == sbBreak, byState[2]
		case hasByProp:
			// Only the property-specific entry exists.
			newState, sentenceBreak, rule = byProp[0], byProp[1] == sbBreak, byProp[2]
		default:
			// No known transition. SB999: Any × Any.
			newState, sentenceBreak, rule = sbAny, false, 9990
		}
	}

	// SB8 is only relevant if the rule selected so far ranks below it (rule
	// number above 80) and the state is one that follows an ATerm.
	afterATerm := state == sbATerm || state == sbSB8Close || state == sbSB8Sp || state == sbSB7
	if rule <= 80 || !afterATerm {
		return newState, sentenceBreak
	}

	// Right side of SB8: skip ahead until we hit one of the properties that
	// end the look-ahead, or until decoding yields utf8.RuneError (which is
	// also what decoding an exhausted input returns).
	for !(prop == prOLetter ||
		prop == prUpper ||
		prop == prLower ||
		prop == prSep ||
		prop == prCR ||
		prop == prLF ||
		prop == prATerm ||
		prop == prSTerm) {
		var size int
		if b != nil { // Byte slice version.
			r, size = utf8.DecodeRune(b)
			b = b[size:]
		} else { // String version.
			r, size = utf8.DecodeRuneInString(str)
			str = str[size:]
		}
		if r == utf8.RuneError {
			break
		}
		prop = property(sentenceBreakCodePoints, r)
	}
	if prop == prLower {
		return sbLower, false
	}
	return newState, sentenceBreak
}

198
vendor/github.com/rivo/uniseg/step.go generated vendored Normal file
View File

@@ -0,0 +1,198 @@
package uniseg
import "unicode/utf8"
// Bit masks for picking apart the "boundaries" value returned by the Step()
// function. They are derived from the flag positions below so that masks and
// shifts cannot drift apart.
const (
	MaskLine     = 1<<shiftWord - 1   // The bits below the word boundary flag (3).
	MaskWord     = 1 << shiftWord     // The word boundary flag (4).
	MaskSentence = 1 << shiftSentence // The sentence boundary flag (8).
)

// Bit positions of the word and sentence boundary flags within the
// "boundaries" value returned by the Step() function.
const (
	shiftWord     = 2
	shiftSentence = 3
)

// Bit positions at which the Step() function stores the individual parser
// states inside its combined state value. The grapheme cluster state occupies
// the lowest bits. The positions must leave enough room for the state values
// of each boundary algorithm so that the fields don't overlap (and they must
// all still fit in a single int).
const (
	shiftWordState     = 4
	shiftSentenceState = 9
	shiftLineState     = 13
)

// Bit masks which extract the individual parser states from the combined state
// value of the Step() function, after shifting. Each mask spans the bits
// between the position of its own field and the position of the next field.
// The line break state is the topmost field and is limited to eight bits.
const (
	maskGraphemeState = 1<<shiftWordState - 1                      // 0xf
	maskWordState     = 1<<(shiftSentenceState-shiftWordState) - 1 // 0x1f
	maskSentenceState = 1<<(shiftLineState-shiftSentenceState) - 1 // 0xf
	maskLineState     = 0xff
)
// Step splits off the first grapheme cluster (user-perceived character) of the
// byte slice "b" and reports what kind of boundary lies between that cluster
// and the one after it. Three kinds of boundary information are provided at
// once: word boundaries, sentence boundaries, and line breaks. The function
// thus combines FirstGraphemeCluster(), FirstWord(), FirstSentence(), and
// FirstLineSegment().
//
// Decode the "boundaries" return value like this:
//
//   - boundaries&MaskWord != 0: The boundary is a word boundary.
//   - boundaries&MaskWord == 0: The boundary is not a word boundary.
//   - boundaries&MaskSentence != 0: The boundary is a sentence boundary.
//   - boundaries&MaskSentence == 0: The boundary is not a sentence boundary.
//   - boundaries&MaskLine == LineDontBreak: You must not break the line at the
//     boundary.
//   - boundaries&MaskLine == LineMustBreak: You must break the line at the
//     boundary.
//   - boundaries&MaskLine == LineCanBreak: You may or may not break the line at
//     the boundary.
//
// Call the function repeatedly to walk through all grapheme clusters of a byte
// slice. Pass -1 as "state" on the first call (or whenever the state is
// unknown); afterwards, pass the state and the "rest" slice returned by the
// previous call.
//
// Both "cluster" and "rest" are sub-slices of "b": "cluster" holds the first
// grapheme cluster and "rest" starts right after its last byte. An empty
// "rest" means that all of "b" has been consumed.
//
// For an empty "b", all return values are zero values.
//
// While slightly less convenient than using the Graphemes class, this function
// has much better performance and makes no allocations. It lends itself well to
// large byte slices.
//
// Note that in accordance with UAX #14 LB3, the final segment will end with
// a mandatory line break (boundaries&MaskLine == LineMustBreak). You can choose
// to ignore this by checking if the length of the "rest" slice is 0 and calling
// [HasTrailingLineBreak] or [HasTrailingLineBreakInString] on the last rune.
func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState int) {
	// Nothing to do for empty input.
	if len(b) == 0 {
		return nil, nil, 0, 0
	}

	// What we report when the cluster extends to the end of the input: all
	// boundary types apply and all parsers are back in their "any" state.
	endBoundaries := LineMustBreak | (1 << shiftWord) | (1 << shiftSentence)
	endState := grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)

	// Decode the first rune. If it is the only one, it is the whole cluster.
	first, pos := utf8.DecodeRune(b)
	if pos >= len(b) {
		return b, nil, endBoundaries, endState
	}

	// Obtain the four parser states, either from the first rune (unknown
	// state) or by unpacking the combined state.
	var grState, wordState, sentState, lineState int
	tail := b[pos:]
	if state < 0 {
		grState, _ = transitionGraphemeState(state, first)
		wordState, _ = transitionWordBreakState(state, first, tail, "")
		sentState, _ = transitionSentenceBreakState(state, first, tail, "")
		lineState, _ = transitionLineBreakState(state, first, tail, "")
	} else {
		grState = state & maskGraphemeState
		wordState = (state >> shiftWordState) & maskWordState
		sentState = (state >> shiftSentenceState) & maskSentenceState
		lineState = (state >> shiftLineState) & maskLineState
	}

	// Advance all parsers in lockstep until the grapheme parser reports a
	// cluster boundary.
	for pos < len(b) {
		next, size := utf8.DecodeRune(tail)
		tail = b[pos+size:]

		var (
			clusterEnds, wordEnds, sentenceEnds bool
			lineAction                          int
		)
		grState, clusterEnds = transitionGraphemeState(grState, next)
		wordState, wordEnds = transitionWordBreakState(wordState, next, tail, "")
		sentState, sentenceEnds = transitionSentenceBreakState(sentState, next, tail, "")
		lineState, lineAction = transitionLineBreakState(lineState, next, tail, "")

		if clusterEnds {
			flags := lineAction
			if wordEnds {
				flags |= 1 << shiftWord
			}
			if sentenceEnds {
				flags |= 1 << shiftSentence
			}
			packed := grState | (wordState << shiftWordState) | (sentState << shiftSentenceState) | (lineState << shiftLineState)
			return b[:pos], b[pos:], flags, packed
		}
		pos += size
	}

	// The input ended before a cluster boundary was found.
	return b, nil, endBoundaries, endState
}
// StepString works like [Step] but takes a string and returns the cluster and
// the rest as strings.
func StepString(str string, state int) (cluster, rest string, boundaries int, newState int) {
	// Nothing to do for empty input.
	if len(str) == 0 {
		return "", "", 0, 0
	}

	// What we report when the cluster extends to the end of the input: all
	// boundary types apply and all parsers are back in their "any" state.
	endBoundaries := LineMustBreak | (1 << shiftWord) | (1 << shiftSentence)
	endState := grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)

	// Decode the first rune. If it is the only one, it is the whole cluster.
	first, pos := utf8.DecodeRuneInString(str)
	if pos >= len(str) {
		return str, "", endBoundaries, endState
	}

	// Obtain the four parser states, either from the first rune (unknown
	// state) or by unpacking the combined state.
	var grState, wordState, sentState, lineState int
	tail := str[pos:]
	if state < 0 {
		grState, _ = transitionGraphemeState(state, first)
		wordState, _ = transitionWordBreakState(state, first, nil, tail)
		sentState, _ = transitionSentenceBreakState(state, first, nil, tail)
		lineState, _ = transitionLineBreakState(state, first, nil, tail)
	} else {
		grState = state & maskGraphemeState
		wordState = (state >> shiftWordState) & maskWordState
		sentState = (state >> shiftSentenceState) & maskSentenceState
		lineState = (state >> shiftLineState) & maskLineState
	}

	// Advance all parsers in lockstep until the grapheme parser reports a
	// cluster boundary.
	for pos < len(str) {
		next, size := utf8.DecodeRuneInString(tail)
		tail = str[pos+size:]

		var (
			clusterEnds, wordEnds, sentenceEnds bool
			lineAction                          int
		)
		grState, clusterEnds = transitionGraphemeState(grState, next)
		wordState, wordEnds = transitionWordBreakState(wordState, next, nil, tail)
		sentState, sentenceEnds = transitionSentenceBreakState(sentState, next, nil, tail)
		lineState, lineAction = transitionLineBreakState(lineState, next, nil, tail)

		if clusterEnds {
			flags := lineAction
			if wordEnds {
				flags |= 1 << shiftWord
			}
			if sentenceEnds {
				flags |= 1 << shiftSentence
			}
			packed := grState | (wordState << shiftWordState) | (sentState << shiftSentenceState) | (lineState << shiftLineState)
			return str[:pos], str[pos:], flags, packed
		}
		pos += size
	}

	// The input ended before a cluster boundary was found.
	return str, "", endBoundaries, endState
}

87
vendor/github.com/rivo/uniseg/word.go generated vendored Normal file
View File

@@ -0,0 +1,87 @@
package uniseg
import "unicode/utf8"
// FirstWord splits off the leading word of the byte slice "b", using the word
// boundary rules of Unicode Standard Annex #29. Feeding the returned "rest"
// and "newState" back into the function walks through all words of the input
// one by one.
//
// Pass -1 as "state" on the first call (or whenever the parser state is
// unknown). On subsequent calls, pass the state returned by the previous call
// together with the previous "rest" slice.
//
// Both "word" and "rest" are sub-slices of "b": "word" holds the identified
// word and "rest" starts right after its last byte. An empty "rest" means that
// all of "b" has been consumed.
//
// For an empty "b", all return values are zero values.
func FirstWord(b []byte, state int) (word, rest []byte, newState int) {
	// Nothing to do for empty input.
	if len(b) == 0 {
		return nil, nil, 0
	}

	// Decode the first rune. If it is the only one, it is the whole word.
	first, pos := utf8.DecodeRune(b)
	if pos >= len(b) {
		return b, nil, wbAny
	}

	// Without a known state, the first rune determines the initial state.
	if state < 0 {
		state, _ = transitionWordBreakState(state, first, b[pos:], "")
	}

	// Feed the parser rune by rune until it reports a boundary.
	for pos < len(b) {
		next, size := utf8.DecodeRune(b[pos:])
		var atBoundary bool
		state, atBoundary = transitionWordBreakState(state, next, b[pos+size:], "")
		if atBoundary {
			return b[:pos], b[pos:], state
		}
		pos += size
	}

	// The input ended before a boundary was found.
	return b, nil, wbAny
}
// FirstWordInString works like [FirstWord] but takes a string and returns the
// word and the rest as strings.
func FirstWordInString(str string, state int) (word, rest string, newState int) {
	// Nothing to do for empty input.
	if len(str) == 0 {
		return "", "", 0
	}

	// Decode the first rune. If it is the only one, it is the whole word.
	first, pos := utf8.DecodeRuneInString(str)
	if pos >= len(str) {
		return str, "", wbAny
	}

	// Without a known state, the first rune determines the initial state.
	if state < 0 {
		state, _ = transitionWordBreakState(state, first, nil, str[pos:])
	}

	// Feed the parser rune by rune until it reports a boundary.
	for pos < len(str) {
		next, size := utf8.DecodeRuneInString(str[pos:])
		var atBoundary bool
		state, atBoundary = transitionWordBreakState(state, next, nil, str[pos+size:])
		if atBoundary {
			return str[:pos], str[pos:], state
		}
		pos += size
	}

	// The input ended before a boundary was found.
	return str, "", wbAny
}

1848
vendor/github.com/rivo/uniseg/wordproperties.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

246
vendor/github.com/rivo/uniseg/wordrules.go generated vendored Normal file
View File

@@ -0,0 +1,246 @@
package uniseg
import "unicode/utf8"
// The states of the word break parser. The descriptions below summarize how
// wbTransitions and transitionWordBreakState() enter each state. All values
// other than wbZWJBit must stay below wbZWJBit because that bit is combined
// with them.
const (
wbAny = iota // Default state. Also used as the "any state" wildcard in wbTransitions keys.
wbCR // After CR.
wbLF // After LF (including CR LF).
wbNewline // After Newline.
wbWSegSpace // After WSegSpace.
wbHebrewLetter // After Hebrew_Letter.
wbALetter // After ALetter.
wbWB7 // After a letter plus MidLetter, MidNumLet, or Single_Quote when another letter follows (WB6).
wbWB7c // After Hebrew_Letter plus Double_Quote when another Hebrew_Letter follows (WB7b).
wbNumeric // After Numeric.
wbWB11 // After Numeric plus MidNum, MidNumLet, or Single_Quote when another Numeric follows (WB12).
wbKatakana // After Katakana.
wbExtendNumLet // After ExtendNumLet.
wbOddRI // After an odd number of consecutive Regional Indicators.
wbEvenRI // After an even number of consecutive Regional Indicators.
wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c).
)
// The word break parser's breaking instructions. They are stored as the second
// element of the wbTransitions values; transitionWordBreakState() turns them
// into a boolean by comparing with wbBreak.
const (
wbDontBreak = iota // No word boundary.
wbBreak // Word boundary.
)
// The word break parser's state transitions. It's analogous to grTransitions,
// see comments there for details. Unicode version 14.0.0.
//
// Each key is a {state, property} pair and each value is a {new state,
// breaking instruction, rule number} triple, which is how
// transitionWordBreakState() reads the entries. The state wbAny and the
// property prAny act as wildcards there: when no exact key exists, the
// {state, prAny} and {wbAny, property} entries are consulted and the breaking
// instruction of the one with the lower rule number is used.
//
// Note that the first element of a value must always be a parser state (wb...)
// and never a property (pr...).
var wbTransitions = map[[2]int][3]int{
	// WB3b.
	{wbAny, prNewline}: {wbNewline, wbBreak, 32},
	{wbAny, prCR}:      {wbCR, wbBreak, 32},
	{wbAny, prLF}:      {wbLF, wbBreak, 32},

	// WB3a.
	{wbNewline, prAny}: {wbAny, wbBreak, 31},
	{wbCR, prAny}:      {wbAny, wbBreak, 31},
	{wbLF, prAny}:      {wbAny, wbBreak, 31},

	// WB3.
	{wbCR, prLF}: {wbLF, wbDontBreak, 30},

	// WB3d.
	{wbAny, prWSegSpace}:       {wbWSegSpace, wbBreak, 9990},
	{wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34},

	// WB5.
	{wbAny, prALetter}:               {wbALetter, wbBreak, 9990},
	{wbAny, prHebrewLetter}:          {wbHebrewLetter, wbBreak, 9990},
	{wbALetter, prALetter}:           {wbALetter, wbDontBreak, 50},
	{wbALetter, prHebrewLetter}:      {wbHebrewLetter, wbDontBreak, 50},
	{wbHebrewLetter, prALetter}:      {wbALetter, wbDontBreak, 50},
	{wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},

	// WB7. Transitions to wbWB7 handled by transitionWordBreakState().
	{wbWB7, prALetter}:      {wbALetter, wbDontBreak, 70},
	{wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70},

	// WB7a.
	{wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71},

	// WB7c. Transitions to wbWB7c handled by transitionWordBreakState().
	{wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73},

	// WB8.
	{wbAny, prNumeric}:     {wbNumeric, wbBreak, 9990},
	{wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80},

	// WB9.
	{wbALetter, prNumeric}:      {wbNumeric, wbDontBreak, 90},
	{wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90},

	// WB10.
	{wbNumeric, prALetter}:      {wbALetter, wbDontBreak, 100},
	{wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100},

	// WB11. Transitions to wbWB11 handled by transitionWordBreakState().
	{wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110},

	// WB13.
	{wbAny, prKatakana}:      {wbKatakana, wbBreak, 9990},
	{wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130},

	// WB13a.
	{wbAny, prExtendNumLet}:          {wbExtendNumLet, wbBreak, 9990},
	{wbALetter, prExtendNumLet}:      {wbExtendNumLet, wbDontBreak, 131},
	{wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
	{wbNumeric, prExtendNumLet}:      {wbExtendNumLet, wbDontBreak, 131},
	{wbKatakana, prExtendNumLet}:     {wbExtendNumLet, wbDontBreak, 131},
	{wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},

	// WB13b.
	{wbExtendNumLet, prALetter}:      {wbALetter, wbDontBreak, 132},
	{wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132},
	{wbExtendNumLet, prNumeric}:      {wbNumeric, wbDontBreak, 132},
	// The new state is the parser state wbKatakana. This entry previously used
	// the property constant prKatakana, which is not a valid parser state.
	{wbExtendNumLet, prKatakana}: {wbKatakana, wbDontBreak, 132},
}
// transitionWordBreakState advances the word break parser by one code point.
// Given the current parser state and the next rune "r", it returns the state to
// continue with and whether there is a word boundary in front of "r". Rules
// that depend on code points beyond "r" read ahead in the text following "r",
// which is taken from "b" if it is not nil and from "str" otherwise.
func transitionWordBreakState(state int, r rune, b []byte, str string) (newState int, wordBreak bool) {
	prop := property(workBreakCodePoints, r)

	// "Replacing Ignore Rules".
	afterLineEnd := state == wbNewline || state == wbCR || state == wbLF
	switch {
	case prop == prZWJ:
		// WB4 (for zero-width joiners).
		if afterLineEnd {
			return wbAny | wbZWJBit, true // WB4 must not override WB3a.
		}
		if state < 0 {
			return wbAny | wbZWJBit, false
		}
		return state | wbZWJBit, false
	case prop == prExtend || prop == prFormat:
		// WB4 (for Extend and Format).
		if afterLineEnd {
			return wbAny, true // WB4 must not override WB3a.
		}
		if state == wbWSegSpace || state == wbAny|wbZWJBit {
			return wbAny, false // No break, but WB3d and WB3c don't apply anymore either.
		}
		if state < 0 {
			return wbAny, false
		}
		return state, false
	case prop == prExtendedPictographic && state >= 0 && state&wbZWJBit != 0:
		// WB3c.
		return wbAny, false
	}

	// From here on, the zero-width joiner bit is irrelevant.
	if state >= 0 {
		state &^= wbZWJBit
	}

	// Look up the transition, preferring an exact {state, property} match.
	var rule int
	if exact, found := wbTransitions[[2]int{state, prop}]; found {
		newState, wordBreak, rule = exact[0], exact[1] == wbBreak, exact[2]
	} else {
		// Fall back to the wildcard entries.
		byState, hasByState := wbTransitions[[2]int{state, prAny}]
		byProp, hasByProp := wbTransitions[[2]int{wbAny, prop}]
		switch {
		case hasByState && hasByProp:
			// Both apply: the new state comes from the property entry, the
			// breaking instruction from whichever rule has the lower number
			// (see comments for grTransitions).
			newState, wordBreak, rule = byProp[0], byProp[1] == wbBreak, byProp[2]
			if byState[2] < byProp[2] {
				wordBreak, rule = byState[1] == wbBreak, byState[2]
			}
		case hasByState:
			// Only the state-specific entry exists. With the current
			// transition map, a property entry probably always exists too, but
			// the case is kept for future changes to the map.
			newState, wordBreak, rule = byState[0], byState[1] == wbBreak, byState[2]
		case hasByProp:
			// Only the property-specific entry exists.
			newState, wordBreak, rule = byProp[0], byProp[1] == wbBreak, byProp[2]
		default:
			// No known transition. WB999: Any ÷ Any.
			newState, wordBreak, rule = wbAny, true, 9990
		}
	}

	// Some rules depend on the property of the rune after "r", skipping over
	// Format, Extend, and ZWJ (according to WB4). We determine it only when one
	// of those rules may apply. It remains -1 if it is not needed or cannot be
	// determined (because the text ends or the rune is faulty).
	far := -1
	midChar := prop == prMidLetter || prop == prMidNumLet || prop == prSingleQuote || // WB6.
		prop == prDoubleQuote || // WB7b.
		prop == prMidNum // WB12.
	if rule > 60 && (state == wbALetter || state == wbHebrewLetter || state == wbNumeric) && midChar {
		for {
			var (
				ahead rune
				size  int
			)
			if b != nil { // Byte slice version.
				ahead, size = utf8.DecodeRune(b)
				b = b[size:]
			} else { // String version.
				ahead, size = utf8.DecodeRuneInString(str)
				str = str[size:]
			}
			if ahead == utf8.RuneError {
				break
			}
			if p := property(workBreakCodePoints, ahead); p != prExtend && p != prFormat && p != prZWJ {
				far = p
				break
			}
		}
	}

	switch {
	case rule > 60 &&
		(state == wbALetter || state == wbHebrewLetter) &&
		(prop == prMidLetter || prop == prMidNumLet || prop == prSingleQuote) &&
		(far == prALetter || far == prHebrewLetter):
		// WB6.
		return wbWB7, false
	case rule > 72 &&
		state == wbHebrewLetter &&
		prop == prDoubleQuote &&
		far == prHebrewLetter:
		// WB7b.
		return wbWB7c, false
	case rule > 120 &&
		state == wbNumeric &&
		(prop == prMidNum || prop == prMidNumLet || prop == prSingleQuote) &&
		far == prNumeric:
		// WB12.
		return wbWB11, false
	}

	// WB15 and WB16.
	if newState == wbAny && prop == prRegionalIndicator {
		if state == wbOddRI {
			// Don't break pairs of Regional Indicators.
			return wbEvenRI, false
		}
		// Either this is the first Regional Indicator (which includes
		// state == -1) or it follows a complete pair, after which we can break.
		return wbOddRI, true
	}

	return newState, wordBreak
}