// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "bytes" "strings" "unicode/utf8" ) // Reverse a utf8 encoded string. func Reverse(str string) string { var size int tail := len(str) buf := make([]byte, tail) s := buf for len(str) > 0 { _, size = utf8.DecodeRuneInString(str) tail -= size s = append(s[:tail], []byte(str[:size])...) str = str[size:] } return string(buf) } // Slice a string by rune. // // Start must satisfy 0 <= start <= rune length. // // End can be positive, zero or negative. // If end >= 0, start and end must satisfy start <= end <= rune length. // If end < 0, it means slice to the end of string. // // Otherwise, Slice will panic as out of range. func Slice(str string, start, end int) string { var size, startPos, endPos int origin := str if start < 0 || end > len(str) || (end >= 0 && start > end) { panic("out of range") } if end >= 0 { end -= start } for start > 0 && len(str) > 0 { _, size = utf8.DecodeRuneInString(str) start-- startPos += size str = str[size:] } if end < 0 { return origin[startPos:] } endPos = startPos for end > 0 && len(str) > 0 { _, size = utf8.DecodeRuneInString(str) end-- endPos += size str = str[size:] } if len(str) == 0 && (start > 0 || end > 0) { panic("out of range") } return origin[startPos:endPos] } // Partition splits a string by sep into three parts. // The return value is a slice of strings with head, match and tail. // // If str contains sep, for example "hello" and "l", Partition returns // "he", "l", "lo" // // If str doesn't contain sep, for example "hello" and "x", Partition returns // "hello", "", "" func Partition(str, sep string) (head, match, tail string) { index := strings.Index(str, sep) if index == -1 { head = str return } head = str[:index] match = str[index : index+len(sep)] tail = str[index+len(sep):] return } // LastPartition splits a string by last instance of sep into three parts. // The return value is a slice of strings with head, match and tail. // // If str contains sep, for example "hello" and "l", LastPartition returns // "hel", "l", "o" // // If str doesn't contain sep, for example "hello" and "x", LastPartition returns // "", "", "hello" func LastPartition(str, sep string) (head, match, tail string) { index := strings.LastIndex(str, sep) if index == -1 { tail = str return } head = str[:index] match = str[index : index+len(sep)] tail = str[index+len(sep):] return } // Insert src into dst at given rune index. // Index is counted by runes instead of bytes. // // If index is out of range of dst, panic with out of range. func Insert(dst, src string, index int) string { return Slice(dst, 0, index) + src + Slice(dst, index, -1) } // Scrub scrubs invalid utf8 bytes with repl string. // Adjacent invalid bytes are replaced only once. func Scrub(str, repl string) string { var buf *bytes.Buffer var r rune var size, pos int var hasError bool origin := str for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) if r == utf8.RuneError { if !hasError { if buf == nil { buf = &bytes.Buffer{} } buf.WriteString(origin[:pos]) hasError = true } } else if hasError { hasError = false buf.WriteString(repl) origin = origin[pos:] pos = 0 } pos += size str = str[size:] } if buf != nil { buf.WriteString(origin) return buf.String() } // No invalid byte. return origin } // WordSplit splits a string into words. Returns a slice of words. // If there is no word in a string, return nil. // // Word is defined as a locale dependent string containing alphabetic characters, // which may also contain but not start with `'` and `-` characters. func WordSplit(str string) []string { var word string var words []string var r rune var size, pos int inWord := false for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) switch { case isAlphabet(r): if !inWord { inWord = true word = str pos = 0 } case inWord && (r == '\'' || r == '-'): // Still in word. default: if inWord { inWord = false words = append(words, word[:pos]) } } pos += size str = str[size:] } if inWord { words = append(words, word[:pos]) } return words }