package main import ( "path/filepath" "bufio" "log" "os" "strconv" "strings" "unicode" "unicode/utf8" "golang.org/x/text/transform" "github.com/leo-liu/zhmakeindex/kpathsea" ) // 输入格式 // 这里使用简单 struct 实现,需要大量分情况讨论。 // 也可以用 map 实现,代码可能会简短并易于扩展,但要动态处理类型 type InputStyle struct { keyword string arg_open rune arg_close rune actual rune encap rune escape rune level rune quote rune page_compositor string range_open rune range_close rune comment rune } func NewInputStyle() *InputStyle { in := &InputStyle{ keyword: "\\indexentry", arg_open: '{', arg_close: '}', actual: '@', encap: '|', escape: '\\', level: '!', quote: '"', page_compositor: "-", range_open: '(', range_close: ')', comment: '%', } return in } type OutputStyle struct { preamble string postamble string setpage_prefix string setpage_suffix string group_skip string headings_flag int heading_prefix string heading_suffix string symhead_positive string symhead_negative string numhead_positive string numhead_negative string stroke_prefix string stroke_suffix string radical_prefix string radical_suffix string radical_simplified_flag int radical_simplified_prefix string radical_simplified_suffix string item_0 string item_1 string item_2 string item_01 string item_x1 string item_12 string item_x2 string delim_0 string delim_1 string delim_2 string delim_n string delim_r string delim_t string encap_prefix string encap_infix string encap_suffix string page_precedence string line_max int indent_space string indent_length int suffix_2p string suffix_3p string suffix_mp string } func NewOutputStyle() *OutputStyle { out := &OutputStyle{ preamble: "\\begin{theindex}\n", postamble: "\n\n\\end{theindex}\n", setpage_prefix: "\n \\setcounter{page}{", setpage_suffix: "}\n", group_skip: "\n\n \\indexspace\n", headings_flag: 0, heading_prefix: "", heading_suffix: "", symhead_positive: "Symbols", symhead_negative: "symbols", numhead_positive: "Numbers", numhead_negative: "numbers", stroke_prefix: "", stroke_suffix: " 画", radical_prefix: "", radical_suffix: "部", radical_simplified_flag: 1, radical_simplified_prefix: "(", radical_simplified_suffix: ")", item_0: "\n \\item ", item_1: "\n \\subitem ", item_2: "\n \\subsubitem ", item_01: "\n \\subitem ", item_x1: "\n \\subitem ", item_12: "\n \\subsubitem ", item_x2: "\n \\subsubitem ", delim_0: ", ", delim_1: ", ", delim_2: ", ", delim_n: ", ", delim_r: "--", delim_t: "", encap_prefix: "\\", encap_infix: "{", encap_suffix: "}", page_precedence: "rnaRA", line_max: 72, indent_space: "\t\t", indent_length: 16, suffix_2p: "", suffix_3p: "", suffix_mp: "", } return out } func NewStyles(o *StyleOptions) (*InputStyle, *OutputStyle) { in := NewInputStyle() out := NewOutputStyle() if o.style == "" { return in, out } if filepath.Ext(o.style) == "" { o.style += ".ist" } // 读取格式文件,处理格式 o.style = kpathsea.FindFile(o.style, kpathsea.IST_FORMAT, false) if o.style == "" { log.Fatalln("找不到格式文件。") } styleFile, err := os.Open(o.style) if err != nil { log.Fatalln(err.Error()) } defer styleFile.Close() scanner := bufio.NewScanner(transform.NewReader(styleFile, o.style_decoder)) scanner.Split(ScanStyleTokens) for scanner.Scan() { if err := scanner.Err(); err != nil { log.Println(err.Error()) } key := scanner.Text() if !scanner.Scan() { log.Println("格式文件不完整") } if err := scanner.Err(); err != nil { log.Println(err.Error()) } value := scanner.Text() switch key { // 输入参数 case "keyword": in.keyword = unquote(value) case "arg_open": in.arg_open = unquoteChar(value) case "arg_close": in.arg_close = unquoteChar(value) case "actual": in.actual = unquoteChar(value) case "encap": in.encap = unquoteChar(value) case "escape": in.escape = unquoteChar(value) case "level": in.level = unquoteChar(value) case "quote": in.quote = unquoteChar(value) case "page_compositor": in.page_compositor = unquote(value) case "range_open": in.range_open = unquoteChar(value) case "range_close": in.range_close = unquoteChar(value) case "comment": in.comment = unquoteChar(value) // 输出参数 case "preamble": out.preamble = unquote(value) case "postamble": out.postamble = unquote(value) case "setpage_prefix": out.setpage_prefix = unquote(value) case "setpage_suffix": out.setpage_suffix = unquote(value) case "group_skip": out.group_skip = unquote(value) case "headings_flag", "lethead_flag": out.headings_flag = parseInt(value) case "heading_prefix", "lethead_prefix": out.heading_prefix = unquote(value) case "heading_suffix", "lethead_suffix": out.heading_suffix = unquote(value) case "symhead_positive": out.symhead_positive = unquote(value) case "symhead_negative": out.symhead_negative = unquote(value) case "numhead_positive": out.numhead_positive = unquote(value) case "numhead_negative": out.numhead_negative = unquote(value) case "stroke_prefix": out.stroke_prefix = unquote(value) case "stroke_suffix": out.stroke_suffix = unquote(value) case "radical_prefix": out.radical_prefix = unquote(value) case "radical_suffix": out.radical_suffix = unquote(value) case "radical_simplify_flag": out.radical_simplified_flag = parseInt(value) case "radical_simplified_prefix": out.radical_simplified_prefix = unquote(value) case "radical_simplified_suffix": out.radical_simplified_suffix = unquote(value) case "item_0": out.item_0 = unquote(value) case "item_1": out.item_1 = unquote(value) case "item_2": out.item_2 = unquote(value) case "item_01": out.item_01 = unquote(value) case "item_x1": out.item_x1 = unquote(value) case "item_12": out.item_12 = unquote(value) case "item_x2": out.item_x2 = unquote(value) case "delim_0": out.delim_0 = unquote(value) case "delim_1": out.delim_1 = unquote(value) case "delim_2": out.delim_2 = unquote(value) case "delim_n": out.delim_n = unquote(value) case "delim_r": out.delim_r = unquote(value) case "delim_t": out.delim_t = unquote(value) case "encap_prefix": out.encap_prefix = unquote(value) case "encap_infix": out.encap_infix = unquote(value) case "encap_suffix": out.encap_suffix = unquote(value) case "line_max": out.line_max = parseInt(value) case "indent_space": out.indent_space = unquote(value) case "indent_length": out.indent_length = parseInt(value) case "suffix_2p": out.suffix_2p = unquote(value) case "suffix_3p": out.suffix_3p = unquote(value) case "suffix_mp": out.suffix_mp = unquote(value) // 其他 default: log.Printf("忽略未知格式 %s\n", key) } } return in, out } func unquote(src string) string { // 处理双引号中有换行符的串 if src[0] == '"' { src = strings.Replace(src, "\n", "\\n", -1) } dst, err := strconv.Unquote(src) if err != nil { log.Println(err.Error()) } return dst } func unquoteChar(src string) rune { src = unquote(src) dst, _, tail, err := strconv.UnquoteChar(src, 0) if tail != "" { err = strconv.ErrSyntax } if err != nil { log.Println(err.Error()) } return dst } func parseInt(src string) int { i, err := strconv.ParseInt(src, 0, 0) if err != nil { log.Println(err.Error()) } return int(i) } // bufio.SplitFunc 的实例 // 查找标识符、数字、单引号内的 rune、双引号或反引号内的串;跳过以 % 开头的注释(未完成) // 实现参考了 bufio.ScanWords func ScanStyleTokens(data []byte, atEOF bool) (advance int, token []byte, err error) { // 跳过空白和注释 start := 0 in_comment := false for width := 0; start < len(data); start += width { var r rune r, width = utf8.DecodeRune(data[start:]) if !in_comment && r == '%' { in_comment = true } else if in_comment && r == '\n' { in_comment = false } else if !in_comment && !unicode.IsSpace(r) { break } // 其他情况跳过:注释中未遇到换行符,或者注释外遇到空白符 } if atEOF && len(data) == 0 { return 0, nil, nil } // 首先读出第一个字符,按不同类型扫描 token switch first, firstwidth := utf8.DecodeRune(data[start:]); first { // 读引号内的 rune 或串,从引号后开始扫描 case '\'', '"', '`': for width, i := 0, start+firstwidth; i < len(data); i += width { var r rune r, width = utf8.DecodeRune(data[i:]) if r == '\\' { // 跳过转义符 _, newwidth := utf8.DecodeRune(data[i+width:]) width += newwidth } else if r == first { // 找到终点 return i + width, data[start : i+width], nil } } // 读标识符、数字等,读到空格或注释符为止 default: for width, i := 0, start; i < len(data); i += width { var r rune r, width = utf8.DecodeRune(data[i:]) if unicode.IsSpace(r) || r == '%' { return i, data[start:i], nil } } } // 进入 EOF,剩下的部分全是一个 token(规则不足) if atEOF && len(data) > start { return len(data), data[start:], nil } // 要求更长的数据 return 0, nil, nil }