blob: 9f46d7e246de47fdc1ba95b31b441df6ac889003 [file] [log] [blame]
# File src/library/tools/R/recode.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2012 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
### Remap a character string from encoded text to LaTeX escapes
encoded_text_to_latex <-
function(x, encoding = c("latin1", "latin2", "latin9", "UTF-8", "utf8"))
{
encoding <- match.arg(encoding)
do_latin1 <- function(x) {
xx <- charToRaw(x)
paste(latin1table[as.integer(xx)], collapse="")}
do_latin2 <- function(x) {
xx <- charToRaw(x)
paste(latin2table[as.integer(xx)], collapse="")}
do_latin9 <- function(x) {
xx <- charToRaw(x)
paste(latin9table[as.integer(xx)], collapse="")}
do_utf8 <- function(x) {
xx <- utf8ToInt(x)
y <- rep.int("?", length(x))
y[xx < 512] <- utf8table[xx]
y[xx == 0x02C6] <- "{\\textasciicircum}"
y[xx == 0x02C7] <- "{\\textasciicaron}"
y[xx == 0x02CA] <- "{\\textasciitilde}"
y[xx == 0x02D8] <- "{\\textasciibreve}"
y[xx == 0x02D9] <- "{\\textperiodcentered}"
y[xx == 0x02DD] <- "{\\textacutedbl}"
y[xx == 0x200C] <- "{\\textcompwordmark}"
y[xx == 0x2018] <- "{\\textquoteleft}"
y[xx == 0x2019] <- "{\\textquoteright}"
y[xx == 0x201C] <- "{\\textquotedblleft}"
y[xx == 0x201D] <- "{\\textquotedblright}"
y[xx == 0x2020] <- "{\\textdagger}"
y[xx == 0x2022] <- "{\\textbullet}"
y[xx == 0x2026] <- "{\\textellipsis}"
y[xx == 0x20AC] <- "{\\texteuro}"
paste(y, collapse="")
}
as.vector(switch(encoding,
"latin1" = sapply(x, do_latin1),
"latin2" = sapply(x, do_latin2),
"latin9" = sapply(x, do_latin9),
"UTF-8" = sapply(x, do_utf8),
"utf8" = sapply(x, do_utf8),
stop("unimplemented encoding")
))
}
latin1table <- c(
rep.int("?", 31), ## omit 0x0
## 0x20 to %x7F
rawToChar(as.raw(seq.int(32, 126)), multiple=TRUE), "?",
## 0x80 to 0x9F
rep.int("?", 32),
## 0xA0 = 160 on
"{\\nobreakspace}", "{\\textexclamdown}", "{\\textcent}", "{\\textsterling}", "{\\textcurrency}", "{\\textyen}", "{\\textbrokenbar}", "{\\S}",
'\\"{}', "{\\textcopyright}", "{\\textordfeminine}", "{\\guillemotleft}", "{\\textlnot}", "\\-", "{\\textregistered}", "{\\a={}}",
"{\\textdegree}", "{\\textpm}", "{\\mathtwosuperior}", "{\\maththreesuperior}", "{\\a'{}}", "{\\textmu}", "{\\P}", "{\\textperiodcentered}",
"{\\c\\ }", "{\\mathonesuperior}", "{\\textordmasculine}", "{\\guillemotright}", "{\\textonequarter}", "{\\textonehalf}", "{\\textthreequarters}", "{\\textquestiondown}",
"{\\a`A}", "{\\a'A}", "{\\^A}", "{\\~A}", '{\\"A}', "{\\r A}", "{\\AE}", "{\\c C}",
"{\\a`E}", "{\\a'E}", "{\\^E}", "{\\a`I}", "{\\a'I}", "{\\^I}", "{\\~I}", '{\\"I}',
"{\\DH}", "{\\~N}", "{\\a`O}", "{\\a'O}", "{\\^O}", "{\\~O}", '{\\"O}', "{\\texttimes}",
"{\\O}", "{\\a`U}", "{\\a'U}", "{\\^U}", '{\\"U}', "{\\a`Y}", "{\\TH}", "{\\ss}",
"{\\a`a}", "{\\a'a}", "{\\^a}", "{\\~a}", '{\\"a}', "{\\r a}", "{\\ae}", "{\\c c}",
"{\\a`e}", "{\\a'e}", "{\\^e}", '{\\"e}',"{\\a`\\i}", "{\\a'\\i}", "{\\^\\i}", '{\\"\\i}',
"{\\dh}", "{\\~n}", "{\\a`o}", "{\\a'o}", "{\\^o}", "{\\~o}", '{\\"o}', "{\\textdiv}",
"{\\o}", "{\\a`u}", "{\\a'u}", "{\\^u}", '{\\"u}', "{\\a`y}", "{\\th}", '{\\"y}'
)
latin2table <- c(
rep.int("?", 31), ## omit 0x0
## 0x20 to %x7F
rawToChar(as.raw(seq.int(32, 126)), multiple=TRUE), "?",
## 0x80 to 0x9F
rep.int("?", 32),
## 0xA0 = 160 on
"{\\nobreakspace}", "{\\k A}", "{\\u{}}", "{\\L}", "{\\textcurrency}", "{\\v L}", "{\\a'S}", "{\\S}",
'\\"{}', "{\\v S}", "{\\c S}", "{\\v T}", "{\\\'Z}", "\\-", "{\\v Z}", "{\\.Z}",
"{\\textdegree}", "{\\k A}", "{\\k\\ }", "{\\l}", "{\\a'{}}", "{\\v l}", "{\\a's}", "{\\v{}}",
"{\\c\\ }", "{\\v s}", "{\\c s}", "{\\v t}", "{\\'z}", "{\\H{}}", "{\\v z}", "{\\.z}",
"{\\a'R}", "{\\a'A}", "{\\^A}", "{\\u A}", '{\\"A}', "{\\'L}", "{\\a'C}", "{\\c C}",
"{\\v C}", "{\\a'E}", "{\\k E}", '{\\"E}', "{\\v E}", "{\\'I}", "{\\^I}", '{\\v D}',
"{\\DJ}", "{\\a'N}", "{\\v N}", "{\\a'O}", "{\\^O}", "{\\H O}", '{\\"O}', "{\\texttimes}",
"{\\v R}", "{\\r U}", "{\\a'U}", "{\\H U}", '{\\"U}', "{\\a`Y}", "{\\c I}", "{\\ss}",
"{\\a'r}", "{\\a'a}", "{\\^a}", "{\\u a}", '{\\"a}', "{\\'l}", "{\\a'c}", "{\\c c}",
"{\\v c}", "{\\a'e}", "{\\k e}", '{\\"e}', "{\\v e}", "{\\'\\i}", "{\\^\\i}", '{\\v d}',
"{\\dj}", "{\\a'n}", "{\\c n}", "{\\a'o}", '{\\"a}', "{\\H o}", '{\\"o}', "{\\textdiv}",
"{\\v r}", "{\\r u}", "{\\a'u}", "{\\H u}", '{\\"u}', "{\\a`y}", "{\\c t}", '{\\.{}}'
)
latin9table <- c(
rep.int("?}", 31),
## 0x20 to %x7F
rawToChar(as.raw(seq.int(32, 126)), multiple=TRUE), "?}",
## 0x80 to 0x9F
rep.int("?}", 32),
## 0xA0 = 160 on
"{\\nobreakspace}", "{\\textexclamdown}", "{\\textcent}", "{\\textsterling}", "{\\texteuro}", "{\\textyen}", "{\\v S}", "{\\S}",
'{\\v s}', "{\\copyright}", "{\\textordfeminine}", "{\\guillemotleft}", "{\\textlnot}", "\\-", "{\\textregistered}", "{\\a={}}",
"{\\textdegree}", "{\\textpm}", "{\\mathtwosuperior}", "{\\maththreesuperior}", "{\\v Z}", "{\\textmu}", "{\\P}", "{\\textperiodcentered}",
"{\\v z}", "{\\mathonesuperior}", "{\\textordmasculine}", "{\\guillemotright}", "{\\OE}", "{\\oe}", '{\\"Y}', "{\\textquestiondown}",
"{\\a`A}", "{\\a'A}", "{\\^A}", "{\\~A}", '{\\"A}', "{\\r A}", "{\\AE}", "{\\c C}",
"{\\a`E}", "{\\a'E}", "{\\^E}", "{\\a`I}", "{\\a'I}", "{\\^I}", "{\\~I}", '{\\"I}',
"{\\DH}", "{\\~N}", "{\\a`O}", "{\\a'O}", "{\\^O}", "{\\~O}", '{\\"O}', "{\\texttimes}",
"{\\O}", "{\\a`u}", "{\\a'U}", "{\\^U}", '\\"U', "{\\a`Y}", "{\\TH}", "{\\ss}",
"{\\a`a}", "{\\a'a}", "{\\^a}", "{\\~a}", '{\\"a}', "{\\r a}", "{\\ae}", "{\\c c}",
"{\\a`e}", "{\\a'e}", "{\\^e}", '{\\"e}',"{\\a`\\i}", "{\\a'\\i}", "{\\^\\i}", '{\\"\\i}',
"{\\dh}", "{\\~n}", "{\\a`o}", "{\\a'o}", "{\\^o}", "{\\~o}", '{\\"o}', "{\\textdiv}",
"{\\o}", "{\\a`u}", "{\\a'u}", "{\\^u}", '\\"u', "{\\a`y}", "{\\th}", '{\\"y}'
)
utf8table <- c(latin1table, rep.int("?", 256))
utf8table[0x0102:0x107] <-
c("{\\u A}","{\\u a}", "{\\k A}", "{\\k a}", "{\\a'C}", "{\\a'c}")
utf8table[0x010C:0x111] <-
c( "{\\v C}","{\\v c}","{\\v D}","{\\v d}","{\\DJ}","{\\dj}")
utf8table[0x0118:0x11B] <- c("{\\k E}","{\\k e}", "{\\v E}","{\\v e}")
utf8table[0x011E:0x11F] <- c("{\\u G}","{\\u g}")
utf8table[0x0130:0x131] <- c("{\\.I}","{\\i}")
utf8table[0x0139:0x13A] <- c("{\\a'L}","{\\a'l}")
utf8table[0x013D:0x13E] <- c("{\\v L}","{\\v l}")
utf8table[0x0141L:0x144] <- c("{\\L}","{\\l}","{\\a'N}","{\\a'n}")
utf8table[0x0147:0x14B] <- c("{\\v N}","{\\v n}","?","{\\NG}","{\\ng}")
utf8table[0x0150:0x155] <- c("{\\H O}","{\\H o}","{\\OE}","{\\oe}","{\\a'R}","{\\a'r}")
utf8table[0x0158:0x15B] <- c("{\\v R}","{\\v r}","{\\a'S}","{\\a's}")
utf8table[0x015E:0x165] <- c("{\\c S}","{\\c s}","{\\v S}","{\\v s}",
"{\\c T}","{\\c t}","{\\v T}","{\\v t}")
utf8table[0x016E:0x171] <- c("{\\r U}","{\\r u}","{\\H U}","{\\H u}")
utf8table[0x0178:0x17E] <- c('{\\"Y}',"{\\a'Z}","{\\a'z}","{\\.Z}", "{\\.z}","{\\v Z}","{\\v z}")
utf8table[0x0192] <- "{\\textflorin}"