1
0
mirror of https://github.com/square/okhttp.git synced 2026-01-12 10:23:16 +03:00

Straightforward implementation of IDNA mapping, for tests only (#7783)

* Straightforward implementation of IDNA mapping, for tests only

As described in UTS #46, https://www.unicode.org/reports/tr46

This is working towards OkHttp's own implementation of what
IDN.toASCII() does on the JVM.

* Address code review feedback

* Comment the mapping for ¼

* TM should be tm
This commit is contained in:
Jesse Wilson
2023-04-23 16:53:54 -04:00
committed by GitHub
parent f408411ff9
commit 5d1e6b74ff
4 changed files with 9371 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
/*
* Copyright (C) 2023 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.idn
import okio.BufferedSink
/**
* A lookup table that maps one code point at a time and reports whether it is allowed.
*
* [PlainTextIdnaMappingTable] is the implementation visible alongside this interface.
*/
interface IdnaMappingTable {
/**
* Returns true if the [codePoint] was applied successfully. Returns false if it was disallowed.
*
* In [PlainTextIdnaMappingTable], applying a code point writes its mapped form to [sink]; code
* points that are ignored write nothing, and disallowed code points write nothing and return
* false.
*
* NOTE(review): [PlainTextIdnaMappingTable] throws [IllegalArgumentException] for code points
* outside `0..0x10ffff`; confirm whether every implementation is expected to do the same.
*/
fun map(codePoint: Int, sink: BufferedSink): Boolean
}

View File

@@ -0,0 +1,101 @@
/*
* Copyright (C) 2023 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.idn
import assertk.assertThat
import assertk.assertions.isEqualTo
import assertk.assertions.isGreaterThan
import kotlin.test.assertFailsWith
import okio.Buffer
import okio.FileSystem
import okio.Path.Companion.toPath
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
/**
 * Exercises [IdnaMappingTable] lookups against the plain-text mapping table that is loaded from
 * the test resources before each test.
 */
class IdnaMappingTableTest {
  private lateinit var table: IdnaMappingTable

  /** Parses the plain-text mapping table resource so every test starts from a fresh table. */
  @BeforeEach
  fun setUp() {
    table = FileSystem.RESOURCES.read("/okhttp3/internal/idna/IdnaMappingTable.txt".toPath()) {
      readPlainTextIdnaMappingTable()
    }
  }

  @Test fun regularMappings() {
    assertThat("hello".map()).isEqualTo("hello")
    assertThat("hello-world".map()).isEqualTo("hello-world")
    assertThat("HELLO".map()).isEqualTo("hello")
    assertThat("Hello".map()).isEqualTo("hello")

    // These compound characters map to their components.
    assertThat("¼".map()).isEqualTo("14")
    // U+2122 TRADE MARK SIGN maps to lowercase "tm".
    assertThat("™".map()).isEqualTo("tm")
  }

  @Test fun deviations() {
    assertThat("ß".map()).isEqualTo("ss")
    assertThat("ς".map()).isEqualTo("σ")
    assertThat("\u200c".map()).isEqualTo("")
    assertThat("\u200d".map()).isEqualTo("")
  }

  @Test fun ignored() {
    assertThat("\u200b".map()).isEqualTo("")
    assertThat("\ufeff".map()).isEqualTo("")
  }

  @Test fun disallowed() {
    assertThat("\u0080".mapExpectingErrors()).isEqualTo("")
  }

  @Test fun disallowedStd3Valid() {
    assertThat("/".map()).isEqualTo("/")
  }

  @Test fun disallowedStd3Mapped() {
    assertThat("\u00b8".map()).isEqualTo("\u0020\u0327")
  }

  @Test fun outOfBounds() {
    assertFailsWith<IllegalArgumentException> {
      table.map(-1, Buffer())
    }
    table.map(0, Buffer()) // Lowest legal code point.
    table.map(0x10ffff, Buffer()) // Highest legal code point.
    assertFailsWith<IllegalArgumentException> {
      table.map(0x110000, Buffer())
    }
  }

  /**
   * Maps every code point of this string through [table] and returns the concatenated output.
   * Fails with [IllegalArgumentException] if any code point is reported as disallowed.
   */
  private fun String.map(): String {
    val result = Buffer()
    for (codePoint in codePoints()) {
      require(table.map(codePoint, result))
    }
    return result.readUtf8()
  }

  /**
   * Maps every code point of this string through [table], asserting that at least one of them was
   * reported as disallowed. Returns whatever output the allowed code points produced.
   */
  private fun String.mapExpectingErrors(): String {
    val result = Buffer()
    var errorCount = 0
    for (codePoint in codePoints()) {
      if (!table.map(codePoint, result)) errorCount++
    }
    assertThat(errorCount).isGreaterThan(0)
    return result.readUtf8()
  }
}

View File

@@ -0,0 +1,217 @@
/*
* Copyright (C) 2023 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.idn
import okio.Buffer
import okio.BufferedSink
import okio.BufferedSource
import okio.ByteString
import okio.ByteString.Companion.encodeUtf8
import okio.IOException
import okio.Options
/**
 * An in-memory [mapping table] that performs the [mapping step] of IDNA processing.
 *
 * Readability is favored over efficiency here: each lookup is a binary search over the list of
 * code point ranges supplied at construction.
 *
 * [mapping table]: https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
 * [mapping step]: https://www.unicode.org/reports/tr46/#ProcessingStepMap
 */
class PlainTextIdnaMappingTable internal constructor(
  private val mappings: List<Mapping>,
) : IdnaMappingTable {
  override fun map(codePoint: Int, sink: BufferedSink): Boolean {
    // Locate the range whose inclusive bounds contain the code point.
    val position = mappings.binarySearch { range ->
      when {
        range.sourceCodePoint1 < codePoint -> -1
        range.sourceCodePoint0 > codePoint -> 1
        else -> 0
      }
    }

    // Code points must be in 0..0x10ffff.
    require(position in mappings.indices) { "unexpected code point: $codePoint" }

    val entry = mappings[position]
    return when (entry.type) {
      TYPE_DISALLOWED -> false

      TYPE_IGNORED -> true

      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        sink.write(entry.mappedTo)
        true
      }

      TYPE_DISALLOWED_STD3_VALID, TYPE_VALID -> {
        sink.writeUtf8CodePoint(codePoint)
        true
      }

      // Any other type writes nothing and is reported as applied.
      else -> true
    }
  }
}
// Delimiters. Each constant is the index of the matching entry in optionsDelimiter, so the two
// lists must stay in the same order.
private const val DELIMITER_DOT = 0
private const val DELIMITER_SPACE = 1
private const val DELIMITER_SEMICOLON = 2
private const val DELIMITER_HASH = 3
private const val DELIMITER_NEWLINE = 4

private val optionsDelimiter = Options.of(
  ".".encodeUtf8(), // DELIMITER_DOT
  " ".encodeUtf8(), // DELIMITER_SPACE
  ";".encodeUtf8(), // DELIMITER_SEMICOLON
  "#".encodeUtf8(), // DELIMITER_HASH
  "\n".encodeUtf8(), // DELIMITER_NEWLINE
)

// Matches only a dot, at the same index as DELIMITER_DOT.
private val optionsDot = Options.of(
  ".".encodeUtf8(), // DELIMITER_DOT
)

// Mapping types. Each constant is the index of the matching entry in optionsType. Every option
// includes the trailing space that follows the type name.
private const val TYPE_DEVIATION = 0
private const val TYPE_DISALLOWED = 1
private const val TYPE_DISALLOWED_STD3_MAPPED = 2
private const val TYPE_DISALLOWED_STD3_VALID = 3
private const val TYPE_IGNORED = 4
private const val TYPE_MAPPED = 5
private const val TYPE_VALID = 6

private val optionsType = Options.of(
  "deviation ".encodeUtf8(), // TYPE_DEVIATION
  "disallowed ".encodeUtf8(), // TYPE_DISALLOWED
  "disallowed_STD3_mapped ".encodeUtf8(), // TYPE_DISALLOWED_STD3_MAPPED
  "disallowed_STD3_valid ".encodeUtf8(), // TYPE_DISALLOWED_STD3_VALID
  "ignored ".encodeUtf8(), // TYPE_IGNORED
  "mapped ".encodeUtf8(), // TYPE_MAPPED
  "valid ".encodeUtf8(), // TYPE_VALID
)
/** Consumes consecutive space characters at the front of this source, stopping at anything else. */
private fun BufferedSource.skipWhitespace() {
  val space = ' '.code.toByte()
  // exhausted() is checked first so that buffer[0] is only read when a byte is available.
  while (!exhausted() && buffer[0] == space) {
    skip(1L)
  }
}
/**
 * Consumes everything up to and including the next newline. If there is no further newline, the
 * remainder of the source is consumed.
 */
private fun BufferedSource.skipRestOfLine() {
  val newlineIndex = indexOf('\n'.code.toByte())
  if (newlineIndex == -1L) {
    skip(buffer.size) // Exhaust this source.
  } else {
    skip(newlineIndex + 1)
  }
}
/**
* Reads lines from `IdnaMappingTable.txt` and returns them as a [PlainTextIdnaMappingTable].
*
* Comment lines are either blank or start with a `#` character. Lines may also end with a comment.
* All comments are ignored.
*
* Regular lines contain fields separated by semicolons.
*
* The first element on each line is a single hex code point (like 0041) or a hex code point range
* (like 0030..0039). A single code point is stored as a range whose two ends are equal.
*
* The second element on each line is a mapping type, like `valid` or `mapped`.
*
* For lines that contain a mapping target, the next thing is a sequence of hex code points (like
* 0031 2044 0034). This parser requires the sequence to be followed by a `#` comment.
*
* All other data is ignored.
*
* Entries are returned in file order. Throws [IOException] if a line starts with an unexpected
* delimiter, is missing an expected `..` or `;`, or names an unrecognized mapping type.
*/
fun BufferedSource.readPlainTextIdnaMappingTable(): PlainTextIdnaMappingTable {
// Scratch buffer that accumulates the UTF-8 mapping target of the current line. It is drained by
// readByteString() at the end of every iteration, so it starts empty for each line.
val mappedTo = Buffer()
val result = mutableListOf<Mapping>()
while (!exhausted()) {
// Skip comment and empty lines.
// When no delimiter matches, control falls through to parse a regular line.
when (select(optionsDelimiter)) {
DELIMITER_HASH -> {
skipRestOfLine()
continue
}
DELIMITER_NEWLINE -> {
continue
}
DELIMITER_DOT, DELIMITER_SPACE, DELIMITER_SEMICOLON -> {
throw IOException("unexpected delimiter")
}
}
// "002F" or "0000..002C"
val sourceCodePoint0 = readHexadecimalUnsignedLong()
val sourceCodePoint1 = when (select(optionsDot)) {
DELIMITER_DOT -> {
// The select() above matched the first '.' of the ".." separator; this is the second one.
if (readByte() != '.'.code.toByte()) throw IOException("expected '..'")
readHexadecimalUnsignedLong()
}
// Not a range: the single code point is both the start and the end.
else -> sourceCodePoint0
}
skipWhitespace()
if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")
// "valid" or "mapped"
skipWhitespace()
val type = select(optionsType)
when (type) {
// Only these types carry a mapping target field.
TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
skipWhitespace()
if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")
// Like "0061" or "0031 2044 0034".
while (true) {
skipWhitespace()
when (select(optionsDelimiter)) {
// A '#' starts the trailing comment and is the only way to end the mapping target.
DELIMITER_HASH -> {
break
}
DELIMITER_DOT, DELIMITER_SEMICOLON, DELIMITER_NEWLINE -> {
throw IOException("unexpected delimiter")
}
}
mappedTo.writeUtf8CodePoint(readHexadecimalUnsignedLong().toInt())
}
}
// These types have no mapping target, so mappedTo stays empty for them.
TYPE_DISALLOWED, TYPE_DISALLOWED_STD3_VALID, TYPE_IGNORED, TYPE_VALID -> Unit
else -> throw IOException("unexpected type")
}
// Discard whatever remains on the line, including any trailing comment.
skipRestOfLine()
result += Mapping(
sourceCodePoint0.toInt(),
sourceCodePoint1.toInt(),
type,
mappedTo.readByteString(),
)
}
return PlainTextIdnaMappingTable(result)
}
/**
* One parsed line of the mapping table: an inclusive range of source code points, the mapping
* type that applies to them, and the replacement to emit for types that have one.
*
* @property sourceCodePoint0 first code point of the range, inclusive.
* @property sourceCodePoint1 last code point of the range, inclusive. Equal to [sourceCodePoint0]
* when the line named a single code point.
* @property type one of the `TYPE_*` constants declared in this file.
* @property mappedTo UTF-8 encoding of the mapping target. Empty when the line had no mapping
* target.
*/
internal data class Mapping(
val sourceCodePoint0: Int,
val sourceCodePoint1: Int,
val type: Int,
val mappedTo: ByteString,
)

File diff suppressed because it is too large Load Diff