mirror of
https://github.com/square/okhttp.git
synced 2026-01-12 10:23:16 +03:00
Straightforward implementation of IDNA mapping, for tests only (#7783)
* Straightforward implementation of IDNA mapping, for tests only. As described in UTS #46 (https://www.unicode.org/reports/tr46). This is working towards OkHttp's own implementation of what IDN.toASCII() does on the JVM. * Address code review feedback. * Comment the mapping for ¼. * TM should be tm.
This commit is contained in:
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Square, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package okhttp3.internal.idn
|
||||
|
||||
import okio.BufferedSink
|
||||
|
||||
/** A table that can apply the IDNA mapping to one code point at a time. */
interface IdnaMappingTable {
  /**
   * Applies the mapping for [codePoint], writing any resulting output to [sink].
   *
   * @return true if [codePoint] was applied successfully, or false if it was disallowed.
   */
  fun map(codePoint: Int, sink: BufferedSink): Boolean
}
|
||||
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Square, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package okhttp3.internal.idn
|
||||
|
||||
import assertk.assertThat
|
||||
import assertk.assertions.isEqualTo
|
||||
import assertk.assertions.isGreaterThan
|
||||
import kotlin.test.assertFailsWith
|
||||
import okio.Buffer
|
||||
import okio.FileSystem
|
||||
import okio.Path.Companion.toPath
|
||||
import org.junit.jupiter.api.BeforeEach
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/** Exercises [IdnaMappingTable.map] against the plain-text mapping table loaded from resources. */
class IdnaMappingTableTest {
  private lateinit var table: IdnaMappingTable

  @BeforeEach
  fun setUp() {
    val tablePath = "/okhttp3/internal/idna/IdnaMappingTable.txt".toPath()
    table = FileSystem.RESOURCES.read(tablePath) { readPlainTextIdnaMappingTable() }
  }

  @Test
  fun regularMappings() {
    assertThat("hello".map()).isEqualTo("hello")
    assertThat("hello-world".map()).isEqualTo("hello-world")
    assertThat("HELLO".map()).isEqualTo("hello")
    assertThat("Hello".map()).isEqualTo("hello")

    // These compound characters map to their components.
    assertThat("¼".map()).isEqualTo("1⁄4")
    assertThat("™".map()).isEqualTo("tm")
  }

  @Test
  fun deviations() {
    assertThat("ß".map()).isEqualTo("ss")
    assertThat("ς".map()).isEqualTo("σ")
    assertThat("\u200c".map()).isEqualTo("")
    assertThat("\u200d".map()).isEqualTo("")
  }

  @Test
  fun ignored() {
    assertThat("\u200b".map()).isEqualTo("")
    assertThat("\ufeff".map()).isEqualTo("")
  }

  @Test
  fun disallowed() {
    // A disallowed code point is reported as an error and contributes no output.
    assertThat("\u0080".mapExpectingErrors()).isEqualTo("")
  }

  @Test
  fun disallowedStd3Valid() {
    assertThat("/".map()).isEqualTo("/")
  }

  @Test
  fun disallowedStd3Mapped() {
    assertThat("\u00b8".map()).isEqualTo("\u0020\u0327")
  }

  @Test
  fun outOfBounds() {
    assertFailsWith<IllegalArgumentException> {
      table.map(-1, Buffer())
    }

    // The lowest and highest legal code points must both be accepted without throwing.
    table.map(0, Buffer())
    table.map(0x10ffff, Buffer())

    assertFailsWith<IllegalArgumentException> {
      table.map(0x110000, Buffer())
    }
  }

  /** Maps every code point of this string, requiring each one to be applied successfully. */
  private fun String.map(): String {
    val mapped = Buffer()
    for (codePoint in codePoints()) {
      val applied = table.map(codePoint, mapped)
      require(applied)
    }
    return mapped.readUtf8()
  }

  /** Maps every code point of this string, asserting that at least one of them was rejected. */
  private fun String.mapExpectingErrors(): String {
    val mapped = Buffer()
    var rejected = 0
    for (codePoint in codePoints()) {
      val applied = table.map(codePoint, mapped)
      if (!applied) rejected += 1
    }
    assertThat(rejected).isGreaterThan(0)
    return mapped.readUtf8()
  }
}
|
||||
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Square, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package okhttp3.internal.idn
|
||||
|
||||
import okio.Buffer
|
||||
import okio.BufferedSink
|
||||
import okio.BufferedSource
|
||||
import okio.ByteString
|
||||
import okio.ByteString.Companion.encodeUtf8
|
||||
import okio.IOException
|
||||
import okio.Options
|
||||
|
||||
/**
 * An in-memory [mapping table] that performs the [mapping step] of IDNA processing.
 *
 * Lookups binary search [mappings], so the list must be ordered by code point range. This
 * implementation favors readability over efficiency.
 *
 * [mapping table]: https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
 * [mapping step]: https://www.unicode.org/reports/tr46/#ProcessingStepMap
 */
class PlainTextIdnaMappingTable internal constructor(
  private val mappings: List<Mapping>,
) : IdnaMappingTable {
  /**
   * Looks up the range containing [codePoint] and writes its mapped form, if any, to [sink].
   *
   * @return false if the code point's type is disallowed, true otherwise.
   * @throws IllegalArgumentException if no range in the table contains [codePoint].
   */
  override fun map(codePoint: Int, sink: BufferedSink): Boolean {
    // Each range compares as 'equal' to any code point inside its inclusive bounds.
    val position = mappings.binarySearch { range ->
      if (range.sourceCodePoint1 < codePoint) {
        -1
      } else if (range.sourceCodePoint0 > codePoint) {
        1
      } else {
        0
      }
    }

    // Code points must be in 0..0x10ffff.
    require(position in mappings.indices) { "unexpected code point: $codePoint" }

    val match = mappings[position]
    return when (match.type) {
      TYPE_DISALLOWED -> false

      TYPE_IGNORED -> true

      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        sink.write(match.mappedTo)
        true
      }

      TYPE_DISALLOWED_STD3_VALID, TYPE_VALID -> {
        sink.writeUtf8CodePoint(codePoint)
        true
      }

      // Any other type writes nothing and is not treated as an error.
      else -> true
    }
  }
}
|
||||
|
||||
|
||||
// Positions of the entries in [optionsDelimiter]. Keep these in sync with that table's order.
private const val DELIMITER_DOT = 0
private const val DELIMITER_SPACE = 1
private const val DELIMITER_SEMICOLON = 2
private const val DELIMITER_HASH = 3
private const val DELIMITER_NEWLINE = 4

/** The single-character delimiters recognized while parsing a line. */
private val optionsDelimiter = Options.of(
  ".".encodeUtf8(), // DELIMITER_DOT.
  " ".encodeUtf8(), // DELIMITER_SPACE.
  ";".encodeUtf8(), // DELIMITER_SEMICOLON.
  "#".encodeUtf8(), // DELIMITER_HASH.
  "\n".encodeUtf8(), // DELIMITER_NEWLINE.
)

/** Matches only a dot; its sole entry is at the same position as [DELIMITER_DOT]. */
private val optionsDot = Options.of(
  ".".encodeUtf8(), // DELIMITER_DOT.
)

// Positions of the entries in [optionsType]. Keep these in sync with that table's order.
private const val TYPE_DEVIATION = 0
private const val TYPE_DISALLOWED = 1
private const val TYPE_DISALLOWED_STD3_MAPPED = 2
private const val TYPE_DISALLOWED_STD3_VALID = 3
private const val TYPE_IGNORED = 4
private const val TYPE_MAPPED = 5
private const val TYPE_VALID = 6

/** The mapping type names, each including the single space that follows it. */
private val optionsType = Options.of(
  "deviation ".encodeUtf8(), // TYPE_DEVIATION.
  "disallowed ".encodeUtf8(), // TYPE_DISALLOWED.
  "disallowed_STD3_mapped ".encodeUtf8(), // TYPE_DISALLOWED_STD3_MAPPED.
  "disallowed_STD3_valid ".encodeUtf8(), // TYPE_DISALLOWED_STD3_VALID.
  "ignored ".encodeUtf8(), // TYPE_IGNORED.
  "mapped ".encodeUtf8(), // TYPE_MAPPED.
  "valid ".encodeUtf8(), // TYPE_VALID.
)
|
||||
|
||||
/** Consumes consecutive space characters, stopping at the first other byte or at end of input. */
private fun BufferedSource.skipWhitespace() {
  val space = ' '.code.toByte()
  // exhausted() is checked first so that buffer[0] is only read when a byte is available.
  while (!exhausted() && buffer[0] == space) {
    skip(1L)
  }
}
|
||||
|
||||
/** Consumes bytes through the next `\n`, or everything that remains if there is no newline. */
private fun BufferedSource.skipRestOfLine() {
  val newlineIndex = indexOf('\n'.code.toByte())
  val byteCount = if (newlineIndex == -1L) {
    buffer.size // No newline: exhaust this source.
  } else {
    newlineIndex + 1 // Include the newline itself.
  }
  skip(byteCount)
}
|
||||
|
||||
/**
 * Parses the lines of `IdnaMappingTable.txt` into a [PlainTextIdnaMappingTable].
 *
 * Blank lines and lines that start with `#` are comments. Other lines may end with a `#` comment.
 * Comments never contribute to the result.
 *
 * Every other line is a sequence of semicolon-separated fields:
 *
 *  * First, a single hex code point (like 0041) or a hex code point range (like 0030..0039).
 *  * Second, a mapping type, like `valid` or `mapped`.
 *  * Third, only for types that carry a mapping target, a sequence of hex code points (like
 *    0031 2044 0034).
 *
 * Anything else on the line is ignored.
 *
 * @throws IOException if a line has an unexpected delimiter or an unrecognized mapping type.
 */
fun BufferedSource.readPlainTextIdnaMappingTable(): PlainTextIdnaMappingTable {
  val target = Buffer()
  val mappings = mutableListOf<Mapping>()

  while (!exhausted()) {
    // Handle lines that carry no data. select() consumes the delimiter that it matches.
    val lineStart = select(optionsDelimiter)
    if (lineStart == DELIMITER_HASH) {
      skipRestOfLine()
      continue
    }
    if (lineStart == DELIMITER_NEWLINE) {
      continue
    }
    if (lineStart == DELIMITER_DOT ||
      lineStart == DELIMITER_SPACE ||
      lineStart == DELIMITER_SEMICOLON
    ) {
      throw IOException("unexpected delimiter")
    }

    // "002F" or "0000..002C"
    val firstCodePoint = readHexadecimalUnsignedLong()
    val lastCodePoint = if (select(optionsDot) == DELIMITER_DOT) {
      // The first dot was consumed by select(); the second one must follow immediately.
      if (readByte() != '.'.code.toByte()) throw IOException("expected '..'")
      readHexadecimalUnsignedLong()
    } else {
      firstCodePoint
    }

    skipWhitespace()
    if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")

    // "valid" or "mapped"
    skipWhitespace()
    val type = select(optionsType)

    when (type) {
      TYPE_DISALLOWED, TYPE_DISALLOWED_STD3_VALID, TYPE_IGNORED, TYPE_VALID -> Unit

      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        skipWhitespace()
        if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")

        // Like "0061" or "0031 2044 0034". The list ends at the line's trailing '#' comment.
        while (true) {
          skipWhitespace()

          val delimiter = select(optionsDelimiter)
          if (delimiter == DELIMITER_HASH) break
          if (delimiter == DELIMITER_DOT ||
            delimiter == DELIMITER_SEMICOLON ||
            delimiter == DELIMITER_NEWLINE
          ) {
            throw IOException("unexpected delimiter")
          }

          target.writeUtf8CodePoint(readHexadecimalUnsignedLong().toInt())
        }
      }

      else -> throw IOException("unexpected type")
    }

    skipRestOfLine()

    // readByteString() drains the shared buffer, leaving it empty for the next line.
    mappings += Mapping(
      sourceCodePoint0 = firstCodePoint.toInt(),
      sourceCodePoint1 = lastCodePoint.toInt(),
      type = type,
      mappedTo = target.readByteString(),
    )
  }

  return PlainTextIdnaMappingTable(mappings)
}
|
||||
|
||||
/**
 * One parsed line of the mapping table.
 *
 * @property sourceCodePoint0 the first code point this entry covers.
 * @property sourceCodePoint1 the last code point this entry covers, inclusive. This equals
 *     [sourceCodePoint0] when the line names a single code point.
 * @property type the index of the line's mapping type, as one of the `TYPE_*` constants.
 * @property mappedTo the UTF-8 encoding of the mapping target. Empty when the line has no target.
 */
internal data class Mapping(
  val sourceCodePoint0: Int,
  val sourceCodePoint1: Int,
  val type: Int,
  val mappedTo: ByteString,
)
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user