1
0
mirror of https://github.com/square/okhttp.git synced 2026-01-14 07:22:20 +03:00

Add some IDN tests (#7715)

* Add some IDN tests

* Fix copyright holder

* Remove unnecessary return
This commit is contained in:
Jesse Wilson
2023-05-03 20:52:37 -04:00
committed by GitHub
parent b6eeec4575
commit ad166b00a8
5 changed files with 338 additions and 1 deletions

View File

@@ -97,6 +97,8 @@ kotlin {
implementation(projects.okhttpSse)
implementation(projects.okhttpCoroutines)
implementation(libs.kotlinx.coroutines.core)
implementation(libs.squareup.moshi)
implementation(libs.squareup.moshi.kotlin)
implementation(libs.squareup.okio.fakefilesystem)
implementation(libs.conscrypt.openjdk)
implementation(libs.junit)

View File

@@ -0,0 +1,50 @@
/*
* Copyright (C) 2023 Block, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3
import com.squareup.moshi.Moshi
import com.squareup.moshi.adapter
import com.squareup.moshi.kotlin.reflect.KotlinJsonAdapterFactory
import okio.FileSystem
import okio.Path.Companion.toPath
/**
 * One case from the Web Platform
 * [toascii.json](https://github.com/web-platform-tests/wpt/blob/master/url/resources/toascii.json)
 * test data.
 *
 * Cases are parsed from a JSON list of objects; each object maps onto one instance of this class.
 * Every property defaults to `null`.
 */
class WebPlatformToAsciiData {
  var input: String? = null
  var output: String? = null
  var comment: String? = null

  override fun toString(): String {
    return "input=$input output=$output"
  }

  companion object {
    /**
     * Reads the `/web-platform-test-toascii.json` resource and decodes it into a list of cases.
     *
     * Throws a [NullPointerException] if the document decodes to JSON `null`.
     */
    fun load(): List<WebPlatformToAsciiData> {
      val resourcePath = "/web-platform-test-toascii.json".toPath()

      @OptIn(ExperimentalStdlibApi::class)
      val listAdapter = Moshi.Builder()
        .add(KotlinJsonAdapterFactory())
        .build()
        .adapter<List<WebPlatformToAsciiData>>()

      val cases = FileSystem.RESOURCES.read(resourcePath) { listAdapter.fromJson(this) }
      return cases!!
    }
  }
}

View File

@@ -0,0 +1,93 @@
/*
* Copyright (C) 2023 Block, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3
import assertk.assertThat
import assertk.assertions.isEqualTo
import assertk.assertions.isNotNull
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import org.junit.jupiter.api.DynamicTest
import org.junit.jupiter.api.TestFactory
/**
 * Runs the web platform ToAscii tests.
 *
 * One dynamic test is produced per loaded case. Inputs listed in [knownFailures] are required to
 * fail their assertion; every other input is required to pass.
 */
class WebPlatformToAsciiTest {
  /** Inputs whose assertion is expected to fail, grouped by the reason given for each group. */
  val knownFailures = setOf(
    // Empty labels are rejected by OkHttp.
    "x..xn--zca",
    "x..ß",

    // Labels longer than 63 code points are rejected by OkHttp; the web platform tests accept them.
    "x01234567890123456789012345678901234567890123456789012345678901x.xn--zca",
    "x01234567890123456789012345678901234567890123456789012345678901x.ß",
    "x01234567890123456789012345678901234567890123456789012345678901x",
    "x01234567890123456789012345678901234567890123456789012345678901†",

    // Domain names longer than 253 code points are rejected by OkHttp; the web platform tests
    // accept them.
    "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x",
    "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--zca",
    "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.ß",

    // OkHttp incorrectly applies transitional processing, which maps 'ß' to 'ss'.
    "-x.ß",
    "ab--c.ß",
    "x-.ß",
    "xn--a.ß",
    "xn--zca.ß",
    "ශ්‍රී",

    // Invalid Punycode is not rejected by OkHttp.
    "xn--",
    "xn--a",
    "xn--a.xn--zca",
    "xn--a-yoc",
    "xn--ls8h=",

    // U+FFFD encoded in Punycode is not rejected by OkHttp.
    "xn--zn7c.com",

    // U+200D is not rejected by OkHttp. https://www.rfc-editor.org/rfc/rfc5892.html#appendix-A.2
    "xn--1ug.example",

    // OkHttp produces `xn--mgba3gch31f` rather than `xn--mgba3gch31f060k`.
    "نامه‌ای",
  )

  /** Builds one dynamic test per loaded case, named after the case's input. */
  @TestFactory
  fun testFactory(): List<DynamicTest> {
    return WebPlatformToAsciiData.load().map { testCase ->
      val input = testCase.input!!
      DynamicTest.dynamicTest(input) {
        // Capture the assertion failure, if any, so it can be checked against the expectation.
        val failure: AssertionError? = try {
          testToAscii(input, testCase.output, testCase.comment)
          null
        } catch (e: AssertionError) {
          e
        }

        val expectedToFail = input in knownFailures
        if (expectedToFail) {
          // Fails when a known failure unexpectedly passes.
          assertThat(failure).isNotNull()
        } else if (failure != null) {
          throw failure
        }
      }
    }
  }

  /**
   * Asserts that the host of `https://<input>/` equals [output]. A URL that fails to parse yields
   * a `null` host. The assertion is named after [comment], falling back to [input].
   */
  private fun testToAscii(input: String, output: String?, comment: String?) {
    val host = "https://$input/".toHttpUrlOrNull()?.host
    assertThat(host, name = comment ?: input).isEqualTo(output)
  }
}

View File

@@ -20,7 +20,7 @@ import okio.Buffer
import okio.BufferedSource
/**
* A test from the [Web Platform URL test suite](https://github.com/w3c/web-platform-tests/tree/master/url).
* A test from the [Web Platform URL test suite](https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json).
*
* Each test is a line of the file `urltestdata.txt`. The format is informally specified by its
* JavaScript parser `urltestparser.js` with which this class attempts to be compatible.

View File

@@ -0,0 +1,192 @@
[
{
"comment": "Label with hyphens in 3rd and 4th position",
"input": "aa--",
"output": "aa--"
},
{
"input": "a†--",
"output": "xn--a---kp0a"
},
{
"input": "ab--c",
"output": "ab--c"
},
{
"comment": "Label with leading hyphen",
"input": "-x",
"output": "-x"
},
{
"input": "-†",
"output": "xn----xhn"
},
{
"input": "-x.xn--zca",
"output": "-x.xn--zca"
},
{
"input": "-x.ß",
"output": "-x.xn--zca"
},
{
"comment": "Label with trailing hyphen",
"input": "x-.xn--zca",
"output": "x-.xn--zca"
},
{
"input": "x-.ß",
"output": "x-.xn--zca"
},
{
"comment": "Empty labels",
"input": "x..xn--zca",
"output": "x..xn--zca"
},
{
"input": "x..ß",
"output": "x..xn--zca"
},
{
"comment": "Invalid Punycode",
"input": "xn--a",
"output": null
},
{
"input": "xn--a.xn--zca",
"output": null
},
{
"input": "xn--a.ß",
"output": null
},
{
"input": "xn--ls8h=",
"output": null
},
{
"comment": "Invalid Punycode (contains non-ASCII character)",
"input": "xn--tešla",
"output": null
},
{
"comment": "Valid Punycode",
"input": "xn--zca.xn--zca",
"output": "xn--zca.xn--zca"
},
{
"comment": "Mixed",
"input": "xn--zca.ß",
"output": "xn--zca.xn--zca"
},
{
"input": "ab--c.xn--zca",
"output": "ab--c.xn--zca"
},
{
"input": "ab--c.ß",
"output": "ab--c.xn--zca"
},
{
"comment": "CheckJoiners is true",
"input": "\u200D.example",
"output": null
},
{
"input": "xn--1ug.example",
"output": null
},
{
"comment": "CheckBidi is true",
"input": "يa",
"output": null
},
{
"input": "xn--a-yoc",
"output": null
},
{
"comment": "processing_option is Nontransitional_Processing",
"input": "ශ්‍රී",
"output": "xn--10cl1a0b660p"
},
{
"input": "نامه‌ای",
"output": "xn--mgba3gch31f060k"
},
{
"comment": "U+FFFD",
"input": "\uFFFD.com",
"output": null
},
{
"comment": "U+FFFD character encoded in Punycode",
"input": "xn--zn7c.com",
"output": null
},
{
"comment": "Label longer than 63 code points",
"input": "x01234567890123456789012345678901234567890123456789012345678901x",
"output": "x01234567890123456789012345678901234567890123456789012345678901x"
},
{
"input": "x01234567890123456789012345678901234567890123456789012345678901†",
"output": "xn--x01234567890123456789012345678901234567890123456789012345678901-6963b"
},
{
"input": "x01234567890123456789012345678901234567890123456789012345678901x.xn--zca",
"output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--zca"
},
{
"input": "x01234567890123456789012345678901234567890123456789012345678901x.ß",
"output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--zca"
},
{
"comment": "Domain excluding TLD longer than 253 code points",
"input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x",
"output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x"
},
{
"input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--zca",
"output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--zca"
},
{
"input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.ß",
"output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--zca"
},
{
"comment": "IDNA ignored code points",
"input": "a\u00ADb",
"output": "ab"
},
{
"input": "a%C2%ADb",
"output": "ab"
},
{
"comment": "Empty host after domain to ASCII",
"input": "\u00AD",
"output": null
},
{
"input": "%C2%AD",
"output": null
},
{
"input": "xn--",
"output": null
},
{
"comment": "Interesting UseSTD3ASCIIRules=false cases",
"input": "≠",
"output": "xn--1ch"
},
{
"input": "≮",
"output": "xn--gdh"
},
{
"input": "≯",
"output": "xn--hdh"
}
]