1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Bind sqlite3_db_filename() and (closely related) (A) add many more docs about the UTF-8/MUTF-8 discrepancy (B) start adding internals to enable us to perform the standard-UTF-8-to-Java conversion from C.

FossilOrigin-Name: 586720fa714ac74491cd85d0c6645242e55e5989ad312ef6e15e0b0acc6906ff
This commit is contained in:
stephan
2023-08-06 10:14:53 +00:00
parent 153288dc89
commit 1bce6b468e
7 changed files with 171 additions and 25 deletions

View File

@ -187,7 +187,10 @@
#define PtrGet_sqlite3_value(OBJ) getNativePointer(env,OBJ,S3ClassNames.sqlite3_value)
#define PtrGet_sqlite3_context(OBJ) getNativePointer(env,OBJ,S3ClassNames.sqlite3_context)
/* Helpers for Java value reference management. */
#define REF_G(VAR) (*env)->NewGlobalRef(env, VAR)
/*
** Wraps JNI's NewGlobalRef(): hands back a new global reference to v,
** or NULL, without calling into JNI, when v itself is NULL.
*/
static inline jobject new_global_ref(JNIEnv *env, jobject v){
  if( !v ){
    return NULL;
  }
  return (*env)->NewGlobalRef(env, v);
}
/* All four of these macros expand to code which refers to a variable
   named "env", so they may only be used where a JNIEnv pointer of
   that name is in scope. */
/* REF_G: evaluates to a new global reference to VAR, via
   new_global_ref(). */
#define REF_G(VAR) new_global_ref(env, (VAR))
/* REF_L: evaluates to a new local reference to VAR. */
#define REF_L(VAR) (*env)->NewLocalRef(env, VAR)
/* UNREF_G/UNREF_L: delete the global resp. local reference VAR if it
   is non-NULL. Caution: these expand to a bare, unbraced "if"
   statement and VAR is evaluated twice, so do not use them as the
   body of an if/else without braces and do not pass expressions with
   side effects. */
#define UNREF_G(VAR) if(VAR) (*env)->DeleteGlobalRef(env, (VAR))
#define UNREF_L(VAR) if(VAR) (*env)->DeleteLocalRef(env, (VAR))
@ -337,9 +340,15 @@ struct NphCacheLine {
typedef struct JNIEnvCacheLine JNIEnvCacheLine;
struct JNIEnvCacheLine {
JNIEnv *env /* env in which this cache entry was created */;
//! The various refs to global classes might be cacheable a single
// time globally. Information online seems inconsistent on that
// point.
jclass globalClassObj /* global ref to java.lang.Object */;
jclass globalClassLong /* global ref to java.lang.Long */;
jclass globalClassString /* global ref to java.lang.String */;
jobject globalClassCharsetUtf8 /* global ref to StandardCharset.UTF_8 */;
jmethodID ctorLong1 /* the Long(long) constructor */;
jmethodID ctorStringBA /* the String(byte[],Charset) constructor */;
jobject currentStmt /* Current Java sqlite3_stmt object being
prepared, stepped, reset, or
finalized. Needed for tracing, the
@ -563,11 +572,32 @@ static JNIEnvCacheLine * S3Global_JNIEnvCache_cache(JNIEnv * const env){
row->env = env;
row->globalClassObj = REF_G((*env)->FindClass(env,"java/lang/Object"));
EXCEPTION_IS_FATAL("Error getting reference to Object class.");
row->globalClassLong = REF_G((*env)->FindClass(env,"java/lang/Long"));
EXCEPTION_IS_FATAL("Error getting reference to Long class.");
row->ctorLong1 = (*env)->GetMethodID(env, row->globalClassLong,
"<init>", "(J)V");
EXCEPTION_IS_FATAL("Error getting reference to Long constructor.");
row->globalClassString = REF_G((*env)->FindClass(env,"java/lang/String"));
EXCEPTION_IS_FATAL("Error getting reference to String class.");
row->ctorStringBA =
(*env)->GetMethodID(env, row->globalClassString,
"<init>", "([BLjava/nio/charset/Charset;)V");
EXCEPTION_IS_FATAL("Error getting reference to String(byte[],Charset) ctor.");
{ /* StandardCharsets.UTF_8 */
jfieldID fUtf8;
jclass const klazzSC =
(*env)->FindClass(env,"java/nio/charset/StandardCharsets");
EXCEPTION_IS_FATAL("Error getting reference to StndardCharsets class.");
fUtf8 = (*env)->GetStaticFieldID(env, klazzSC, "UTF_8",
"Ljava/nio/charset/Charset;");
EXCEPTION_IS_FATAL("Error getting StndardCharsets.UTF_8 field.");
row->globalClassCharsetUtf8 =
REF_G((*env)->GetStaticObjectField(env, klazzSC, fUtf8));
EXCEPTION_IS_FATAL("Error getting reference to StandardCharsets.UTF_8.");
}
return row;
}
@ -639,6 +669,9 @@ static void JNIEnvCacheLine_clear(JNIEnvCacheLine * const p){
int i;
UNREF_G(p->globalClassObj);
UNREF_G(p->globalClassLong);
UNREF_G(p->globalClassString);
UNREF_G(p->globalClassCharsetUtf8);
UNREF_G(p->currentStmt);
#ifdef SQLITE_ENABLE_FTS5
UNREF_G(p->jFtsExt);
UNREF_G(p->jPhraseIter.klazz);
@ -1993,12 +2026,43 @@ JDECL(jint,1create_1function)(JENV_JSELF, jobject jDb, jstring jFuncName,
return create_function(env, jDb, jFuncName, nArg, eTextRep, jFunctor);
}
/*
JDECL(jint,1create_1window_1function)(JENV_JSELF, jstring jFuncName, jint nArg,
jint eTextRep, jobject jFunctor){
return create_function_mega(env, jFuncName, nArg, eTextRep, jFunctor);
/*
** JNI binding for sqlite3_db_filename(). jDbName, if not NULL, holds
** the bytes of the db name, which are passed on to the C API as a
** C-style string. On success the result is a new Java byte array
** holding the bytes of the filename (without a trailing NUL byte).
** Returns NULL if jDb has no associated per-db state, if the bytes of
** jDbName cannot be fetched, if sqlite3_db_filename() returns NULL,
** or if the result array cannot be allocated.
*/
JDECL(jbyteArray,1db_1filename)(JENV_JSELF, jobject jDb, jbyteArray jDbName){
#if 1
  PerDbStateJni * const pState = PerDbStateJni_for_db(env, jDb, 0, 0);
  jbyte *pName = 0;
  jbyteArray jResult = 0;
  const char *zFile;
  if( !pState ) return 0;
  if( jDbName ){
    pName = JBA_TOC(jDbName);
    if( !pName ) return 0;
  }
  zFile = sqlite3_db_filename(pState->pDb, (const char *)pName);
  if( zFile ){
    /* Copy the raw bytes as-is: decoding them into a String is left
       to the Java side. */
    const jint nByte = (jint)sqlite3Strlen30(zFile);
    jResult = (*env)->NewByteArray(env, nByte);
    if( jResult ){
      (*env)->SetByteArrayRegion(env, jResult, 0, nByte,
                                 (const jbyte *)zFile);
    }
  }
  JBA_RELEASE(jDbName, pName);
  return jResult;
#else
  /* For comparison, this impl expects a jstring jDbName and returns a
     jstring for significant code savings but it's not
     MUTF-8-safe. With this impl, the Java-side byte-array-using
     sqlite3_db_filename() impl is unnecessary. */
  JDECL(jstring,1db_1filename)(JENV_JSELF, jobject jDb, jstring jDbName){
    PerDbStateJni * const ps = PerDbStateJni_for_db(env, jDb, 0, 0);
    const char *zFilename = (ps && jDbName) ? JSTR_TOC(jDbName) : 0;
    const char *zRv;
    if( !ps || (jDbName && !zFilename)) return 0;
    zRv = sqlite3_db_filename(ps->pDb, zFilename ? zFilename : "main");
    JSTR_RELEASE(jDbName, zFilename);
    return zRv ? (*env)->NewStringUTF(env, zRv) : 0;
  }
#endif
}
*/
JDECL(jstring,1errmsg)(JENV_JSELF, jobject jpDb){
return (*env)->NewStringUTF(env, sqlite3_errmsg(PtrGet_sqlite3(jpDb)));

View File

@ -1091,6 +1091,14 @@ JNIEXPORT jint JNICALL Java_org_sqlite_jni_SQLite3Jni_sqlite3_1create_1function
JNIEXPORT jint JNICALL Java_org_sqlite_jni_SQLite3Jni_sqlite3_1data_1count
(JNIEnv *, jclass, jobject);
/*
* Class: org_sqlite_jni_SQLite3Jni
* Method: sqlite3_db_filename
* Signature: (Lorg/sqlite/jni/sqlite3;[B)[B
*/
JNIEXPORT jbyteArray JNICALL Java_org_sqlite_jni_SQLite3Jni_sqlite3_1db_1filename
(JNIEnv *, jclass, jobject, jbyteArray);
/*
* Class: org_sqlite_jni_SQLite3Jni
* Method: sqlite3_errcode

View File

@ -22,12 +22,8 @@ public interface Authorizer {
callback, with one caveat: the string values passed here were
initially (at the C level) encoded in standard UTF-8. If they
contained any constructs which are not compatible with MUTF-8,
these strings will not have the expected values. The strings
passed through the authorizer would only be adversely affected by
that if the database tables and columns use "highly exotic"
names. Any names which contain no NUL bytes, nor characters
outside of the Basic Multilingual Plane are unaffected by this
discrepancy.
these strings will not have the expected values. For further
details, see the documentation for the SQLite3Jni class.
Must not throw.
*/

View File

@ -60,7 +60,67 @@ import java.lang.annotation.ElementType;
https://sqlite.org/c3ref/intro.html
A small handful of Java-specific APIs have been added.
A handful of Java-specific APIs have been added.
******************************************************************
*** Warning regarding Java's Modified UTF-8 vs standard UTF-8: ***
******************************************************************
SQLite internally uses UTF-8 encoding, whereas Java natively uses
UTF-16. Java JNI has routines for converting to and from UTF-8,
_but_ JNI uses what its docs call modified UTF-8 (see links below).
Care must be taken when converting Java strings to or from standard
UTF-8 to ensure that the proper conversion is performed. In short,
Java's `String.getBytes(StandardCharsets.UTF_8)` performs the proper
conversion in Java, and there are no JNI C APIs for that conversion
(JNI's `NewStringUTF()` requires its input to be in MUTF-8).
The known consequences and limitations this discrepancy places on
the SQLite3 JNI binding include:
- Any functions which return client-side data from a database
take extra care to perform proper conversion, at the cost of
efficiency.
- Functions which return database identifiers require those
identifiers to have identical representations in UTF-8 and
MUTF-8. They do not perform such conversions (A) because of the
much lower risk of an encoding discrepancy and (B) to avoid
significant extra code involved (see both the Java- and C-side
implementations of sqlite3_db_filename() for an example). Names
of databases, tables, columns, collations, and functions MUST NOT
contain characters which differ in MUTF-8 and UTF-8, or certain
APIs will mis-translate them on their way between languages
(possibly leading to a crash).
- sqlite3_trace_v2() is also currently affected by this, in that
it requires that traced SQL statements be compatible with
MUTF-8. The alternative would be to perform two extra layers of
conversion for that performance-sensitive function: one from
UTF-8 to a byte-array before passing the data from C to Java,
and then from byte-array to String in the tracer implementation.
- C functions which take C-style strings without a length argument
require special care when taking input from Java. In particular,
Java strings converted to byte arrays for encoding purposes are
not NUL-terminated, and conversion to a Java byte array must be
careful to add one. Functions which take a length do not require
this. Search the SQLite3Jni class for "\0" for many examples.
- Similarly, C-side code which deals with strings which might not be
NUL-terminated (e.g. while tokenizing in FTS5-related code) cannot
use JNI's new-string functions to return them to Java because none
of those APIs take a string-length argument. Such cases must
return byte arrays instead of strings.
Further reading:
- https://stackoverflow.com/questions/57419723
- https://stackoverflow.com/questions/7921016
- https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#unicode
- https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8
*/
public final class SQLite3Jni {
static {
@ -84,7 +144,7 @@ public final class SQLite3Jni {
undefined if any database objects are (A) still active at the
time it is called _and_ (B) calls are subsequently made into the
library with such a database. Doing so will, at best, lead to a
crash. It worst, it will lead to the db possibly misbehaving
crash. At worst, it will lead to the db possibly misbehaving
because some of its Java-bound state has been cleared. There is
no immediate harm in (A) so long as condition (B) is not met.
This process does _not_ actually close any databases or finalize
@ -344,6 +404,23 @@ public final class SQLite3Jni {
public static native int sqlite3_data_count(@NotNull sqlite3_stmt stmt);
/**
   Native half of sqlite3_db_filename(). In order to support the full
   range of UTF-8 filenames, we require an extra layer of conversion
   via a byte[].

   dbName holds the bytes of the database name. The native side hands
   those bytes to the C API as a C-style string, so the array must end
   with a NUL byte. The result holds the bytes of the filename, without
   a trailing NUL, or is null if the native side has no filename to
   return.
*/
private static native byte[] sqlite3_db_filename(@NotNull sqlite3 db,
@NotNull byte dbName[]);
/**
   Works like the C API of the same name, with one difference: a null
   dbName is treated as "main".

   The name is passed to the native layer as NUL-terminated standard
   UTF-8 bytes and the resulting bytes are decoded as standard
   UTF-8. Returns null if the native layer returns no filename.
*/
public static String sqlite3_db_filename(@NotNull sqlite3 db,
                                         @Nullable String dbName){
  final String name = (null == dbName) ? "main" : dbName;
  // The native side treats the bytes as a C-style string, so append
  // the NUL terminator before encoding.
  final byte[] nameBytes = (name + "\0").getBytes(StandardCharsets.UTF_8);
  final byte[] fileBytes = sqlite3_db_filename(db, nameBytes);
  if( null == fileBytes ){
    return null;
  }
  return new String(fileBytes, StandardCharsets.UTF_8);
}
public static native int sqlite3_errcode(@NotNull sqlite3 db);
public static native int sqlite3_extended_errcode(@NotNull sqlite3 db);

View File

@ -734,6 +734,7 @@ public class Tester1 {
rc = sqlite3_open(dbName, db2);
++metrics.dbOpen;
affirm( 0 == rc );
affirm( sqlite3_db_filename(db1, null).endsWith(dbName) );
final ValueHolder<Boolean> xDestroyed = new ValueHolder<>(false);
final ValueHolder<Integer> xBusyCalled = new ValueHolder<>(0);