以下は BigQuery の標準 SQL 向けのクエリです。
#standardSQL
-- DecodeUnicode(s): turns a string of \uXXXX escape sequences into the
-- characters those escapes denote.
--   s       : text such as r'\u6599\u91d1'. It is split on the literal "\u"
--             marker and every non-empty piece is parsed as a hexadecimal
--             number, so s is expected to contain escape sequences only; any
--             other text following an escape becomes part of that hex piece.
--   returns : the STRING built from the parsed code points, in input order.
-- NOTE(review): surrogate-pair escapes (e.g. \ud83d\ude00) are parsed one
-- 16-bit unit at a time; confirm CODE_POINTS_TO_STRING accepts them before
-- relying on this function for characters outside the BMP.
CREATE TEMP FUNCTION DecodeUnicode(s STRING) AS (
  (
    SELECT
      -- ARRAY_AGG only guarantees element order when ORDER BY is given, so the
      -- SPLIT offset is carried along to keep the code points in input order.
      CODE_POINTS_TO_STRING(
        ARRAY_AGG(CAST(CONCAT('0x', hex) AS INT64) ORDER BY pos)
      )
    FROM UNNEST(SPLIT(s, '\\u')) AS hex WITH OFFSET AS pos
    -- SPLIT yields an empty leading piece before the first "\u"; skip it.
    WHERE hex != ''
  )
);
-- Replaces every \uXXXX escape sequence in `original` with the character it
-- denotes and leaves all other characters untouched.
-- Output: one row per distinct `original`, with columns (original, decoded).
WITH `yourTable` AS (
  -- Sample input; replace with the real table.
  SELECT r'$-\u6599\u91d1\u304c\u9ad8\u3059\u304e\uff01\uff01\uff01' AS original UNION ALL
  SELECT r'abcd'
),
-- Collapse duplicate inputs up front: the final GROUP BY is keyed on
-- `original`, so duplicates would otherwise interleave their tokens.
distinct_originals AS (
  SELECT DISTINCT original
  FROM `yourTable`
),
-- One row per token of each input, where a token is either a complete
-- \uXXXX escape (6 characters) or a single ordinary character.
-- The pattern has no capturing group, so REGEXP_EXTRACT_ALL returns the whole
-- match; (?s) lets "." match newlines too, and the hex class accepts both
-- lower- and upper-case digits.
tokens AS (
  SELECT
    d.original,
    pos,
    tok
  FROM distinct_originals AS d
  CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(d.original, r'(?s)\\u[0-9a-fA-F]{4}|.')) AS tok WITH OFFSET AS pos
),
-- Decode each distinct escape once. Only escape tokens are 6 characters long;
-- every other token is exactly one character.
uchars AS (
  SELECT
    e.c,
    -- Lower-cased so the hex digits reach DecodeUnicode in one canonical form.
    DecodeUnicode(LOWER(e.c)) AS uchar
  FROM (
    SELECT DISTINCT tok AS c
    FROM tokens
    WHERE LENGTH(tok) = 6
  ) AS e
)
SELECT
  t.original,
  -- Escape tokens find their decoded character through the join; ordinary
  -- characters have no match and fall back to themselves.
  STRING_AGG(IFNULL(u.uchar, t.tok), '' ORDER BY t.pos) AS decoded
FROM tokens AS t
LEFT JOIN uchars AS u
  ON u.c = t.tok
GROUP BY t.original
-- Append "ORDER BY original" when a deterministic row order is required.
このクエリは、文字列中のすべての Unicode エスケープ（\uXXXX 形式）を抽出してデコードし、元の文字列内の該当箇所と置き換えます。エスケープ以外の文字はそのまま残ります。出力は以下のようになります。
original decoded
$-\u6599\u91d1\u304c\u9ad8\u3059\u304e\uff01\uff01\uff01 $-料金が高すぎ！！！
abcd abcd
また、 –