我有一個列值,如:
輸入:
This is ssn12345 string dob 12-09-1998 stringThis is ssn12345786 stringabc string
輸出:
This is ******** string ************** stringThis is *********** stringabc string
每當我們找到 ssn 或 dob 時,值都應該被屏蔽,如上例所示。動態長度的字串。
我正在嘗試以下查詢 -
SELECT CASE
WHEN start_pos > 0
THEN SUBSTR( 'This is ssn12345 string dob 12-09-1998 string', 1, start_pos - 1)
|| RPAD('*', end_pos - start_pos, '*')
|| SUBSTR('This is ssn12345 string dob 12-09-1998 string', end_pos)
ELSE 'This is ssn12345 string dob 12-09-1998 string'
END AS masked_value
FROM (
SELECT 'This is ssn12345 string dob 12-09-1998 string',
REGEXP_INSTR( 'This is ssn12345 string dob 12-09-1998 string', 'ssn[0-9. -] ' || 'dob[0-9. -] ', 1, 1, 0, NULL) AS start_pos,
REGEXP_INSTR( 'This is ssn12345 string dob 12-09-1998 string', 'ssn[0-9. -] ' || 'dob[0-9. -] ', 1, 1, 1, NULL) AS end_pos
FROM dual
)
我正在嘗試上面的查詢沒有像上面那樣得到預期的結果它只有在我們有 - 輸入時才起作用 -This is ssn12345 dob 12-09-1998 string然后它將輸出屏蔽為 - This is ******** ************** string
uj5u.com熱心網友回復:
從您之前的問題中,只需更改您匹配的正則運算式:
WITH data ( value, masked_value, end_pos ) AS (
SELECT value,
REGEXP_REPLACE(
value,
'ssn([ #]?\d ) |dob ?(-?\d ) ',
RPAD(
'*',
LENGTH(
REGEXP_SUBSTR(value, 'ssn([ #]?\d ) |dob ?(-?\d ) ', 1, 1)
),
'*'
),
1,
1
),
REGEXP_INSTR(
value,
'ssn([ #]?\d ) |dob ?(-?\d ) ',
1,
1,
1
)
FROM table_name
UNION ALL
SELECT value,
REGEXP_REPLACE(
masked_value,
'ssn([ #]?\d ) |dob ?(-?\d ) ',
RPAD(
'*',
LENGTH(
REGEXP_SUBSTR(masked_value, 'ssn([ #]?\d ) |dob ?(-?\d ) ', end_pos, 1)
),
'*'
),
end_pos,
1
),
REGEXP_INSTR(
masked_value,
'ssn([ #]?\d ) |dob ?(-?\d ) ',
end_pos,
1,
1
)
FROM data
WHERE end_pos > 0
)
SEARCH DEPTH FIRST BY value SET value_order
SELECT value, masked_value
FROM data
WHERE end_pos = 0;
其中,對于樣本資料:
CREATE TABLE table_name ( value ) AS
SELECT 'This is ssn1 string' FROM DUAL UNION ALL
SELECT 'This is ssn12 string' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string' FROM DUAL UNION ALL
SELECT 'This is ssn123456789 string' FROM DUAL UNION ALL
SELECT 'This is ssn#12345 string ssn 5678 9765' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT 'This is dob 12-09-1998 string' FROM DUAL;
輸出:
價值 MASKED_VALUE 這是 dob 12-09-1998 字串 這是 **************** 字串 這是 ssn#12345 字串 ssn 5678 9765 這是 ********* 字串 ************* 這是 ssn1 字串 這是****字串 這是 ssn12 字串 這是*****字串 這是 ssn12345 字串 這是********字串 這是 ssn12345 字串 dob 12-09-1998 字串 這是 ******** 字串 **************** 字串 這是 ssn123456789 字串 這是 ************ 字串
db<>在這里擺弄
uj5u.com熱心網友回復:
如果要替換固定次數的模式,則可以使用:
SELECT value,
CASE
WHEN start_pos2 > 0
THEN SUBSTR( value, 1, start_pos1 - 1)
|| RPAD('*', end_pos1 - start_pos1 - 1, '*')
|| SUBSTR(value, end_pos1, start_pos2 - end_pos1)
|| RPAD('*', end_pos2 - start_pos2, '*')
|| SUBSTR(value, end_pos2)
WHEN start_pos1 > 0
THEN SUBSTR( value, 1, start_pos1 - 1)
|| RPAD('*', end_pos1 - start_pos1, '*')
|| SUBSTR(value, end_pos1)
ELSE value
END AS masked_value
FROM (
SELECT value,
REGEXP_INSTR( value, 'ssn([ #]*\d ) |dob([. -]*\d ) ', 1, 1, 0, NULL) AS start_pos1,
REGEXP_INSTR( value, 'ssn([ #]*\d ) |dob([. -]*\d ) ', 1, 1, 1, NULL) AS end_pos1,
REGEXP_INSTR( value, 'ssn([ #]*\d ) |dob([. -]*\d ) ', 1, 2, 0, NULL) AS start_pos2,
REGEXP_INSTR( value, 'ssn([ #]*\d ) |dob([. -]*\d ) ', 1, 2, 1, NULL) AS end_pos2
FROM table_name
);
注意:這僅適用于字串中的前兩次出現。如果您需要匹配動態的出現次數,那么您將需要使用遞回查詢。
其中,對于樣本資料:
CREATE TABLE table_name ( value ) AS
SELECT 'This is ssn1 string' FROM DUAL UNION ALL
SELECT 'This is ssn12 string' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string' FROM DUAL UNION ALL
SELECT 'This is ssn123456789 string' FROM DUAL UNION ALL
SELECT 'This is ssn#12345 string ssn 5678 9765' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT 'This is dob 12-09-1998 string' FROM DUAL;
輸出:
價值 MASKED_VALUE 這是 ssn1 字串 這是****字串 這是 ssn12 字串 這是*****字串 這是 ssn12345 字串 這是********字串 這是 ssn123456789 字串 這是 ************ 字串 這是 ssn#12345 字串 ssn 5678 9765 這是 ******** 字串 ************ 這是 ssn12345 字串 dob 12-09-1998 字串 這是 ******* 字串 *************** 字串 這是 dob 12-09-1998 字串 這是 **************** 字串
db<>在這里擺弄
uj5u.com熱心網友回復:
這有效,但它用固定數量的星號替換任何匹配的字串。
WITH sample_data (c) AS
(
SELECT 'This is ssn12. 34-5 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT '5 Strings: ssn12. 34-5, dob 12-09-1998, nr 3: ssn18898, nr 4: dob 01-01-2000, nr 5:ssn1663' FROM DUAL UNION ALL
SELECT 'This is ssn12345786 stringabc string' FROM DUAL
)
SELECT regexp_replace(c,'((ssn[0-9. -] )|(dob [0-9. -] ))[0-9.-]','*******') FROM sample_data;
This is ******* string ******* string
5 Strings: *******, *******, nr 3: *******, nr 4: *******, nr 5:*******
This is ******* stringabc string
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/327526.html
上一篇:按日期間隔查找重復項
