2017-11-15 10 views
2
I need help extracting the first name, middle name and last name from a 
free-text full name. How can I extract them for all of the formats below? 
In particular, I need to figure out how to handle formats 2, 5, 7 and 9. 

-- Splits a free-text full name into Lastname / Middlefullname / Firstname.
--
-- Rules applied to each row:
--   * If the name contains @DELIMITER1 (comma), everything before the first
--     comma is the last name ('Van Pess,Wen B' -> 'Van Pess').
--   * Otherwise, if it contains @DELIMITER2 (space), everything before the
--     first space is the last name ('CERVANTES MANDY' -> 'CERVANTES').
--   * Otherwise (e.g. 'John.Mills') the last name is the empty string and the
--     whole value is returned as the first name.
--   * The remainder (the "given" part) is split at its LAST space: the text
--     after it is the middle name, the text before it is the first name.
--     With no space in the remainder the middle name is NULL.
--
-- Both delimiters are assumed to be single characters (offsets use +1).
-- Parenthesised text such as '(DAUM)' is left in the last name untouched.

--fullname sample data
DECLARE @name TABLE
(fullname VARCHAR(100))
INSERT INTO @name (fullname) SELECT
'Malone,Susan M' UNION ALL SELECT --1
'Conn,Chris G' UNION ALL SELECT --2
'Van Pess,Wen B' UNION ALL SELECT --3
'DESHPANDE, ANN W.' UNION ALL SELECT --4
'Asif,LEE' UNION ALL SELECT --5
'CERVANTES MANDY' UNION ALL SELECT --6
'Bill, Dave' UNION ALL SELECT --7
'SMITH,ANN M' UNION ALL SELECT --8
'BHULLER, MATT' UNION ALL SELECT --9
'KIM (DAUM), GAIL' UNION ALL SELECT --10
'John.Mills' --11

-- @MAX_LENGTH caps how many characters are read after a delimiter.
-- NOTE(review): the column is VARCHAR(100), so a remainder longer than 50
-- characters would be truncated - confirm 50 is intended.
DECLARE @DELIMITER1 varchar(5), @DELIMITER2 varchar(5), @MAX_LENGTH int
SET @DELIMITER1 = ','
SET @DELIMITER2 = ' '
SET @MAX_LENGTH = 50

SELECT
    n.fullname,

    --Last Name: text before the chosen delimiter, trimmed
    name_parts.last_part AS Lastname,

    --Middle Name: text after the last space of the given part, if any
    CASE
        WHEN given_split.last_space > 0
        THEN SUBSTRING(name_parts.given,
                       LEN(name_parts.given) - given_split.last_space + 2,
                       @MAX_LENGTH)
        ELSE NULL
    END AS Middlefullname,

    --First Name: text before the last space of the given part
    CASE
        WHEN given_split.last_space > 0
        -- hyphens are stripped only when a middle name is present,
        -- exactly as the original query did
        THEN REPLACE(
                 RTRIM(LEFT(name_parts.given,
                            LEN(name_parts.given) - given_split.last_space)),
                 '-', '')
        ELSE name_parts.given
    END AS Firstname
FROM @name AS n
-- position of the first comma and of the first space (0 = not found)
CROSS APPLY (VALUES (
    CHARINDEX(@DELIMITER1, n.fullname),
    CHARINDEX(@DELIMITER2, n.fullname)
)) AS delim (comma_at, space_at)
-- the comma wins when present; otherwise fall back to the first space
CROSS APPLY (VALUES (
    CASE WHEN delim.comma_at > 0 THEN delim.comma_at ELSE delim.space_at END
)) AS cut (split_at)
-- last name = left of the delimiter, given part = right of it, both trimmed;
-- with no delimiter at all the last name is '' and given is the whole value
CROSS APPLY (VALUES (
    LTRIM(RTRIM(LEFT(n.fullname,
                     CASE WHEN cut.split_at > 0 THEN cut.split_at - 1 ELSE 0 END))),
    LTRIM(RTRIM(SUBSTRING(n.fullname, cut.split_at + 1, @MAX_LENGTH)))
)) AS name_parts (last_part, given)
-- distance of the last space from the END of the given part (0 = none)
CROSS APPLY (VALUES (
    CHARINDEX(@DELIMITER2, REVERSE(name_parts.given))
)) AS given_split (last_space)
ORDER BY n.fullname
+0

このようなデータをデータベースに保存しているのですか?これは「アンパック」を必要とするデータであり、「アトミック」なデータではありません。いずれかの規則を破ると、第一正規形に違反します。 – Zorkolot

答えて

0

まず、「名前の各部分」をドットで連結するロジックを示します。コード中のコメントも参照してください。

-- Normalises each free-text full name into a dot-separated string
-- (e.g. 'DESHPANDE, ANN W.' -> 'DESHPANDE.ANN.W') and counts its separators,
-- so the parts can later be picked out with parsename.

--fullname sample data
DECLARE @name TABLE
(nameid int identity, fullname VARCHAR(100))
INSERT INTO @name (fullname) SELECT
'Malone,Susan M' UNION ALL SELECT --1
'Conn,Chris G' UNION ALL SELECT --2
'Van Pess,Wen B' UNION ALL SELECT --3
'DESHPANDE, ANN W.' UNION ALL SELECT --4
'Asif,LEE' UNION ALL SELECT --5
'CERVANTES MANDY' UNION ALL SELECT --6
'Bill, Dave' UNION ALL SELECT --7
'SMITH,ANN M' UNION ALL SELECT --8
'BHULLER, MATT' UNION ALL SELECT --9
'KIM (DAUM), GAIL' UNION ALL SELECT --10
'John.Mills';--11


select original = fullname, prepped = dotted.fn, total.spaces
from @name
-- x = start of a "(...)," group, 0 when the name has none
cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn)
-- drop the "(...)," group if present, then turn every comma and dot into a space
cross apply (values (
    replace(replace(
        case
        when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000)
        else fn
        end, ',', '.'),'.',' '))) prep(fn)
-- trim, and collapse DOUBLE spaces to one (two passes, so runs of up to four
-- spaces are handled); without this ', ' would yield an empty name part
cross apply (values (replace(rtrim(ltrim(replace(prep.fn,'  ',' '))),'  ',' '))) tidy(fn)
-- number of separators = number of name parts - 1
cross apply (values (len(tidy.fn)-len(replace(tidy.fn,' ','')))) total(spaces)
-- replace spaces with dots so parsename can address the parts
cross apply (values (replace(tidy.fn, ' ','.'))) dotted(fn)
order by nameid; -- keep the rows in insert order

これは次の結果を返します。

original     prepped    spaces 
------------------------ -------------------- ------- 
Malone,Susan M   Malone.Susan.M  2 
Conn,Chris G    Conn.Chris.G   2 
Van Pess,Wen B   Van.Pess.Wen.B  3 
DESHPANDE, ANN W.  DESHPANDE.ANN.W  2 
Asif,LEE     Asif.LEE    1 
CERVANTES MANDY   CERVANTES.MANDY  1 
Bill, Dave    Bill.Dave   1 
SMITH,ANN M    SMITH.ANN.M   2 
BHULLER, MATT   BHULLER.MATT   1 
KIM (DAUM), GAIL   KIM.GAIL    1 
John.Mills    John.Mills   1 

残りは、次のように parsename を使用して行うことができます。

-- Rebuilds each free-text full name as "<given names> <last name>":
-- the CTE normalises the name into a dot-separated string, and the outer
-- query reorders its parts with parsename (part 1 = right-most part).

--fullname sample data
DECLARE @name TABLE
(nameid int identity, fullname VARCHAR(100))
INSERT INTO @name (fullname) SELECT
'Malone,Susan M' UNION ALL SELECT --1
'Conn,Chris G' UNION ALL SELECT --2
'Van Pess,Wen B' UNION ALL SELECT --3
'DESHPANDE, ANN W.' UNION ALL SELECT --4
'Asif,LEE' UNION ALL SELECT --5
'CERVANTES MANDY' UNION ALL SELECT --6
'Bill, Dave' UNION ALL SELECT --7
'SMITH,ANN M' UNION ALL SELECT --8
'BHULLER, MATT' UNION ALL SELECT --9
'KIM (DAUM), GAIL' UNION ALL SELECT --10
'John.Mills';--11

with clean as
(
    select nameid, original = fullname, prepped = dotted.fn, total.spaces
    from @name
    -- x = start of a "(...)," group, 0 when the name has none
    cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn)
    -- drop the "(...)," group if present, then turn every comma and dot into a space
    cross apply (values (
        replace(replace(
            case
            when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000)
            else fn
            end, ',', '.'),'.',' '))) prep(fn)
    -- trim, and collapse DOUBLE spaces to one (two passes); without this
    -- ', ' would yield an empty part and shift every parsename index
    cross apply (values (replace(rtrim(ltrim(replace(prep.fn,'  ',' '))),'  ',' '))) tidy(fn)
    -- number of separators = number of name parts - 1
    cross apply (values (len(tidy.fn)-len(replace(tidy.fn,' ','')))) total(spaces)
    -- replace spaces with dots so parsename can address the parts
    cross apply (values (replace(tidy.fn, ' ','.'))) dotted(fn)
)
select original, cleaned =
    case spaces
    when 0 then prepped -- single token: nothing to reorder
    when 1 then parsename(prepped,1)+' '+parsename(prepped,2)
    when 2 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)
    -- NOTE(review): a two-word surname comes out reversed here
    -- ('Van Pess,Wen B' -> 'Wen B Pess Van'); kept as in the original
    when 3 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)+
      ' '+parsename(prepped,4)
    -- parsename addresses at most four parts, so longer names are not handled
    else null
    end
from clean
order by nameid; -- keep the rows in insert order

戻り値:

original    cleaned 
-------------------- ------------------ 
Malone,Susan M  Susan M Malone 
Conn,Chris G   Chris G Conn 
Van Pess,Wen B  Wen B Pess Van 
DESHPANDE, ANN W. ANN W DESHPANDE 
Asif,LEE    LEE Asif 
CERVANTES MANDY  MANDY CERVANTES 
Bill, Dave   Dave Bill 
SMITH,ANN M   ANN M SMITH 
BHULLER, MATT  MATT BHULLER 
KIM (DAUM), GAIL  GAIL KIM 
John.Mills   Mills John 

また、正規表現の CLR 関数(mdq.RegexReplace)を使用することもできます。数年前に私が SSC に投稿した記事も参照してください。

+0

アランさん、助けてくれてありがとうございます! parsename を使用する場合、データにドットが 4 つ以上あるときはどうすればよいですか? – Jason312

+0

パーツが 3 つ以上ある場合、私が投稿した方法ではうまくいきません。今日か今晩のうちに、3 つ以上のパーツを処理する方法を投稿するつもりです。 –

関連する問題