2016-08-08 6 views
3

このようなデータが何千行も入っているテーブルがあります。各行から特定の文字列だけを取り出して返すには、どうすればよいでしょうか。

note 
---- 
LB MN IM 12 18 20 CIN # EW80851R This is tJ 
ified KGM nteal icne cinac12345T Cannot locate 
NCR Last verified 06 05 14 cin number ty56478P for the front 
OD 00612 Last Verified cin#ad89521Y Me side C 05 05 14 
SPC 0 VERIFIED PD IMB cin PU12301R Last Verified 
PC PO CON FC D Verified 02/29/2016 No Copy CIN#FG62301F 

各行には、単語cinとそれに続くいくつかの文字が含まれています。例えば

CIN # EW80851R, cinac12345T, cin number ty56478P, cin#ad89521Y, cin PU12301R, CIN#FG62301F

次のような結果を返すクエリは、どのように書けばよいでしょうか。

note 
---- 
cinEW80851R 
cinac12345T 
cinty56478P 
cinad89521Y 
cinPU12301R 
CINFG62301F 

答えて

1

XML を少し利用し、文字列パーサー関数と CROSS APPLY を組み合わせれば、データセット全体をまとめて処理できます。

紛らわしい記号は # だけだと仮定しています。また、確認用に 2 番目のレコードを追加しました。

-- Purpose: extract every "cin..." token from free-text notes, returning one
-- row per token as (ID, CIN), e.g. 'CIN # EW80851R' -> 'cinEW80851R'.
-- Depends on [dbo].[udf-Str-Parse] to split the normalized text on spaces.

-- Sample data; row 2 is a dummy record added to show multi-row handling.
Declare @YourTable table (ID int,Note varchar(max)) 
Insert into @YourTable (ID,Note) values 
(1,'LB MN IM 12 18 20 CIN # EW80851R This is tJ ified KGM nteal icne cinac12345T Cannot locate NCR Last verified 06 05 14 cin number ty56478P for the front OD 00612 Last Verified cin#ad89521Y Me side C 05 05 14 SPC 0 VERIFIED PD IMB cin PU12301R Last Verified PC PO CON FC D Verified 02/29/2016 No Copy CIN#FG62301F'), 
(2,'L This is tJ KGM teal icne Last verified 06 05 14 for the front OD 00612 Last Verified cin#ZZ89256Y Me side C 05 05 14 SPC 0 VERIFIED PD IMB cin ZZPU12301R Last Verified PC PO CON FC D Verified 02/29/2016 No Copy CIN#ZZ62301F') 

-- Mapping/normalization table. Rules are applied in MapSeq order, so the
-- longer patterns (' cin number ', ' cin # ') must come before ' cin '.
-- MapSeq is the primary key so that exactly one rule is applied per step.
Declare @Map table (MapSeq int not null primary key,MapFrom varchar(25),MapTo varchar(25)) 
Insert into @Map (MapSeq,MapFrom,MapTo) values (1,char(13),' '),(2,char(10),' '),(3,' cin number ',' cin'),(4,' cin # ',' cin'),(5,' cin#',' cin'),(6,' cin ',' cin') 

-- Working copy of the notes, padded with a space on both sides so that the
-- space-delimited patterns also match at the very start/end of a note.
Declare @Normalized table (ID int,NewString varchar(max)) 
Insert into @Normalized (ID,NewString) 
Select ID,' '+Note+' ' 
From @YourTable 

-- Apply the rules one at a time, in MapSeq order, directly to the note text.
-- This replaces the former "Select @String = Replace(@String,...) From @Map
-- Order by MapSeq" over a serialized XML copy of the data: multi-row variable
-- assignment has no guaranteed evaluation order, and the XML round trip made
-- the rules run against escaped markup instead of the raw characters.
Declare @Seq int = (Select min(MapSeq) From @Map) 
While @Seq is not null 
Begin 
    Update N 
       Set NewString = Replace(N.NewString,M.MapFrom,M.MapTo) 
      From @Normalized N 
      Inner Join @Map M on M.MapSeq = @Seq 

    -- Advance to the next rule; becomes NULL after the last one.
    Set @Seq = (Select min(MapSeq) From @Map Where MapSeq > @Seq) 
End 

-- Generate final results: split each normalized note on spaces and keep the
-- tokens that start with 'cin' (case sensitivity follows the collation).
Select A.ID 
      ,CIN = B.Key_Value 
From @Normalized A 
Cross Apply (Select Key_PS,Key_Value from [dbo].[udf-Str-Parse](A.NewString,' ') where Key_Value like 'cin%') B 
Order by A.ID,B.Key_PS 

戻り

ID CIN 
1 cinEW80851R 
1 cinac12345T 
1 cinty56478P 
1 cinad89521Y 
1 cinPU12301R 
1 cinFG62301F 
2 cinZZ89256Y  << Dummy Record 
2 cinZZPU12301R << Dummy Record 
2 cinZZ62301F  << Dummy Record 

上のクエリで使用している UDF は次のとおりです。

CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimeter varchar(10)) 
-- Splits @String on @Delimeter and returns one row per piece.
--
-- Parameters:
--   @String    : text to split; NULL yields an empty result set.
--   @Delimeter : separator text (up to 10 characters).
-- Returns:
--   Key_PS     : identity value assigned as the pieces are inserted.
--   Key_Value  : the piece with leading/trailing spaces trimmed.
--
--Usage: Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',') 
--  Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ') 
--  Select * from [dbo].[udf-Str-Parse]('id26,id46|id658,id967','|') 
--  Select * from [dbo].[udf-Str-Parse]('hello world. It. is. . raining.today','.') 
--  Select * from [dbo].[udf-Str-Parse]('R&D <b> x',' ')   -- XML-reserved characters are safe

Returns @ReturnTable Table (Key_PS int IDENTITY(1,1), Key_Value varchar(max)) 
As 
Begin 
    -- Placeholder that stands in for the delimiter while the text is escaped.
    -- It contains none of & < >, so the escaping step leaves it untouched.
    Declare @Marker varchar(20) = '~|udf-str-parse|~' 

    -- 1) Swap every delimiter for the marker first, so delimiters that are
    --    themselves & < > (or characters found inside entities, such as ';')
    --    keep working after escaping.
    Declare @Escaped varchar(max) = Replace(@String,@Delimeter,@Marker) 

    -- 2) Escape the XML-reserved characters; '&' must be handled first so the
    --    entities produced for '<' and '>' are not escaped a second time.
    --    Without this step any & < > in the data makes the XML cast fail.
    Set @Escaped = Replace(Replace(Replace(@Escaped,'&','&amp;'),'<','&lt;'),'>','&gt;') 

    -- 3) Turn each marker into an element boundary and parse the result.
    Declare @XML xml;Set @XML = Cast('<x>' + Replace(@Escaped,@Marker,'</x><x>')+'</x>' as XML) 

    -- value('.') decodes the entities again, so callers see the original text.
    Insert Into @ReturnTable (Key_Value) Select Key_Value = ltrim(rtrim(String.value('.', 'varchar(max)'))) FROM @XML.nodes('x') as T(String) 
    Return 
End 
+0

素晴らしい解決策です。+1。ただ、数字でも英字でもない記号をすべて正規化(マッピング)テーブルに追加しておくことをお勧めします。 – gofr1

1

提示されたテーブルのデータを見ると、必要な部分の長さは 8 文字で、その 3〜7 文字目は整数になっています。そこで、次のように XML(XQuery)を使う方法を試すことができます。

-- Holds every candidate token from all notes as a flat list of <d> elements.
DECLARE @tokens xml;

-- For each note, take the text after the first 'cin', treat both spaces and
-- '#' as separators, and wrap each piece in <d>...</d>. FOR XML PATH('')
-- concatenates the per-row fragments into one XML value.
SET @tokens = (
    SELECT
        CAST(
            '<d>'
            + REPLACE(
                  REPLACE(
                      SUBSTRING(Note, CHARINDEX('cin', Note) + 3, LEN(Note)),
                      ' ',
                      '#'
                  ),
                  '#',
                  '</d><d>'
              )
            + '</d>'
            AS xml
        )
    FROM YourTable
    FOR XML PATH('')
);

-- Keep only the pieces that look like a CIN value:
--   * exactly 8 characters long, and
--   * characters 3-7 cast to a non-zero integer.
SELECT piece.node.value('.', 'nvarchar(8)')
FROM @tokens.nodes('/d') AS piece(node)
WHERE LEN(piece.node.value('.', 'nvarchar(8)')) = 8
    AND ISNULL(piece.node.value('substring(string(.), 3, 5) cast as xs:int ?', 'int'), 0) <> 0;

出力は次のようになります(@ JohnCappelleti の解決策に着想を得ました)。

EW80851R 
ac12345T 
ty56478P 
ad89521Y 
PU12301R 
FG62301F 

EDIT

さらに複雑な解決策:

-- Purpose: same extraction as the simpler XQuery version, but every character
-- that is not a digit or an ASCII letter is treated as a separator, not just
-- space and '#'.
--Declare variables 
DECLARE @xml xml, @string nvarchar(max) = '' 
--Rows become one big string: for each note, the text after the first 'cin'
--is appended, followed by a space.
-- NOTE(review): this relies on multi-row variable assignment to concatenate;
-- confirm the result is complete and in the expected order on the target server.
SELECT @string = @string + SUBSTRING(Note,CHARINDEX('cin',Note)+3,LEN(Note)) +' ' 
FROM YourTable y 
--Recursive CTE listing character codes 0..255; isUsed = 1 marks every code
--that is NOT a digit (48-57), upper-case letter (65-90) or lower-case letter
--(97-122), i.e. the characters to be normalized to '#'.
;WITH Symbols AS (
    SELECT 0 as d, 
      CHAR(0) as s, 
      1 as isUsed 
    UNION ALL 
    SELECT d+1, 
      CHAR(d+1), 
      CASE WHEN d+1 between 48 and 57 
        OR d+1 between 65 and 90 
        OR d+1 between 97 and 122 THEN 0 ELSE 1 END 
    FROM Symbols 
    WHERE d < 255 
) 
--Replace every non-numeric, non-alphabetic character with '#'.
-- NOTE(review): one REPLACE per flagged row is applied through multi-row
-- variable assignment; the list includes CHAR(0) - verify this behaves as
-- intended under the database collation.
SELECT @string = REPLACE(@string,s.s,'#') 
FROM Symbols s 
WHERE isUsed = 1 
--MAXRECURSION 0 lifts the default recursion limit, which the 255 recursive
--steps of the CTE would otherwise exceed.
OPTION(MAXRECURSION 0) 
--Convert to xml: each '#' becomes an element boundary, so every run of
--letters/digits ends up in its own <d> element.
SELECT @xml= CAST('<d>'+REPLACE(@string,'#','</d><d>')+'</d>' as xml) 
--Convert XML to table, keeping only pieces that look like a CIN value.
SELECT t.v.value('.','nvarchar(8)')   
FROM @xml.nodes('/d') as t(v) 
WHERE LEN(t.v.value('.','nvarchar(8)')) = 8 --check if length = 8 
    AND ISNULL(t.v.value('substring(string(.), 3, 5) cast as xs:int ?','int'),0) != 0 
    --check that characters 3-7 cast to a non-zero integer 
+0

Plus1 OK、それはクールだった - 私の答えを削除した –

関連する問題