자유롭게 질의 및 응답을 할 수 있는 게시판입니다. 개발자 여러분의 답변이 큰 도움이 됩니다.
- 제품설치/등록 오류 문의: 설치/등록 Q&A 이용 (제품 구매 고객 한정)
Delphi [질문] 험프리님 말씀하신대로 제가 작성한 내용 댓글 붙였습니다. 어떻게 수정하면 원하는 결과가 나오는지요? 델파이 정규식에서 어떻게 검색조건을 줘야 원하는 결과가 나올까요?
2016.04.05 00:24
본 게시판 사용시 당부 사항
* 이 게시판은 자유롭게 질문을 올리고 자발적으로 답변을 공유하는 게시판입니다.
* 어느 누구도 답변을 달아야만 하는 책임은 없습니다.
* 따라서 질문을 올리실 때에는 최대한 자세하고 정중하게 질문을 올려 주세요.
* 최대한 질문을 자세히 올려야 답변도 자세히 올라 옵니다.
* 본 질문에 답변을 주시는 여러 개발자님들께 미리 감사드립니다.
-----------------------------------------------------------------------------------------------
프로그램으로 and trees bearing fruit, wherein is the seed 문장 에서
검색조건 and the-tree 로 검색하여 비슷한 문장 또는 같은 문장을 찾고자 하면 결과가
and t
이렇게 나옵니다
제가 원하는 것은 and the-tree 라는 문장으로 검색하더라도 유사한 문장인 and tree 가 검색되도록 조건을 구현하는 것입니다.
댓글 3
-
험프리
2016.04.05 11:52
-
delphi
2016.04.05 13:51
// Fragment of a routine (the procedure header and its closing "end" are not
// part of the post). It matches the text
//   EditA.Text + '||||' + EditB.Text
// against  <any> <str> <any> '||||' <any> <same str> <any>
// i.e. it looks for a non-empty piece "str" that occurs on both sides of the
// '||||' separator, and writes what it collected to bonmun2_edit.
var pat: TPattern;
str,maxstr, bi,bi_str: string;
begin
// Create pattern
bi := '';
bi_str := '';
edit4.text := '';
// PSEAnyStr[str] uses the default SetValue property, so each match of that
// sub-pattern is stored in "str"; PSEVar(str) then requires the same text again.
pat:=PSE([PSEAny, PSEAnyStr[str],
PSEAny, '||||', PSEAny, PSEVar(str), PSEAny]);
try
// Activate pattern on some string
pat.Activate(EditA.Text + '||||' + EditB.Text);
maxstr:='';
// for all successful matches
while pat.Match do begin
// only a strictly longer common piece than the best so far is recorded
if Length(str) > Length(maxstr) then begin
// LogMemo.Lines.Add('Found "'+str+'"');
maxstr:=str;
// when the first character differs from the previously kept piece,
// flush that piece (plus a space) to edit4 before starting a new one
if ((bi <> '') and (bi <> (copy(str,1,1)))) then begin
edit4.text := edit4.text + bi_str + ' ';
bi := '';
bi_str := '';
end;
bi := copy(str,1,1);
bi_str := str;
end;
pat.MatchNext;
end;
finally
pat.Free;
end;
// flush the last kept piece
if bi <> '' then begin
edit4.text := edit4.text + bi_str;
bi := '';
bi_str := '';
end;
// Write result
if maxstr <> '' then
bonmun2_edit.Lines.Add(edit4.text)
else
bonmun2_edit.Lines.Add('');
================= PSE 관련
type
PatternException = class(Exception);
TPattern = class;
TPatItemType = (piNil,piSimple,piPattern);
// One element of a pattern list: a literal string (piSimple), a nested
// pattern object (piPattern), or an unusable entry (piNil).
TPatItem = class
private
// literal length for piSimple, nested pattern's MatchLen for piPattern, else 0
function GetMatchLen: integer;
public
ItemType: TPatItemType;
// literal text; meaningful when ItemType = piSimple
pSimple: string;
// nested pattern; meaningful when ItemType = piPattern (freed in Destroy)
pPattern: TPattern;
// classifies one open-array element and stores it in pSimple or pPattern
constructor Create(const VarRec: TVarRec);
destructor Destroy; override;
property MatchLen: integer read GetMatchLen;
end;
// TList of TPatItem objects built from an "array of const"; the list frees
// its items in Destroy.
TPatList = class(TList)
private
// pattern that created this list (assigned in Create)
Owner: TPattern;
function GetItems(Index: integer): TPatItem;
public
// creates one TPatItem per element of Pattern, in order
constructor Create(AOwner: TPattern; const Pattern: array of const);
destructor Destroy; override;
// appends a TPatItem built from VarRec
procedure NewItem(const VarRec: TVarRec);
// typed access to the stored items
property Items[Index: integer]: TPatItem read GetItems; default;
end;
// Abstract base of all patterns. A pattern is activated on a PChar position
// and then asked for successive matches with MatchNext; descendants implement
// MatchNext and report the outcome through MatchOK / MatchFail.
TPattern = class(TObject)
private
// copy of the string given to Activate; PStart points into it
Line: string;
// binds AValue as the capture variable and returns Self
function GetSetValue(var AValue: string): TPattern;
// text from PStart, MatchLen characters long
function GetMatchStr: string;
// PStart + MatchLen
function GetMatchNextPtr: PChar;
protected
// index of the current item in Pattern
PStack: integer;
// items this pattern is built from (owned)
Pattern: TPatList;
// capture variable bound via SetValue, or nil; MatchOK stores the matched
// text in it and MatchFail clears it
Value: PString;
// content of Value^ saved by Init and restored by BackValue
OldValue: string;
// PStart: where this pattern was activated; PS: current position
PStart,PS: PChar;
// True when another MatchNext call may produce a further variant
CanResume: Boolean;
// outcome and length of the last MatchNext
FMatch: Boolean;
FMatchLen: integer;
procedure MatchOK;
procedure MatchFail;
procedure BackValue;
procedure Init; virtual;
// compares Len characters of S with the text at PS
function CompareStr(S: PChar; Len: integer): Boolean; virtual;
public
constructor Create(const APattern: array of const);
destructor Destroy; override;
// starts matching on a copy of S and performs the first MatchNext
procedure Activate(const S: string);
// starts matching at P (no copy is made) and performs the first MatchNext
procedure ActivatePtr(P: PChar);
// advances with MatchNext until a match ends on the terminating #0;
// True if such a match was found
function Match: Boolean;
procedure MatchNext; virtual; abstract;
property MatchPtr: PChar read PS;
property MatchNextPtr: PChar read GetMatchNextPtr;
property MatchStr: string read GetMatchStr;
property MatchLen: integer read FMatchLen;
property IsMatch: Boolean read FMatch;
// pattern[variable] binds "variable" as capture target and yields the pattern
property SetValue[var AValue: string]: TPattern read GetSetValue; default;
end;
// Sequence pattern: all items must match one after another (with backtracking).
TPSE = class(TPattern)
public
procedure MatchNext; override;
end;
// Alternative pattern: succeeds when any one of its items matches at PStart.
TPSEOneOf = class(TPattern)
public
procedure MatchNext; override;
end;
// Optional sequence: the first MatchNext succeeds without consuming anything,
// later calls try the sequence itself.
TPSEMayBe = class(TPSE)
private
// False after Init; set to True once the sequence has been attempted
SecondResume: Boolean;
protected
procedure Init; override;
public
procedure MatchNext; override;
end;
// Matches a run of characters taken from a character set, growing by one
// character per MatchNext call up to MaxLen characters.
TPSECharSet = class(TPattern)
private
// accepted characters; #0 in the set allows an empty first match
CharSet: set of Char;
// upper bound for the run length
MaxLen: integer;
public
// SCharSet:: 'asd..g0..5.8' -> ['a','s','d'..'g','0'..'5','.','8']
// if #0 in CharSet then goal may be empty length
// Create uses the largest possible MaxLen ($7FFFFFFF)
constructor Create(const SCharSet: string);
// negative AMaxLen is treated as 0
constructor CreateLen(const SCharSet: string; AMaxLen: integer);
procedure MatchNext; override;
end;
// Matches the current content of a caller-owned string variable.
TPSEVar = class(TPattern)
private
// pointer to the caller's variable; this field hides TPattern.Value, so the
// inherited capture logic (which tests TPattern.Value) does not write to it
Value: PString;
public
constructor Create(var AValue: string);
procedure MatchNext; override;
end;
// Sequence pattern whose total match length must lie within MinLen..MaxLen.
TPSESetLen = class(TPSE)
private
// -1 means "no limit" for the respective bound
MinLen,MaxLen: integer;
public
// negative Min/Max disable that bound; if both are given in the wrong
// order they are swapped
constructor Create(const APattern: array of const; Min,Max: integer);
procedure MatchNext; override;
end;
// Parameterless callback deciding whether a TPSEBack pattern succeeds.
TPatternCallBack = function: Boolean;
// Zero-length pattern whose result is taken from a user callback; an
// unassigned callback counts as success.
TPSEBack = class(TPattern)
private
CallBack: TPatternCallBack;
public
constructor Create(ACallBack: TPatternCallBack);
procedure MatchNext; override;
end;
// Factory functions. Each call creates a new pattern object; a pattern passed
// as an item to another pattern is freed together with that pattern.
// sequential group of patterns
function PSE(const APattern: array of const): TPattern;
// APattern[1] or APattern[2] or APattern[3] or ...
function PSEOneOf(const APattern: array of const): TPattern;
// APattern or nothing
function PSEMayBe(const APattern: array of const): TPattern;
// any string with symbols from charset
function PSECharSet(const SCharSet: string): TPattern;
// any string with symbols from charset and with length less or eq to AMaxLen
function PSECharSetLen(const SCharSet: string; AMaxLen: integer): TPattern;
// single char from charset
function PSEChar(const SCharSet: string): TPattern;
// string eq to AValue.
function PSEVar(var AValue: string): TPattern;
// Pattern, but with chars count in range Min..Max
function PSESetLen(const APattern: array of const; Min,Max: integer): TPattern;
// Pattern with user defined callback
function PSEBack(ACallBack: TPatternCallBack): TPattern;
// some frequently used predefined patterns:
function PSEDigit: TPattern; // 0..9
function PSEDigits: TPattern; // <digit>{<digit>}
function PSEInteger: TPattern; // [+-]<digits>
function PSENumber: TPattern; // [+-]<digits>[.<digits>]
function PSEIdent: TPattern; // a..zA..Z_{a..zA..Z0..9_}
function PSEAny: TPattern; // any string or empty
function PSEAnyStr: TPattern; // any string with length > 0
function PSEAnyChar: TPattern; // any char
implementation
// Builds a TPSEBack pattern around ACallBack.
function PSEBack(ACallBack: TPatternCallBack): TPattern;
begin
  Result := TPSEBack.Create(ACallBack);
end;
// Non-empty run of any characters #1..#255.
function PSEAnyStr: TPattern;
begin
  Result := TPSECharSet.Create(#1'..'#255);
end;
// Exactly one character out of #1..#255.
function PSEAnyChar: TPattern;
begin
  Result := TPSECharSet.CreateLen(#1'..'#255, 1);
end;
// Run of any characters #1..#255; the extra #0 in the set permits an empty match.
function PSEAny: TPattern;
begin
  Result := TPSECharSet.Create(#1'..'#255#0);
end;
// Sequence APattern restricted to a total match length of Min..Max.
function PSESetLen(const APattern: array of const; Min,Max: integer): TPattern;
begin
  Result := TPSESetLen.Create(APattern, Min, Max);
end;
// Optional sequence: APattern or nothing.
function PSEMayBe(const APattern: array of const): TPattern;
begin
  Result := TPSEMayBe.Create(APattern);
end;
// Pattern that matches whatever AValue contains at the moment of matching.
function PSEVar(var AValue: string): TPattern;
begin
  Result := TPSEVar.Create(AValue);
end;
// [+-]<digits>[.<digits>]
// Optional sign, digits, then an optional group made of an (itself optional)
// DecimalSeparator followed by digits.
function PSENumber: TPattern;
begin
  Result := PSE([
    PSEChar('+-'#0),
    PSEDigits,
    PSEMayBe([
      PSEMayBe([DecimalSeparator]),
      PSEDigits
    ])
  ]);
end;
// [+-]<digits> : optional sign (the #0 makes it optional) followed by digits.
function PSEInteger: TPattern;
begin
  Result := PSE([
    PSEChar('+-'#0),
    PSEDigits
  ]);
end;
// a..zA..Z_{a..zA..Z0..9_} : one letter or underscore, then a possibly empty
// run of letters, digits and underscores.
function PSEIdent: TPattern;
begin
  Result := PSE([
    PSEChar('a..zA..Z_'),
    PSECharSet('a..zA..Z_0..9'#0)
  ]);
end;
// <digit>{<digit>} : non-empty run of 0..9.
function PSEDigits: TPattern;
begin
  Result := TPSECharSet.Create('0..9');
end;
// A single character 0..9.
function PSEDigit: TPattern;
begin
  Result := TPSECharSet.CreateLen('0..9', 1);
end;
// Sequence pattern built from the given items (literals and/or patterns).
function PSE(const APattern: array of const): TPattern;
begin
  Result := TPSE.Create(APattern);
end;
// Alternative pattern: any one of the given items.
function PSEOneOf(const APattern: array of const): TPattern;
begin
  Result := TPSEOneOf.Create(APattern);
end;
// Single character from SCharSet (character-set pattern limited to length 1).
function PSEChar(const SCharSet: string): TPattern;
begin
  Result := TPSECharSet.CreateLen(SCharSet, 1);
end;
// Run of characters from SCharSet without a length limit of its own.
function PSECharSet(const SCharSet: string): TPattern;
begin
  Result := TPSECharSet.Create(SCharSet);
end;
// Run of characters from SCharSet, at most AMaxLen characters long.
function PSECharSetLen(const SCharSet: string; AMaxLen: integer): TPattern;
begin
  Result := TPSECharSet.CreateLen(SCharSet, AMaxLen);
end;
//------------------------------ TPSEBack --------------------------------------
// Stores the callback. The base class receives a single nil entry, which
// becomes an item that is neither a literal nor a nested pattern.
constructor TPSEBack.Create(ACallBack: TPatternCallBack);
begin
  inherited Create([nil]);
  CallBack := ACallBack;
end;
// Succeeds when no callback is assigned or the callback returns True, fails
// otherwise. The pattern never offers a second variant.
procedure TPSEBack.MatchNext;
begin
  if not Assigned(CallBack) then
    MatchOK
  else if CallBack then
    MatchOK
  else
    MatchFail;
  CanResume := False;
end;
//---------------------------- TPSESetLen --------------------------------------
// Builds the sequence and stores the length limits. Negative limits are
// normalised to -1 ("no limit"); two valid limits given in reverse order are
// swapped.
constructor TPSESetLen.Create(const APattern: array of const; Min,Max: integer);
var
  Lo, Hi: integer;
begin
  inherited Create(APattern);
  Lo := Min;
  Hi := Max;
  if Lo < 0 then Lo := -1;
  if Hi < 0 then Hi := -1;
  if (Lo >= 0) and (Hi >= 0) and (Lo > Hi) then begin
    MinLen := Hi;
    MaxLen := Lo;
  end else begin
    MinLen := Lo;
    MaxLen := Hi;
  end;
end;
// Keeps asking the underlying sequence for its next variant until one has a
// length inside the configured limits, or the sequence itself fails.
procedure TPSESetLen.MatchNext;
begin
  while True do begin
    inherited MatchNext;
    if not FMatch then Exit;
    // reject variants that are too short or too long
    if (MinLen >= 0) and (MatchLen < MinLen) then FMatch := False;
    if (MaxLen >= 0) and (MatchLen > MaxLen) then FMatch := False;
    if FMatch then Exit;
  end;
end;
//------------------------------- TPSEVar --------------------------------------
// Remembers the address of the caller's variable; its content is read each
// time MatchNext runs.
constructor TPSEVar.Create(var AValue: string);
begin
  inherited Create([nil]);
  Value := @AValue;
end;
// Succeeds when the text at PS starts with the current content of the bound
// variable, consuming exactly that many characters. There is only one variant.
procedure TPSEVar.MatchNext;
var
  WantedLen: integer;
begin
  WantedLen := Length(Value^);
  if CompareStr(PChar(Value^), WantedLen) then begin
    PS := PS + WantedLen;
    MatchOK;
  end else
    MatchFail;
  CanResume := False;
end;
//------------------------------- TPSEMayBe ------------------------------------
// First call after activation: report success without running the sequence.
// Later calls run the inherited sequence matcher; CanResume is preset from
// SecondResume so the very first sequence attempt starts fresh and the
// following ones backtrack.
procedure TPSEMayBe.MatchNext;
begin
  if not CanResume then begin
    MatchOK;
    Exit;
  end;
  CanResume := SecondResume;
  inherited MatchNext;
  SecondResume := True;
end;
// Resets the base state and marks the sequence as not yet attempted.
procedure TPSEMayBe.Init;
begin
  inherited Init;
  SecondResume := False;
end;
//------------------------------ TPSECharSet -----------------------------------
// Produces the next (one character longer) run of set characters.
// - fails once MaxLen characters have been consumed;
// - on the first call, a set containing #0 yields the current (empty) match;
// - otherwise consumes one character if it is in the set and not the
//   terminating #0, and records whether yet another character could follow.
procedure TPSECharSet.MatchNext;
begin
  if (PS - PStart) >= MaxLen then begin
    MatchFail;
    Exit;
  end;
  if (not CanResume) and (#0 in CharSet) then begin
    MatchOK;
    Exit;
  end;
  if (PS^ = #0) or not (PS^ in CharSet) then begin
    MatchFail;
    Exit;
  end;
  Inc(PS);
  MatchOK;
  CanResume := ((PS - PStart) < MaxLen) and (PS^ in CharSet) and (PS^ <> #0);
end;
// Builds the character set from its textual description and stores the
// maximum run length (negative AMaxLen is treated as 0).
//
// SCharSet syntax: characters are added one by one; "x..y" adds the whole
// range x..y. A ".." is only read as a range operator when a character
// precedes it and one follows it, so a leading or trailing '.' is literal.
//
// The position test uses plain integer comparisons. The earlier form
// "j in [2..Length(SCharSet)-2]" built a set from integers, and sets can only
// hold ordinals 0..255, so ranges in long descriptions were not recognised.
constructor TPSECharSet.CreateLen(const SCharSet: string; AMaxLen: integer);
var
  j, n: integer;
  LastChar: Char;
begin
  inherited Create([nil]);
  MaxLen := AMaxLen;
  if MaxLen < 0 then MaxLen := 0;
  CharSet := [];
  n := Length(SCharSet);
  LastChar := #0;
  j := 1;
  while j <= n do begin
    if (SCharSet[j] = '.') and (j >= 2) and (j <= n - 2)
      and (SCharSet[j + 1] = '.') then begin
      // "<LastChar>..<next>" : add the whole range and skip past its end
      CharSet := CharSet + [LastChar..SCharSet[j + 2]];
      LastChar := SCharSet[j + 2];
      Inc(j, 3);
    end else begin
      LastChar := SCharSet[j];
      CharSet := CharSet + [LastChar];
      Inc(j);
    end;
  end;
end;
// Same as CreateLen with the largest possible length limit.
constructor TPSECharSet.Create(const SCharSet: string);
begin
  CreateLen(SCharSet, $7FFFFFFF);
end;
//------------------------------ TPSEOneOf ---------------------------------------
// Tries the alternatives in Pattern one after another, starting at index
// PStack, and reports the first one that matches at PStart.
// NOTE(review): after a literal alternative succeeds, MatchOK leaves
// CanResume = True while PStack already points at the next item; if that item
// is a nested pattern, the next call resumes it with MatchNext without a
// preceding ActivatePtr — confirm this is intended.
procedure TPSEOneOf.MatchNext;
label L_OK;
var len: integer;
begin
// every alternative is tried from the same start position
PS:=PStart;
while PStack < Pattern.Count do begin
case Pattern[PStack].ItemType of
piSimple:
begin
len:=Length(Pattern[PStack].pSimple);
if CompareStr(PChar(Pattern[PStack].pSimple),len) then begin
PS:=PS+len;
// a literal has no further variants: step past it before reporting success
Inc(PStack);
goto L_OK;
end;
end;
piPattern:
begin
// CanResume = False: first visit to this alternative, activate it at PS;
// otherwise ask the same alternative for its next variant
if not CanResume then
Pattern[PStack].pPattern.ActivatePtr(PS)
else
Pattern[PStack].pPattern.MatchNext;
CanResume:=False;
if Pattern[PStack].pPattern.IsMatch then begin
PS:=PS+Pattern[PStack].pPattern.MatchLen;
goto L_OK;
end;
end;
end;
// this alternative did not match: move on to the next one
Inc(PStack);
end;
// no alternative left
MatchFail;
Exit;
L_OK:
MatchOK;
end;
//-------------------------------- TPSE ----------------------------------------
// Matches all items of Pattern in order, backtracking into earlier nested
// patterns when a later item fails. A call made after a reported match
// (CanResume = True) first backtracks, so it yields the next variant.
procedure TPSE.MatchNext;
label L_FAIL;
var len: integer;
// Moves PStack back to the nearest nested pattern that can still produce
// another variant, restoring the capture variable of every nested pattern it
// passes. Returns False (and leaves PStack < 0) when there is none.
// On success PS is recomputed as PStart plus the lengths of all items before
// the new PStack.
function GoBack: Boolean;
var j: integer;
begin
repeat
Dec(PStack);
if PStack < 0 then begin
CanResume:=False;
Result:=False;
Exit;
end;
if Pattern[PStack].ItemType = piPattern then
Pattern[PStack].pPattern.BackValue;
until (Pattern[PStack].ItemType = piPattern) and
Pattern[PStack].pPattern.CanResume;
Result:=(PStack >= 0) and (Pattern[PStack].ItemType = piPattern);
// CanResume = True tells the main loop to call MatchNext (not ActivatePtr)
// on the item at PStack
CanResume:=Result;
if Result then begin
PS:=PStart;
if PStack > 0 then
for j:=0 to PStack-1 do
PS:=PS+Pattern[j].MatchLen;
end;
end;
begin
// PStack < 0: an earlier call already exhausted all variants
if PStack < 0 then goto L_FAIL;
if CanResume then
if not GoBack then goto L_FAIL;
while PStack < Pattern.Count do begin
case Pattern[PStack].ItemType of
piSimple:
begin
len:=Length(Pattern[PStack].pSimple);
if CompareStr(PChar(Pattern[PStack].pSimple),len) then
PS:=PS+len
else
// literal mismatch: retry from an earlier nested pattern, or give up
if GoBack then Continue
else goto L_FAIL;
end;
piPattern:
begin
// fresh item -> activate at PS; item selected by GoBack -> next variant
if not CanResume then
Pattern[PStack].pPattern.ActivatePtr(PS)
else
Pattern[PStack].pPattern.MatchNext;
CanResume:=False;
if Pattern[PStack].pPattern.IsMatch then
PS:=PS+Pattern[PStack].pPattern.MatchLen
else
if GoBack then Continue
else goto L_FAIL;
end;
end;
// items that are neither literal nor pattern are skipped here
Inc(PStack);
end;
MatchOK;
Exit;
L_FAIL:
MatchFail;
end;
//------------------------------- TPattern ---------------------------------
// Getter of the default SetValue property: binds AValue as the variable that
// receives the matched text and returns the pattern itself, so the expression
// Pattern[Variable] can be used wherever a pattern is expected.
function TPattern.GetSetValue(var AValue: string): TPattern;
begin
  Value := @AValue;
  Result := Self;
end;
// True when the first Len characters of S equal the text at the current
// position PS (compared with StrLComp).
function TPattern.CompareStr(S: PChar; Len: integer): Boolean;
begin
  Result := (StrLComp(S, PS, Len) = 0);
end;
// Records a failed attempt: no match, nothing to resume, zero length, and an
// emptied capture variable if one is bound.
procedure TPattern.MatchFail;
begin
  FMatchLen := 0;
  FMatch := False;
  CanResume := False;
  if Assigned(Value) then
    Value^ := '';
end;
// Records a successful attempt covering PStart..PS and copies the matched
// text into the capture variable if one is bound.
procedure TPattern.MatchOK;
begin
  // the length must be set first: MatchStr reads it
  FMatchLen := PS - PStart;
  FMatch := True;
  CanResume := True;
  if Assigned(Value) then
    Value^ := MatchStr;
end;
// Steps through match variants until one ends exactly on the terminating #0
// or no variant is left; returns True in the first case.
function TPattern.Match: Boolean;
begin
  while FMatch and (MatchNextPtr^ <> #0) do
    MatchNext;
  // the loop ends either without a match or with a match that reaches #0,
  // so FMatch alone is the answer
  Result := FMatch;
end;
// Position of the first character after the current match.
function TPattern.GetMatchNextPtr: PChar;
begin
  Result := PStart + MatchLen;
end;
// Returns the matched text: MatchLen characters copied from PStart.
function TPattern.GetMatchStr: string;
var
  Count: integer;
begin
  Count := MatchLen;
  SetLength(Result, Count);
  if Count <= 0 then Exit;
  StrLCopy(PChar(Result), PStart, Count);
end;
// Starts matching on S: keeps a copy in Line, points PStart/PS at it, resets
// the state and computes the first variant.
procedure TPattern.Activate(const S: string);
begin
  Line := S;
  PStart := PChar(Line);
  PS := PStart;
  Init;
  MatchNext;
end;
// Starts matching at P inside a buffer owned by someone else (Line is
// cleared), resets the state and computes the first variant.
procedure TPattern.ActivatePtr(P: PChar);
begin
  Line := '';
  PStart := P;
  PS := P;
  Init;
  MatchNext;
end;
// Builds the item list from APattern; no capture variable is bound and the
// positions point at an empty string until the pattern is activated.
constructor TPattern.Create(const APattern: array of const);
begin
  inherited Create;
  Pattern := TPatList.Create(Self, APattern);
  Value := nil;
  Line := '';
  PStart := PChar(Line);
  PS := PStart;
end;
// Frees the item list (which in turn frees its items).
destructor TPattern.Destroy;
begin
  Pattern.Free;
  inherited Destroy;
end;
// Puts back the capture variable's content that Init saved.
procedure TPattern.BackValue;
begin
  if Assigned(Value) then
    Value^ := OldValue;
end;
// Resets the matching state and saves the capture variable's current content
// so BackValue can restore it.
procedure TPattern.Init;
begin
  PStack := 0;
  FMatch := False;
  CanResume := False;
  if Assigned(Value) then
    OldValue := Value^;
end;
//-------------------------------- TPatList ------------------------------------
// Creates the list and appends one TPatItem per element of Pattern, in order.
constructor TPatList.Create(AOwner: TPattern; const Pattern: array of const);
var
  Idx: integer;
begin
  inherited Create;
  Owner := AOwner;
  for Idx := Low(Pattern) to High(Pattern) do
    Add(TPatItem.Create(Pattern[Idx]));
end;
// Typed read access: the stored pointer at Index as a TPatItem.
function TPatList.GetItems(Index: integer): TPatItem;
var
  Raw: Pointer;
begin
  Raw := inherited Items[Index];
  Result := TPatItem(Raw);
end;
// Frees every stored item (first to last) before the list itself goes away.
destructor TPatList.Destroy;
var
  Idx: integer;
begin
  for Idx := 0 to Count - 1 do
    Items[Idx].Free;
  inherited Destroy;
end;
// Wraps VarRec in a new TPatItem and appends it.
procedure TPatList.NewItem(const VarRec: TVarRec);
var
  Item: TPatItem;
begin
  Item := TPatItem.Create(VarRec);
  inherited Add(Item);
end;
//-------------------------------- TPatItem ------------------------------------
// Classifies one open-array element:
//   character / PChar / short string / AnsiString -> literal (piSimple)
//   non-nil object that is a TPattern             -> nested pattern (piPattern)
//   anything else                                 -> piNil
// NOTE(review): other TVarRec kinds (for example wide or Unicode strings) are
// not handled and end up as piNil — confirm this suits the compiler in use.
constructor TPatItem.Create(const VarRec: TVarRec);
var
  IsText: Boolean;
begin
  inherited Create;
  ItemType := piNil;
  IsText := True;
  case VarRec.VType of
    vtChar:       pSimple := VarRec.VChar;
    vtPChar:      pSimple := VarRec.VPChar;
    vtString:     pSimple := VarRec.VString^;
    vtAnsiString: pSimple := string(VarRec.VAnsiString);
  else
    IsText := False;
  end;
  if IsText then
    ItemType := piSimple
  else if VarRec.VType = vtObject then
    if VarRec.VObject <> nil then
      if VarRec.VObject is TPattern then begin
        pPattern := TPattern(VarRec.VObject);
        ItemType := piPattern;
      end;
end;
// Number of characters this item accounts for: the literal's length, the
// nested pattern's current MatchLen, or 0 for a piNil item.
function TPatItem.GetMatchLen: integer;
begin
  if ItemType = piSimple then
    Result := Length(pSimple)
  else if ItemType = piPattern then
    Result := pPattern.MatchLen
  else
    Result := 0;
end;
// A nested pattern is owned by its item and is freed with it.
destructor TPatItem.Destroy;
begin
  if ItemType = piPattern then
    pPattern.Free;
  inherited Destroy;
end;
-
얄리
2016.04.06 08:27
delphi님께서 질문하신 것은 델파이 프로그램이나 문법 관련 질문이 아니라 로직을 어떻게 구현할지에 대한 아이디어 문제라 이곳 게시판에서 답이 쉽게 나올지 모르겠습니다.
일단 "유사"한 것을 검색하고 싶다고 하셨는데 "유사"하다라는 것이 어떤 기준일까요?
검색어의 단어 중 몇 개가 포함되어야 유사하다고 판단할 것인지?
"and the-tree" 라는 검색어일 경우 불필요한 특수문자인 "-"는 검색할 단어에서 제외해야 할테고 이와 마찬가지로 의미가 없는 정관사인 "the"와 "and"도 제외하는 것이 일반적으로 더 좋은 결과를 가져옵니다. 물론 필요하다면 "and", "the", "tree" 모두를 검색 해야 할 때도 있겠죠. 그 다음에는 이 검색어가 문장에서 몇 개까지 포함된 것을 가져올 것인지도 결정해야 할 것입니다.
이러한 것들은 답이 정해진 것이 아니라 알고리즘 및 개발자의 노하우 문제라 스스로 고민하고 코딩해야할 영역입니다.
형태소 분석, 유사어 분석 등의 알고리즘이나 컴포넌트 들을 알아보시는 것이 좋을 것 같습니다. 통합검색 솔루션(형태소 분석 등)이나 표절검색 솔루션을 하신 분들이 있다면 잘 아실것 같은데 말이죠.
Delphi [질문] 험프리님 말씀하신대로 제가 작성한 내용 댓글 붙였습니다. 어떻게 수정하면 원하는 결과가 나오는지요? 델파이 정규식에서 어떻게 검색조건을 줘야 원하는 결과가 나올까요?
2016.04.05 00:24
본 게시판 사용시 당부 사항
* 이 게시판은 자유롭게 질문을 올리고 자발적으로 답변을 공유하는 게시판입니다.
* 어느 누구도 답변을 달아야만 하는 책임은 없습니다.
* 따라서 질문을 올리실 때에는 최대한 자세하고 정중하게 질문을 올려 주세요.
* 최대한 질문을 자세히 올려야 답변도 자세히 올라 옵니다.
* 본 질문에 답변을 주시는 여러 개발자님들께 미리 감사드립니다.
-----------------------------------------------------------------------------------------------
프로그램으로 and trees bearing fruit, wherein is the seed 문장 에서
검색조건 and the-tree 로 검색하여 비슷한 문장 또는 같은 문장을 찾고자 하면 결과가
and t
이렇게 나옵니다
제가 원하는 것은 and the-tree 라는 문장으로 검색하더라도 유사한 문장인 and tree 가 검색되도록 조건을 구현하는 것입니다.
댓글 3
-
험프리
2016.04.05 11:52
-
delphi
2016.04.05 13:51
// Fragment of a routine (the procedure header and its closing "end" are not
// part of the post). It matches  EditA.Text + '||||' + EditB.Text  against
//   <any> <str> <any> '||||' <any> <same str> <any>
// i.e. it looks for a non-empty piece "str" present on both sides of the
// separator, and writes what it collected to bonmun2_edit.
// Statements that had been fused onto one line are separated again, and the
// forum's section separator no longer shares a line with code.
var
  pat: TPattern;
  str, maxstr, bi, bi_str: string;
begin
  // Create pattern
  bi := '';
  bi_str := '';
  edit4.text := '';
  // PSEAnyStr[str] stores each match of that sub-pattern in "str";
  // PSEVar(str) then requires the same text again
  pat := PSE([PSEAny, PSEAnyStr[str],
    PSEAny, '||||', PSEAny, PSEVar(str), PSEAny]);
  try
    // Activate pattern on some string
    pat.Activate(EditA.Text + '||||' + EditB.Text);
    maxstr := '';
    // for all successful matches
    while pat.Match do begin
      // only a strictly longer common piece than the best so far is recorded
      if Length(str) > Length(maxstr) then begin
        // LogMemo.Lines.Add('Found "'+str+'"');
        maxstr := str;
        // first character changed: flush the previously kept piece
        if ((bi <> '') and (bi <> (copy(str,1,1)))) then begin
          edit4.text := edit4.text + bi_str + ' ';
          bi := '';
          bi_str := '';
        end;
        bi := copy(str,1,1);
        bi_str := str;
      end;
      pat.MatchNext;
    end;
  finally
    pat.Free;
  end;
  // flush the last kept piece
  if bi <> '' then begin
    edit4.text := edit4.text + bi_str;
    bi := '';
    bi_str := '';
  end;
  // Write result
  if maxstr <> '' then
    bonmun2_edit.Lines.Add(edit4.text)
  else
    bonmun2_edit.Lines.Add('');
================= PSE 관련
type
  PatternException = class(Exception);

  TPattern = class;

  // kind of a pattern list element: unusable, literal text, nested pattern
  TPatItemType = (piNil,piSimple,piPattern);

  // One element of a pattern list.
  TPatItem = class
  private
    function GetMatchLen: integer;
  public
    ItemType: TPatItemType;
    pSimple: string;     // literal text (piSimple)
    pPattern: TPattern;  // nested pattern (piPattern), freed in Destroy
    constructor Create(const VarRec: TVarRec);
    destructor Destroy; override;
    property MatchLen: integer read GetMatchLen;
  end;

  // TList of TPatItem objects built from an "array of const".
  TPatList = class(TList)
  private
    Owner: TPattern;
    function GetItems(Index: integer): TPatItem;
  public
    constructor Create(AOwner: TPattern; const Pattern: array of const);
    destructor Destroy; override;
    procedure NewItem(const VarRec: TVarRec);
    property Items[Index: integer]: TPatItem read GetItems; default;
  end;

  // Abstract base of all patterns: activated on a position, then asked for
  // successive matches with MatchNext.
  TPattern = class(TObject)
  private
    Line: string;  // copy of the string given to Activate
    function GetSetValue(var AValue: string): TPattern;
    function GetMatchStr: string;
    function GetMatchNextPtr: PChar;
  protected
    PStack: integer;     // index of the current item in Pattern
    Pattern: TPatList;   // items of this pattern (owned)
    Value: PString;      // bound capture variable, or nil
    OldValue: string;    // Value^ as saved by Init
    PStart,PS: PChar;    // activation position / current position
    CanResume: Boolean;  // another variant may be available
    FMatch: Boolean;
    FMatchLen: integer;
    procedure MatchOK;
    procedure MatchFail;
    procedure BackValue;
    procedure Init; virtual;
    function CompareStr(S: PChar; Len: integer): Boolean; virtual;
  public
    constructor Create(const APattern: array of const);
    destructor Destroy; override;
    procedure Activate(const S: string);
    procedure ActivatePtr(P: PChar);
    function Match: Boolean;
    procedure MatchNext; virtual; abstract;
    property MatchPtr: PChar read PS;
    property MatchNextPtr: PChar read GetMatchNextPtr;
    property MatchStr: string read GetMatchStr;
    property MatchLen: integer read FMatchLen;
    property IsMatch: Boolean read FMatch;
    property SetValue[var AValue: string]: TPattern read GetSetValue; default;
  end;

  // sequence of items
  TPSE = class(TPattern)
  public
    procedure MatchNext; override;
  end;

  // any one of the items
  TPSEOneOf = class(TPattern)
  public
    procedure MatchNext; override;
  end;

  // optional sequence
  TPSEMayBe = class(TPSE)
  private
    SecondResume: Boolean;
  protected
    procedure Init; override;
  public
    procedure MatchNext; override;
  end;

  // run of characters from a set
  TPSECharSet = class(TPattern)
  private
    CharSet: set of Char;
    MaxLen: integer;
  public
    // SCharSet:: 'asd..g0..5.8' -> ['a','s','d'..'g','0'..'5','.','8']
    // if #0 in CharSet then goal may be empty length
    constructor Create(const SCharSet: string);
    constructor CreateLen(const SCharSet: string; AMaxLen: integer);
    procedure MatchNext; override;
  end;

  // current content of a caller-owned string variable
  TPSEVar = class(TPattern)
  private
    Value: PString;  // hides TPattern.Value
  public
    constructor Create(var AValue: string);
    procedure MatchNext; override;
  end;

  // sequence with a total length restricted to MinLen..MaxLen
  TPSESetLen = class(TPSE)
  private
    MinLen,MaxLen: integer;
  public
    constructor Create(const APattern: array of const; Min,Max: integer);
    procedure MatchNext; override;
  end;

  TPatternCallBack = function: Boolean;

  // zero-length pattern decided by a user callback
  TPSEBack = class(TPattern)
  private
    CallBack: TPatternCallBack;
  public
    constructor Create(ACallBack: TPatternCallBack);
    procedure MatchNext; override;
  end;

// sequential group of patterns
function PSE(const APattern: array of const): TPattern;
// APattern[1] or APattern[2] or APattern[3] or ...
function PSEOneOf(const APattern: array of const): TPattern;
// APattern or nothing
function PSEMayBe(const APattern: array of const): TPattern;
// any string with symbols from charset
function PSECharSet(const SCharSet: string): TPattern;
// any string with symbols from charset and with length less or eq to AMaxLen
function PSECharSetLen(const SCharSet: string; AMaxLen: integer): TPattern;
// single char from charset
function PSEChar(const SCharSet: string): TPattern;
// string eq to AValue.
function PSEVar(var AValue: string): TPattern;
// Pattern, but with chars count in range Min..Max
function PSESetLen(const APattern: array of const; Min,Max: integer): TPattern;
// Pattern with user defined callback
function PSEBack(ACallBack: TPatternCallBack): TPattern;
// some frequently used predefined patterns:
function PSEDigit: TPattern; // 0..9
function PSEDigits: TPattern; // <digit>{<digit>}
function PSEInteger: TPattern; // [+-]<digits>
function PSENumber: TPattern; // [+-]<digits>[.<digits>]
function PSEIdent: TPattern; // a..zA..Z_{a..zA..Z0..9_}
function PSEAny: TPattern; // any string or empty
function PSEAnyStr: TPattern; // any string with length > 0
function PSEAnyChar: TPattern; // any char
// the keyword below had been fused into the preceding line comment
implementation
// Implementation part of the PSE pattern unit. Definitions that had been
// fused onto shared lines are separated again, and TPSECharSet.CreateLen no
// longer tests the string position with an integer set (see there).

// Builds a TPSEBack pattern around ACallBack.
function PSEBack(ACallBack: TPatternCallBack): TPattern;
begin
  Result := TPSEBack.Create(ACallBack);
end;

// Non-empty run of any characters #1..#255.
function PSEAnyStr: TPattern;
begin
  Result := PSECharSet(#1'..'#255);
end;

// Exactly one character out of #1..#255.
function PSEAnyChar: TPattern;
begin
  Result := PSECharSetLen(#1'..'#255,1);
end;

// Run of any characters; the #0 in the set permits an empty match.
function PSEAny: TPattern;
begin
  Result := PSECharSet(#1'..'#255#0);
end;

// Sequence restricted to a total match length of Min..Max.
function PSESetLen(const APattern: array of const; Min,Max: integer): TPattern;
begin
  Result := TPSESetLen.Create(APattern,Min,Max);
end;

// Optional sequence.
function PSEMayBe(const APattern: array of const): TPattern;
begin
  Result := TPSEMayBe.Create(APattern);
end;

// Pattern matching the current content of AValue.
function PSEVar(var AValue: string): TPattern;
begin
  Result := TPSEVar.Create(AValue);
end;

// [+-]<digits>[.<digits>]
function PSENumber: TPattern;
begin
  Result := PSE([PSEChar('+-'#0),PSEDigits,PSEMayBe([PSEMayBe([DecimalSeparator]),PSEDigits])]);
end;

// [+-]<digits>
function PSEInteger: TPattern;
begin
  Result := PSE([PSEChar('+-'#0),PSEDigits]);
end;

// a..zA..Z_{a..zA..Z0..9_}
function PSEIdent: TPattern;
begin
  Result := PSE([PSEChar('a..zA..Z_'),PSECharSet('a..zA..Z_0..9'#0)]);
end;

// <digit>{<digit>}
function PSEDigits: TPattern;
begin
  Result := PSECharSet('0..9');
end;

// single digit
function PSEDigit: TPattern;
begin
  Result := PSEChar('0..9');
end;

// Sequence pattern.
function PSE(const APattern: array of const): TPattern;
begin
  Result := TPSE.Create(APattern);
end;

// Alternative pattern.
function PSEOneOf(const APattern: array of const): TPattern;
begin
  Result := TPSEOneOf.Create(APattern);
end;

// Single character from SCharSet.
function PSEChar(const SCharSet: string): TPattern;
begin
  Result := TPSECharSet.CreateLen(SCharSet,1);
end;

// Run of characters from SCharSet.
function PSECharSet(const SCharSet: string): TPattern;
begin
  Result := TPSECharSet.Create(SCharSet);
end;

// Run of characters from SCharSet, at most AMaxLen long.
function PSECharSetLen(const SCharSet: string; AMaxLen: integer): TPattern;
begin
  Result := TPSECharSet.CreateLen(SCharSet,AMaxLen);
end;

//------------------------------ TPSEBack --------------------------------------
// Stores the callback; the base class gets a single nil (unusable) item.
constructor TPSEBack.Create(ACallBack: TPatternCallBack);
begin
  inherited Create([nil]);
  CallBack := ACallBack;
end;

// Succeeds when no callback is assigned or it returns True; never resumable.
procedure TPSEBack.MatchNext;
begin
  if Assigned(CallBack) then
    if CallBack then MatchOK
    else MatchFail
  else MatchOK;
  CanResume := False;
end;

//---------------------------- TPSESetLen --------------------------------------
// Negative limits mean "no limit" (-1); reversed valid limits are swapped.
constructor TPSESetLen.Create(const APattern: array of const; Min,Max: integer);
var
  t: integer;
begin
  inherited Create(APattern);
  if Min < 0 then Min := -1;
  if Max < 0 then Max := -1;
  if (Min >= 0) and (Max >= 0) and (Min > Max) then begin
    t := Min; Min := Max; Max := t;
  end;
  MinLen := Min; MaxLen := Max;
end;

// Asks the sequence for variants until one fits the length limits or the
// sequence fails.
procedure TPSESetLen.MatchNext;
begin
  repeat
    inherited MatchNext;
    if not FMatch then Break;
    if MinLen >= 0 then
      FMatch := FMatch and (MatchLen >= MinLen);
    if MaxLen >= 0 then
      FMatch := FMatch and (MatchLen <= MaxLen);
  until FMatch;
end;

//------------------------------- TPSEVar --------------------------------------
// Remembers the address of the caller's variable.
constructor TPSEVar.Create(var AValue: string);
begin
  inherited Create([nil]);
  Value := @AValue;
end;

// Succeeds when the text at PS starts with the variable's current content.
procedure TPSEVar.MatchNext;
begin
  if CompareStr(PChar(Value^),Length(Value^)) then begin
    PS := PS+Length(Value^);
    MatchOK;
  end else
    MatchFail;
  CanResume := False;
end;

//------------------------------- TPSEMayBe ------------------------------------
// First call: empty success. Later calls: run the sequence itself.
procedure TPSEMayBe.MatchNext;
begin
  if not CanResume then MatchOK
  else begin
    CanResume := SecondResume;
    inherited MatchNext;
    SecondResume := True;
  end;
end;

procedure TPSEMayBe.Init;
begin
  inherited Init;
  SecondResume := False;
end;

//------------------------------ TPSECharSet -----------------------------------
// Produces the next, one character longer, run of set characters.
procedure TPSECharSet.MatchNext;
begin
  if (PS-PStart) >= MaxLen then
    MatchFail
  else
    if not CanResume and (#0 in CharSet) then
      // first call and empty match allowed
      MatchOK
    else if (PS^ <> #0) and (PS^ in CharSet) then begin
      PS := PS+1;
      MatchOK;
      CanResume := ((PS-PStart) < MaxLen) and (PS^ in CharSet) and (PS^ <> #0);
    end else
      MatchFail;
end;

// Builds the character set from its description ("x..y" adds a range, other
// characters are added literally) and stores the maximum run length.
// The range test uses integer comparisons: a set built from integers, as in
// "j in [2..Length(SCharSet)-2]", can only hold ordinals 0..255, so ranges in
// long descriptions were not recognised.
constructor TPSECharSet.CreateLen(const SCharSet: string; AMaxLen: integer);
var
  j, n: integer;
  LastChar: Char;
begin
  inherited Create([nil]);
  MaxLen := AMaxLen;
  if MaxLen < 0 then MaxLen := 0;
  CharSet := [];
  n := Length(SCharSet);
  LastChar := #0;
  j := 1;
  while j <= n do begin
    if (SCharSet[j] = '.') and (j >= 2) and (j <= n - 2)
      and (SCharSet[j+1] = '.') then begin
      CharSet := CharSet+[LastChar..SCharSet[j+2]];
      LastChar := SCharSet[j+2];
      Inc(j,3);
    end else begin
      LastChar := SCharSet[j];
      CharSet := CharSet+[LastChar];
      Inc(j);
    end;
  end;
end;

// Same as CreateLen with the largest possible length limit.
constructor TPSECharSet.Create(const SCharSet: string);
begin
  CreateLen(SCharSet,$7FFFFFFF);
end;

//------------------------------ TPSEOneOf ---------------------------------------
// Tries the alternatives from index PStack on and reports the first match.
procedure TPSEOneOf.MatchNext;
label L_OK;
var
  len: integer;
begin
  PS := PStart;
  while PStack < Pattern.Count do begin
    case Pattern[PStack].ItemType of
      piSimple:
        begin
          len := Length(Pattern[PStack].pSimple);
          if CompareStr(PChar(Pattern[PStack].pSimple),len) then begin
            PS := PS+len;
            Inc(PStack);
            goto L_OK;
          end;
        end;
      piPattern:
        begin
          // first visit -> activate; otherwise next variant of the same item
          if not CanResume then
            Pattern[PStack].pPattern.ActivatePtr(PS)
          else
            Pattern[PStack].pPattern.MatchNext;
          CanResume := False;
          if Pattern[PStack].pPattern.IsMatch then begin
            PS := PS+Pattern[PStack].pPattern.MatchLen;
            goto L_OK;
          end;
        end;
    end;
    Inc(PStack);
  end;
  MatchFail;
  Exit;
L_OK:
  MatchOK;
end;

//-------------------------------- TPSE ----------------------------------------
// Matches all items in order, backtracking into earlier nested patterns when
// a later item fails.
procedure TPSE.MatchNext;
label L_FAIL;
var
  len: integer;

  // Steps PStack back to the nearest nested pattern that can still produce
  // another variant; False when there is none.
  function GoBack: Boolean;
  var
    j: integer;
  begin
    repeat
      Dec(PStack);
      if PStack < 0 then begin
        CanResume := False;
        Result := False;
        Exit;
      end;
      if Pattern[PStack].ItemType = piPattern then
        Pattern[PStack].pPattern.BackValue;
    until (Pattern[PStack].ItemType = piPattern) and
      Pattern[PStack].pPattern.CanResume;
    Result := (PStack >= 0) and (Pattern[PStack].ItemType = piPattern);
    CanResume := Result;
    if Result then begin
      // recompute the position from the items before PStack
      PS := PStart;
      if PStack > 0 then
        for j := 0 to PStack-1 do
          PS := PS+Pattern[j].MatchLen;
    end;
  end;

begin
  if PStack < 0 then goto L_FAIL;
  if CanResume then
    if not GoBack then goto L_FAIL;
  while PStack < Pattern.Count do begin
    case Pattern[PStack].ItemType of
      piSimple:
        begin
          len := Length(Pattern[PStack].pSimple);
          if CompareStr(PChar(Pattern[PStack].pSimple),len) then
            PS := PS+len
          else
            if GoBack then Continue
            else goto L_FAIL;
        end;
      piPattern:
        begin
          if not CanResume then
            Pattern[PStack].pPattern.ActivatePtr(PS)
          else
            Pattern[PStack].pPattern.MatchNext;
          CanResume := False;
          if Pattern[PStack].pPattern.IsMatch then
            PS := PS+Pattern[PStack].pPattern.MatchLen
          else
            if GoBack then Continue
            else goto L_FAIL;
        end;
    end;
    Inc(PStack);
  end;
  MatchOK;
  Exit;
L_FAIL:
  MatchFail;
end;

//------------------------------- TPattern ---------------------------------
// Binds AValue as capture variable and returns the pattern itself.
function TPattern.GetSetValue(var AValue: string): TPattern;
begin
  Value := @AValue;
  Result := Self;
end;

// True when Len characters of S equal the text at PS.
function TPattern.CompareStr(S: PChar; Len: integer): Boolean;
begin
  Result := StrLComp(S,PS,Len) = 0;
end;

// Records a failed attempt and clears the capture variable.
procedure TPattern.MatchFail;
begin
  FMatch := False;
  CanResume := False;
  FMatchLen := 0;
  if Value <> nil then Value^ := '';
end;

// Records a successful attempt covering PStart..PS and stores the matched text.
procedure TPattern.MatchOK;
begin
  FMatch := True;
  CanResume := True;
  FMatchLen := PS-PStart;
  if Value <> nil then Value^ := MatchStr;
end;

// Steps through variants until one ends on the terminating #0.
function TPattern.Match: Boolean;
begin
  while FMatch and (MatchNextPtr^ <> #0) do MatchNext;
  Result := FMatch and (MatchNextPtr^ = #0);
end;

function TPattern.GetMatchNextPtr: PChar;
begin
  Result := PStart+MatchLen;
end;

// MatchLen characters copied from PStart.
function TPattern.GetMatchStr: string;
begin
  SetLength(Result,MatchLen);
  if MatchLen > 0 then
    StrLCopy(PChar(Result),PStart,MatchLen);
end;

// Starts matching on a copy of S and computes the first variant.
procedure TPattern.Activate(const S: string);
begin
  Line := S; PStart := PChar(Line); PS := PStart;
  Init;
  MatchNext;
end;

// Starts matching at P (no copy) and computes the first variant.
procedure TPattern.ActivatePtr(P: PChar);
begin
  Line := ''; PStart := P; PS := P;
  Init;
  MatchNext;
end;

constructor TPattern.Create(const APattern: array of const);
begin
  inherited Create;
  Pattern := TPatList.Create(Self,APattern);
  Value := nil;
  Line := ''; PStart := PChar(Line); PS := PStart;
end;

destructor TPattern.Destroy;
begin
  Pattern.Free;
  inherited Destroy;
end;

// Restores the capture variable's content saved by Init.
procedure TPattern.BackValue;
begin
  if Value <> nil then Value^ := OldValue;
end;

// Resets the matching state and saves the capture variable's content.
procedure TPattern.Init;
begin
  PStack := 0; FMatch := False; CanResume := False;
  if Value <> nil then OldValue := Value^;
end;

//-------------------------------- TPatList ------------------------------------
// Appends one TPatItem per element of Pattern, in order.
constructor TPatList.Create(AOwner: TPattern; const Pattern: array of const);
var
  j: integer;
begin
  inherited Create;
  Owner := AOwner;
  for j := Low(Pattern) to High(Pattern) do
    NewItem(Pattern[j]);
end;

function TPatList.GetItems(Index: integer): TPatItem;
begin
  Result := TPatItem(inherited Items[Index]);
end;

// Frees every stored item before the list itself.
destructor TPatList.Destroy;
var
  Item: TPatItem;
  j: integer;
begin
  for j := 0 to Pred(Count) do begin
    Item := Items[j];
    Item.Free;
  end;
  inherited Destroy;
end;

procedure TPatList.NewItem(const VarRec: TVarRec);
begin
  inherited Add(TPatItem.Create(VarRec));
end;

//-------------------------------- TPatItem ------------------------------------
// Classifies one open-array element as literal, nested pattern or piNil.
// NOTE(review): other TVarRec kinds (for example wide or Unicode strings) are
// not handled and end up as piNil — confirm this suits the compiler in use.
constructor TPatItem.Create(const VarRec: TVarRec);
begin
  inherited Create;
  ItemType := piNil;
  case VarRec.VType of
    vtChar:
      begin
        pSimple := VarRec.VChar;
        ItemType := piSimple;
      end;
    vtPChar:
      begin
        pSimple := VarRec.VPChar;
        ItemType := piSimple;
      end;
    vtString:
      begin
        pSimple := VarRec.VString^;
        ItemType := piSimple;
      end;
    vtAnsiString:
      begin
        pSimple := string(VarRec.VAnsiString);
        ItemType := piSimple;
      end;
    vtObject:
      if VarRec.VObject <> nil then
        if VarRec.VObject is TPattern then begin
          pPattern := TPattern(VarRec.VObject);
          ItemType := piPattern;
        end;
  end;
end;

// Literal length, nested pattern's MatchLen, or 0.
function TPatItem.GetMatchLen: integer;
begin
  case ItemType of
    piSimple: Result := Length(pSimple);
    piPattern: Result := pPattern.MatchLen;
    else Result := 0;
  end;
end;

// A nested pattern is owned by its item and freed with it.
destructor TPatItem.Destroy;
begin
  if ItemType = piPattern then pPattern.Free;
  inherited Destroy;
end;
-
얄리
2016.04.06 08:27
delphi님께서 질문하신 것은 델파이 프로그램이나 문법 관련 질문이 아니라 로직을 어떻게 구현할지에 대한 아이디어 문제라 이곳 게시판에서 답이 쉽게 나올지 모르겠습니다.
일단 "유사"한 것을 검색하고 싶다고 하셨는데 "유사"하다라는 것이 어떤 기준일까요?
검색어의 단어 중 몇 개가 포함되어야 유사하다고 판단할 것인지?
"and the-tree" 라는 검색어일 경우 불필요한 특수문자인 "-"는 검색할 단어에서 제외해야 할테고 이와 마찬가지로 의미가 없는 정관사인 "the"와 "and"도 제외하는 것이 일반적으로 더 좋은 결과를 가져옵니다. 물론 필요하다면 "and", "the", "tree" 모두를 검색 해야 할 때도 있겠죠. 그 다음에는 이 검색어가 문장에서 몇 개까지 포함된 것을 가져올 것인지도 결정해야 할 것입니다.
이러한 것들은 답이 정해진 것이 아니라 알고리즘 및 개발자의 노하우 문제라 스스로 고민하고 코딩해야할 영역입니다.
형태소 분석, 유사어 분석 등의 알고리즘이나 컴포넌트 들을 알아보시는 것이 좋을 것 같습니다. 통합검색 솔루션(형태소 분석 등)이나 표절검색 솔루션을 하신 분들이 있다면 잘 아실것 같은데 말이죠.
본인이 테스트한 코드라도 넣어주셔야지 답변이 달릴 것 같습니다.
질문을 구체적으로 해주시면 구체적인 답변이 나올것 같습니다.