home *** CD-ROM | disk | FTP | other *** search
- { @author(Ralf Junker <delphi@zeitungsjunge.de> )
- @abstract(Contains @link(TDIHtmlGooglePlugin) plugin to extract properties of Google web search results.)
- <FONT size=-1 color=#999999>The @Name unit is part of the DIHtmlParser Library.</FONT> }
- unit DIHtmlGooglePlugin;
-
- {$I DI.inc}
-
- interface
-
- uses
- {$IFNDEF DI_No_Unicode_Component}
- Classes,
- {$ENDIF}
-
- DIHtmlParser;
-
- type
-
- TDIGoogleState = ( { Parsing states of TDIHtmlGooglePlugin; the current value is held in FState. }
- gsQuery, { Set by Reset: waiting for the INPUT tag with type "text" and name "q". }
- gsAfterQuery, { Query captured; waiting for the comment "a". }
- gsInResults, { Between results; the comment "m" starts the next result. }
- gsTitle, { Inside the title link: its HREF becomes LinkUrl, its text becomes Title. }
- gsAfterTitle, { Title link closed; A, BR and FONT start tags decide what follows. }
- gsFileFormat, { Entered on a FONT tag colored GOOGLE_FILE_FORMAT; text goes to FileFormat. }
- gsAbstract, { Entered on BR after the title; text goes to Abstract, each BR adds a space. }
- gsAfterAbstract, { Abstract ended; the next text starts Description, an A tag starts Category. }
- gsDescription, { Text goes to Description until a BR returns to gsAfterAbstract. }
- gsCategory, { Inside the category link; text goes to Category. }
- gsProperties, { Entered on a FONT tag colored GOOGLE_GREEN; text goes to Properties. }
- gsAfterProperties, { Properties FONT closed; A tags may supply CacheUrl / RelatedUrl. }
- gsAfterResults); { Comment "z" seen inside a result; the comment filter is set to fiHide. }
-
- TDIHtmlGooglePlugin = class(TDIHtmlParserPlugin) { Parser plugin that collects the fields of one Google web search result at a time and reports each finished result through OnResult. }
- private
- FState: TDIGoogleState; { Current state of the result page state machine. }
- FQuery,
- FLinkUrl,
- FTitle,
- FTranslationUrl,
- FFileFormat,
- FFileHtmlUrl,
- FAbstract,
- FDescription,
- FCategory,
- FCategoryUrl,
- FProperties,
- FCacheUrl,
- FRelatedUrl: WideString; { Backing fields of the read-only public properties below. }
- FOnResult: TDIHtmlParserPluginNotifyEvent;
- procedure SetOnResult(const Value: TDIHtmlParserPluginNotifyEvent); { Stores the handler and sets Enabled to Assigned(Value). }
- protected
- procedure ClearResult; { Empties all per-result fields; FQuery is not touched. }
- procedure HandleComment(var Show: Boolean); override; { Drives the state machine from the comments "a", "m", "n" and "z". }
- procedure HandleHtmlTag(var Show: Boolean); override; { Reads URLs and switches states on A, BR, FONT, INPUT and SPAN tags. }
- procedure HandleText(var Show: Boolean); override; { Appends the text to the field selected by the current state. }
- public
- constructor Create{$IFNDEF DI_No_Unicode_Component}(AOwner: TComponent){$ENDIF}; override; { Also creates the HtmlTagFilters list. }
- destructor Destroy; override; { Frees the HtmlTagFilters list. }
- procedure Reset; override; { Returns to gsQuery, clears Query and the result fields, and shows only INPUT start tags. }
- property Query: WideString read FQuery; { VALUE attribute of the text INPUT named "q". }
- property Title: WideString read FTitle; { Text collected in state gsTitle. }
- property LinkUrl: WideString read FLinkUrl; { HREF of the A tag seen in state gsTitle. }
- property TranslationUrl: WideString read FTranslationUrl; { HREF of an A tag seen in state gsAfterTitle. }
- property FileFormat: WideString read FFileFormat; { Text collected in state gsFileFormat. }
- property FileHtmlUrl: WideString read FFileHtmlUrl; { HREF of the A tag seen in state gsFileFormat. }
- property Abstract: WideString read FAbstract; { Text collected in state gsAbstract; each BR adds one space. }
- property Description: WideString read FDescription; { Text collected in state gsDescription. }
- property Category: WideString read FCategory; { Text collected in state gsCategory. }
- property CategoryUrl: WideString read FCategoryUrl; { HREF of the A tag seen in state gsAfterAbstract. }
- property Properties: WideString read FProperties; { Text collected in state gsProperties. }
- property CacheUrl: WideString read FCacheUrl; { HREF seen in gsAfterProperties that matches "cache:" - presumably a substring test, see StrContainsIW. }
- property RelatedUrl: WideString read FRelatedUrl; { HREF seen in gsAfterProperties that matches "related:" - presumably a substring test, see StrContainsIW. }
- published
- property HtmlParser;
- property OnResult: TDIHtmlParserPluginNotifyEvent read FOnResult write SetOnResult; { Called from HandleComment when the comment "n" closes a result. }
- end;
-
- TDIGoogleResult = class(TObject) { Plain data object with one read/write property per result property of TDIHtmlGooglePlugin; Query has no counterpart here. }
- private
- FTitle: WideString;
- FLinkUrl: WideString;
- FTranslationUrl: WideString;
- FFileFormat: WideString;
- FFileHtmlUrl: WideString;
- FAbstract: WideString;
- FDescription: WideString;
- FCategory: WideString;
- FCategoryUrl: WideString;
- FProperties: WideString;
- FCacheUrl: WideString;
- FRelatedUrl: WideString;
- public
-
- procedure AssignFromPlugin(const Plugin: TDIHtmlGooglePlugin); { Intended to take a snapshot of the result properties currently held by Plugin. }
- property Title: WideString read FTitle write FTitle;
- property LinkUrl: WideString read FLinkUrl write FLinkUrl;
- property TranslationUrl: WideString read FTranslationUrl write FTranslationUrl;
- property FileFormat: WideString read FFileFormat write FFileFormat;
- property FileHtmlUrl: WideString read FFileHtmlUrl write FFileHtmlUrl;
- property Abstract: WideString read FAbstract write FAbstract;
- property Description: WideString read FDescription write FDescription;
- property Category: WideString read FCategory write FCategory;
- property CategoryUrl: WideString read FCategoryUrl write FCategoryUrl;
- property Properties: WideString read FProperties write FProperties;
- property CacheUrl: WideString read FCacheUrl write FCacheUrl;
- property RelatedUrl: WideString read FRelatedUrl write FRelatedUrl;
- end;
-
- {$IFNDEF DI_No_Unicode_Component}
- procedure Register; { Registers TDIHtmlGooglePlugin as a component; compiled only when DI_No_Unicode_Component is not defined. }
- {$ENDIF}
-
- implementation
-
- uses
- DIHtmlMisc,
- DIHtmlColors,
- DIUtils;
-
- const
- GOOGLE_GREEN = $008000; { FONT color value (as returned by ColorFromHtml) that switches HandleHtmlTag to gsProperties. }
- GOOGLE_FILE_FORMAT = $6F6F6F; { FONT color value (as returned by ColorFromHtml) that switches HandleHtmlTag to gsFileFormat. }
-
- { Creates the plugin and the tag filter list it owns. The list is released
-   again in Destroy. }
- constructor TDIHtmlGooglePlugin.Create{$IFNDEF DI_No_Unicode_Component}(AOwner: TComponent){$ENDIF};
- begin
-   { A bare "inherited" calls the ancestor constructor of the same name with
-     this constructor's own parameters, whichever signature is compiled. }
-   inherited;
-   HtmlTagFilters := TDITagFilters.Create;
- end;
-
- { Releases the tag filter list created in Create, then hands over to the
-   ancestor destructor. }
- destructor TDIHtmlGooglePlugin.Destroy;
- begin
-   HtmlTagFilters.Free;
-   inherited;
- end;
-
- { Empties every per-result field so that the next result starts from a clean
-   slate. FQuery belongs to the whole page and is deliberately left alone. }
- procedure TDIHtmlGooglePlugin.ClearResult;
- begin
-   { Collected texts }
-   FTitle := '';
-   FFileFormat := '';
-   FAbstract := '';
-   FDescription := '';
-   FCategory := '';
-   FProperties := '';
-
-   { Collected URLs }
-   FLinkUrl := '';
-   FTranslationUrl := '';
-   FFileHtmlUrl := '';
-   FCategoryUrl := '';
-   FCacheUrl := '';
-   FRelatedUrl := '';
- end;
-
- { Advances the state machine on HTML comments found in the result page.
-
-     "a" while in gsAfterQuery : the result list begins (gsInResults).
-     "m" while in gsInResults  : a result begins; the result fields are cleared
-                                 and the A, FONT, SPAN and BR tag filters plus
-                                 the text filter are set to fiShowLocal.
-     "n" while inside a result : the result is complete; OnResult is called and
-                                 the state returns to gsInResults.
-     "z" while inside a result : switches to gsAfterResults and hides comments.
-
-   "a" and "n" are matched with DataIsStrW, "m" and "z" with DataIsStrIW.
-   Show is never modified.
-
-   NOTE(review): "z" is only recognized inside a result, not in gsInResults;
-   confirm against real pages whether that is intended. }
- procedure TDIHtmlGooglePlugin.HandleComment(var Show: Boolean);
- begin
-   case FState of
-     gsAfterQuery:
-       if HtmlParser.DataIsStrW('a') then
-         FState := gsInResults;
-
-     gsInResults:
-       if HtmlParser.DataIsStrIW('m') then
-       begin
-         FState := gsTitle;
-         ClearResult;
-         HtmlTagFilters.SetStartEnd([TAG_A_ID, TAG_FONT_ID, TAG_SPAN_ID], fiShowLocal);
-         HtmlTagFilters.SetStart(TAG_BR_ID, fiShowLocal);
-         FilterText := fiShowLocal;
-       end;
-
-     gsTitle,
-     gsAfterTitle,
-     gsFileFormat,
-     gsAbstract,
-     gsAfterAbstract,
-     gsDescription,
-     gsCategory,
-     gsProperties,
-     gsAfterProperties:
-       if HtmlParser.DataIsStrW('n') then
-       begin
-         FState := gsInResults;
-         { Enabled is a writable property (SetOnResult assigns it), so the
-           plugin can be running without a handler. Calling through a nil
-           method pointer would raise an access violation. }
-         if Assigned(FOnResult) then
-           FOnResult(Self);
-         HtmlTagFilters.Clear;
-         FilterText := fiHide;
-       end
-       else if HtmlParser.DataIsStrIW('z') then
-       begin
-         FState := gsAfterResults;
-         FilterComments := fiHide;
-         HtmlTagFilters.Clear;
-         FilterText := fiHide;
-       end;
-   end;
- end;
-
- { Reacts to the start and end tags let through by the tag filters. Depending
-   on the current state a tag either supplies a URL (HREF of an A tag), the
-   query (VALUE of the INPUT named "q"), or moves the state machine on and
-   switches the text filter. Show is never modified. }
- procedure TDIHtmlGooglePlugin.HandleHtmlTag(var Show: Boolean);
-
-   { HREF attribute of the tag currently held by the parser. }
-   function Href: WideString;
-   begin
-     Result := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_HREF_ID, 0];
-   end;
-
-   { Enters NextState and stops text from reaching HandleText. }
-   procedure EnterHidden(const NextState: TDIGoogleState);
-   begin
-     FState := NextState;
-     FilterText := fiHide;
-   end;
-
-   { <A ...>: which URL property the HREF feeds depends on the state. }
-   procedure StartAnchor;
-   var
-     Url: WideString;
-   begin
-     case FState of
-       gsTitle:
-         FLinkUrl := Href;
-       gsAfterTitle:
-         FTranslationUrl := Href;
-       gsFileFormat:
-         begin
-           FState := gsAfterTitle;
-           FFileHtmlUrl := Href;
-         end;
-       gsAfterAbstract:
-         begin
-           FState := gsCategory;
-           FCategoryUrl := Href;
-         end;
-       gsAfterProperties:
-         begin
-           Url := Href;
-           if StrContainsIW('cache:', Url) then
-             FCacheUrl := Url
-           else if StrContainsIW('related:', Url) then
-             FRelatedUrl := Url;
-         end;
-     end;
-   end;
-
-   { <BR>: opens the abstract, separates abstract lines, or ends the description. }
-   procedure StartBreak;
-   begin
-     case FState of
-       gsAfterTitle:
-         begin
-           FState := gsAbstract;
-           FilterText := fiShowLocal;
-         end;
-       gsAbstract:
-         FAbstract := FAbstract + ' ';
-       gsDescription:
-         EnterHidden(gsAfterAbstract);
-     end;
-   end;
-
-   { <FONT ...>: the COLOR attribute tells properties and file format apart.
-     The color is only evaluated in the three states listed. }
-   procedure StartFont;
-   begin
-     if FState in [gsAfterTitle, gsAbstract, gsAfterAbstract] then
-       case ColorFromHtml(HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_COLOR_ID, 0]) of
-         GOOGLE_GREEN:
-           begin
-             FState := gsProperties;
-             FilterText := fiShowLocal;
-             FilterComments := fiShowLocal;
-           end;
-         GOOGLE_FILE_FORMAT:
-           EnterHidden(gsFileFormat);
-       end;
-   end;
-
-   { <INPUT ...>: captures the query from the first text input named "q". }
-   procedure StartInput;
-   begin
-     if FState <> gsQuery then
-       Exit;
-     if Pointer(FQuery) <> nil then
-       Exit;
-     if not StrSameIA(HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_TYPE_ID, 0], 'text') then
-       Exit;
-     if not StrSameIA(HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_NAME_ID, 0], 'q') then
-       Exit;
-
-     FQuery := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_VALUE_ID, 0];
-     FState := gsAfterQuery;
-     FilterComments := fiShowLocal;
-     { INPUT tags are of no further interest once the query is known. }
-     HtmlTagFilters.DeleteStartEnd(TAG_INPUT_ID);
-   end;
-
- begin
-   case HtmlParser.HtmlTag.TagType of
-
-     ttStartTag:
-       case HtmlParser.HtmlTag.TagID of
-         TAG_A_ID:
-           StartAnchor;
-         TAG_BR_ID:
-           StartBreak;
-         TAG_FONT_ID:
-           StartFont;
-         TAG_INPUT_ID:
-           StartInput;
-         TAG_SPAN_ID:
-           if FState = gsAbstract then
-             EnterHidden(gsAfterAbstract);
-       end;
-
-     ttEndTag:
-       case HtmlParser.HtmlTag.TagID of
-         TAG_A_ID:
-           case FState of
-             gsTitle:
-               EnterHidden(gsAfterTitle);
-             gsCategory:
-               EnterHidden(gsAfterAbstract);
-           end;
-         TAG_FONT_ID:
-           case FState of
-             gsProperties:
-               EnterHidden(gsAfterProperties);
-             gsFileFormat:
-               FilterText := fiShowLocal;
-           end;
-         TAG_SPAN_ID:
-           if FState = gsAfterAbstract then
-             FilterText := fiShowLocal;
-       end;
-
-   end;
- end;
-
- { Routes the current text to the result field that belongs to the current
-   state. Text arriving in gsAfterAbstract opens the description. States
-   without a text field ignore the text. Show is never modified. }
- procedure TDIHtmlGooglePlugin.HandleText(var Show: Boolean);
-
-   { Appends the parser's current text to Target. The very first piece of a
-     field is taken left-trimmed; later pieces are appended unchanged. }
-   procedure Append(var Target: WideString);
-   begin
-     if Pointer(Target) <> nil then
-       Target := Target + HtmlParser.DataAsStrW
-     else
-       Target := HtmlParser.DataAsStrTrimLeftW;
-   end;
-
- begin
-   case FState of
-     gsTitle:
-       Append(FTitle);
-     gsFileFormat:
-       Append(FFileFormat);
-     gsAbstract:
-       Append(FAbstract);
-     gsAfterAbstract, gsDescription:
-       begin
-         { Already being in gsDescription makes this assignment a no-op. }
-         FState := gsDescription;
-         Append(FDescription);
-       end;
-     gsCategory:
-       Append(FCategory);
-     gsProperties:
-       Append(FProperties);
-   end;
- end;
-
- { Puts the plugin back into its initial state: no query, no result data, and
-   only INPUT start tags are let through so that the query can be found. }
- procedure TDIHtmlGooglePlugin.Reset;
- begin
-   { Forget everything collected so far. }
-   ClearResult;
-   FQuery := '';
-   FState := gsQuery;
-
-   { Hide comments, tags and text - except INPUT start tags. }
-   FilterComments := fiHide;
-   HtmlTagFilters.Clear;
-   HtmlTagFilters.SetStart(TAG_INPUT_ID, fiShowLocal);
-   FilterText := fiHide;
- end;
-
- { Property setter of OnResult. The plugin is enabled exactly when a handler
-   is assigned and disabled when the handler is removed. }
- procedure TDIHtmlGooglePlugin.SetOnResult(const Value: TDIHtmlParserPluginNotifyEvent);
- begin
-   FOnResult := Value;
-   Enabled := Assigned(FOnResult);
- end;
-
- { Copies every result property currently held by Plugin into this object.
-   Plugin must not be nil. Plugin.Query is not copied because this class has
-   no matching property.
-
-   Plugin is qualified explicitly on purpose. TDIHtmlGooglePlugin declares
-   private fields with the same F-prefixed names in this very unit, so inside
-   a "with Plugin do" block the assignment targets resolve to the plugin's own
-   fields: each line would assign the plugin to itself and this object would
-   stay empty. }
- procedure TDIGoogleResult.AssignFromPlugin(const Plugin: TDIHtmlGooglePlugin);
- begin
-   FTitle := Plugin.Title;
-   FLinkUrl := Plugin.LinkUrl;
-   FTranslationUrl := Plugin.TranslationUrl;
-   FFileFormat := Plugin.FileFormat;
-   FFileHtmlUrl := Plugin.FileHtmlUrl;
-   FAbstract := Plugin.Abstract;
-   FDescription := Plugin.Description;
-   FCategory := Plugin.Category;
-   FCategoryUrl := Plugin.CategoryUrl;
-   FProperties := Plugin.Properties;
-   FCacheUrl := Plugin.CacheUrl;
-   FRelatedUrl := Plugin.RelatedUrl;
- end;
-
- {$IFNDEF DI_No_Unicode_Component}
- procedure Register; { Registers TDIHtmlGooglePlugin under the name 'The Delphi Inspiration' via RegisterComponents. }
- begin
- RegisterComponents('The Delphi Inspiration', [TDIHtmlGooglePlugin]);
- end;
- {$ENDIF}
-
- end.
-
-