home *** CD-ROM | disk | FTP | other *** search
/ Chip 2004 February / Chip_2004-02_cd1.bin / program / delphi / kompon / d4567 / google / DIHtmlGooglePlugin.exe / Source / DIHtmlGooglePlugin.pas < prev   
Encoding:
Pascal/Delphi Source File  |  2003-08-29  |  11.7 KB  |  433 lines

  1. { @author(Ralf Junker <delphi@zeitungsjunge.de> )
  2.   @abstract(Contains @link(TDIHtmlGooglePlugin) plugin to extract properties of Google web search results.)
  3.   <FONT size=-1 color=#999999>The @Name unit is part of the DIHtmlParser Library.</FONT> }
  4. unit DIHtmlGooglePlugin;
  5.  
  6. {$I DI.inc}
  7.  
  8. interface
  9.  
  10. uses
  11.   {$IFNDEF DI_No_Unicode_Component}
  12.   Classes,
  13.   {$ENDIF}
  14.  
  15.   DIHtmlParser;
  16.  
  17. type
  18.  
  { Scanner states of TDIHtmlGooglePlugin. Each member notes the transition
    in this unit that enters or leaves it. }
  TDIGoogleState = (
    { Set by Reset; left when the text INPUT named "q" has been read. }
    gsQuery,
    { Query captured; left when the comment "a" is seen. }
    gsAfterQuery,
    { Between results; the comment "m" opens the next result. }
    gsInResults,
    { Text is appended to Title; left on the closing A tag. }
    gsTitle,
    { Title finished; a BR, A or FONT tag decides what follows. }
    gsAfterTitle,
    { Entered on a FONT tag colored GOOGLE_FILE_FORMAT; text goes to FileFormat. }
    gsFileFormat,
    { Entered on a BR tag after the title; text is appended to Abstract. }
    gsAbstract,
    { Abstract finished; the next text starts the description. }
    gsAfterAbstract,
    { Text is appended to Description; left on a BR tag. }
    gsDescription,
    { Entered on an A tag after the abstract; text is appended to Category. }
    gsCategory,
    { Entered on a FONT tag colored GOOGLE_GREEN; text is appended to Properties. }
    gsProperties,
    { Properties font closed; anchors may still supply CacheUrl / RelatedUrl. }
    gsAfterProperties,
    { Entered when the comment "z" is seen while a result is being read. }
    gsAfterResults
    );
  33.  
  { Parser plugin which walks a Google web search result page and exposes the
    pieces of the result currently being read through read-only properties.
    OnResult fires each time the comment closing a result is encountered;
    the properties then describe that result until the next one is started. }
  TDIHtmlGooglePlugin = class(TDIHtmlParserPlugin)
  private
    { Current position of the scanner within the page. }
    FState: TDIGoogleState;
    { Value of the search form's query input. Survives ClearResult. }
    FQuery: WideString;
    { Per-result storage, emptied by ClearResult. }
    FLinkUrl: WideString;
    FTitle: WideString;
    FTranslationUrl: WideString;
    FFileFormat: WideString;
    FFileHtmlUrl: WideString;
    FAbstract: WideString;
    FDescription: WideString;
    FCategory: WideString;
    FCategoryUrl: WideString;
    FProperties: WideString;
    FCacheUrl: WideString;
    FRelatedUrl: WideString;
    FOnResult: TDIHtmlParserPluginNotifyEvent;
    procedure SetOnResult(const Value: TDIHtmlParserPluginNotifyEvent);
  protected
    { Empties all per-result fields; Query is left alone. }
    procedure ClearResult;
    procedure HandleComment(var Show: Boolean); override;
    procedure HandleHtmlTag(var Show: Boolean); override;
    procedure HandleText(var Show: Boolean); override;
  public
    constructor Create{$IFNDEF DI_No_Unicode_Component}(AOwner: TComponent){$ENDIF}; override;
    destructor Destroy; override;
    { Returns the scanner to gsQuery and restores the initial filters. }
    procedure Reset; override;
    { VALUE attribute of the text INPUT tag named "q". }
    property Query: WideString read FQuery;
    { Text gathered while the scanner is in gsTitle. }
    property Title: WideString read FTitle;
    { HREF of the anchor opened while in gsTitle. }
    property LinkUrl: WideString read FLinkUrl;
    { HREF of an anchor opened while in gsAfterTitle. }
    property TranslationUrl: WideString read FTranslationUrl;
    { Text gathered while the scanner is in gsFileFormat. }
    property FileFormat: WideString read FFileFormat;
    { HREF of an anchor opened while in gsFileFormat. }
    property FileHtmlUrl: WideString read FFileHtmlUrl;
    { Text gathered in gsAbstract; each BR tag inside it adds one space. }
    property Abstract: WideString read FAbstract;
    { Text gathered in gsDescription. }
    property Description: WideString read FDescription;
    { Text gathered in gsCategory. }
    property Category: WideString read FCategory;
    { HREF of the anchor which switched the scanner to gsCategory. }
    property CategoryUrl: WideString read FCategoryUrl;
    { Text gathered in gsProperties. }
    property Properties: WideString read FProperties;
    { HREF seen in gsAfterProperties which matched "cache:". }
    property CacheUrl: WideString read FCacheUrl;
    { HREF seen in gsAfterProperties which matched "related:". }
    property RelatedUrl: WideString read FRelatedUrl;
  published
    property HtmlParser;
    { Assigning a handler also sets Enabled; assigning nil clears it. }
    property OnResult: TDIHtmlParserPluginNotifyEvent read FOnResult write SetOnResult;
  end;
  78.  
  { Plain value holder for one search result. It mirrors the per-result
    properties of TDIHtmlGooglePlugin, but read/write. }
  TDIGoogleResult = class(TObject)
  private
    FTitle,
      FLinkUrl,
      FTranslationUrl,
      FFileFormat,
      FFileHtmlUrl,
      FAbstract,
      FDescription,
      FCategory,
      FCategoryUrl,
      FProperties,
      FCacheUrl,
      FRelatedUrl: WideString;
  public
    { Takes over the per-result properties of Plugin. }
    procedure AssignFromPlugin(const Plugin: TDIHtmlGooglePlugin);
    property Title: WideString read FTitle write FTitle;
    property LinkUrl: WideString read FLinkUrl write FLinkUrl;
    property TranslationUrl: WideString read FTranslationUrl write FTranslationUrl;
    property FileFormat: WideString read FFileFormat write FFileFormat;
    property FileHtmlUrl: WideString read FFileHtmlUrl write FFileHtmlUrl;
    property Abstract: WideString read FAbstract write FAbstract;
    property Description: WideString read FDescription write FDescription;
    property Category: WideString read FCategory write FCategory;
    property CategoryUrl: WideString read FCategoryUrl write FCategoryUrl;
    property Properties: WideString read FProperties write FProperties;
    property CacheUrl: WideString read FCacheUrl write FCacheUrl;
    property RelatedUrl: WideString read FRelatedUrl write FRelatedUrl;
  end;
  109.  
  110.   {$IFNDEF DI_No_Unicode_Component}
  111. procedure Register;
  112. {$ENDIF}
  113.  
  114. implementation
  115.  
  116. uses
  117.   DIHtmlMisc,
  118.   DIHtmlColors,
  119.   DIUtils;
  120.  
  121. const
  122.   GOOGLE_GREEN = $008000;
  123.   GOOGLE_FILE_FORMAT = $6F6F6F;
  124.  
  125. constructor TDIHtmlGooglePlugin.Create{$IFNDEF DI_No_Unicode_Component}(AOwner: TComponent){$ENDIF};
  126. begin
  127.   inherited Create{$IFNDEF DI_No_Unicode_Component}(AOwner){$ENDIF};
  128.   HtmlTagFilters := TDITagFilters.Create;
  129. end;
  130.  
  131. destructor TDIHtmlGooglePlugin.Destroy;
  132. begin
  133.   HtmlTagFilters.Free;
  134.   inherited Destroy;
  135. end;
  136.  
  137. procedure TDIHtmlGooglePlugin.ClearResult;
  138. begin
  139.   FTitle := '';
  140.   FLinkUrl := '';
  141.   FTranslationUrl := '';
  142.   FFileFormat := '';
  143.   FFileHtmlUrl := '';
  144.   FAbstract := '';
  145.   FDescription := '';
  146.   FCategory := '';
  147.   FCategoryUrl := '';
  148.   FProperties := '';
  149.   FCacheUrl := '';
  150.   FRelatedUrl := '';
  151. end;
  152.  
  153. procedure TDIHtmlGooglePlugin.HandleComment(var Show: Boolean);
  154. begin
  155.   case FState of
  156.     gsAfterQuery:
  157.       if HtmlParser.DataIsStrW('a') then
  158.         begin
  159.           FState := gsInResults;
  160.         end;
  161.  
  162.     gsInResults:
  163.       if HtmlParser.DataIsStrIW('m') then
  164.         begin
  165.           FState := gsTitle;
  166.           ClearResult;
  167.           HtmlTagFilters.SetStartEnd([TAG_A_ID, TAG_FONT_ID, TAG_SPAN_ID], fiShowLocal);
  168.           HtmlTagFilters.SetStart(TAG_BR_ID, fiShowLocal);
  169.           FilterText := fiShowLocal;
  170.           Exit;
  171.         end;
  172.  
  173.     gsTitle,
  174.       gsAfterTitle,
  175.       gsFileFormat,
  176.       gsAbstract,
  177.       gsAfterAbstract,
  178.       gsDescription,
  179.       gsCategory,
  180.       gsProperties,
  181.       gsAfterProperties:
  182.       begin
  183.         if HtmlParser.DataIsStrW('n') then
  184.           begin
  185.             FState := gsInResults;
  186.             FOnResult(Self);
  187.             HtmlTagFilters.Clear;
  188.             FilterText := fiHide;
  189.           end
  190.         else
  191.           if HtmlParser.DataIsStrIW('z') then
  192.             begin
  193.               FState := gsAfterResults;
  194.               FilterComments := fiHide;
  195.               HtmlTagFilters.Clear;
  196.               FilterText := fiHide;
  197.               Exit;
  198.             end;
  199.       end;
  200.   end;
  201.  
  202. end;
  203.  
  204. procedure TDIHtmlGooglePlugin.HandleHtmlTag(var Show: Boolean);
  205. var
  206.   s: WideString;
  207. begin
  208.   case HtmlParser.HtmlTag.TagType of
  209.     ttStartTag:
  210.       case HtmlParser.HtmlTag.TagID of
  211.         TAG_A_ID:
  212.           case FState of
  213.             gsTitle:
  214.               begin
  215.                 FLinkUrl := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_HREF_ID, 0];
  216.               end;
  217.             gsAfterTitle:
  218.               begin
  219.                 FTranslationUrl := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_HREF_ID, 0];
  220.               end;
  221.             gsFileFormat:
  222.               begin
  223.                 FState := gsAfterTitle;
  224.                 FFileHtmlUrl := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_HREF_ID, 0];
  225.               end;
  226.             gsAfterAbstract:
  227.               begin
  228.                 FState := gsCategory;
  229.                 FCategoryUrl := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_HREF_ID, 0];
  230.               end;
  231.             gsAfterProperties:
  232.               begin
  233.                 s := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_HREF_ID, 0];
  234.                 if StrContainsIW('cache:', s) then
  235.                   FCacheUrl := s
  236.                 else
  237.                   if StrContainsIW('related:', s) then
  238.                     FRelatedUrl := s;
  239.               end;
  240.           end;
  241.  
  242.         TAG_BR_ID:
  243.           case FState of
  244.             gsAfterTitle:
  245.               begin
  246.                 FState := gsAbstract;
  247.                 FilterText := fiShowLocal;
  248.               end;
  249.             gsAbstract:
  250.               begin
  251.                 FAbstract := FAbstract + ' ';
  252.               end;
  253.             gsDescription:
  254.               begin
  255.                 FState := gsAfterAbstract;
  256.                 FilterText := fiHide;
  257.               end;
  258.           end;
  259.  
  260.         TAG_FONT_ID:
  261.           case FState of
  262.             gsAfterTitle, gsAbstract, gsAfterAbstract:
  263.               case ColorFromHtml(HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_COLOR_ID, 0]) of
  264.                 GOOGLE_GREEN:
  265.                   begin
  266.                     FState := gsProperties;
  267.                     FilterText := fiShowLocal;
  268.                     FilterComments := fiShowLocal;
  269.                   end;
  270.                 GOOGLE_FILE_FORMAT:
  271.                   begin
  272.                     FState := gsFileFormat;
  273.                     FilterText := fiHide;
  274.                   end;
  275.               end;
  276.  
  277.           end;
  278.  
  279.         TAG_INPUT_ID:
  280.           case FState of
  281.             gsQuery:
  282.               if (Pointer(FQuery) = nil) and
  283.                 StrSameIA(HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_TYPE_ID, 0], 'text') and
  284.                 StrSameIA(HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_NAME_ID, 0], 'q') then
  285.                 begin
  286.                   FQuery := HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_VALUE_ID, 0];
  287.                   FState := gsAfterQuery;
  288.                   FilterComments := fiShowLocal;
  289.                   HtmlTagFilters.DeleteStartEnd(TAG_INPUT_ID);
  290.                 end;
  291.           end;
  292.  
  293.         TAG_SPAN_ID:
  294.           case FState of
  295.             gsAbstract:
  296.               begin
  297.                 FState := gsAfterAbstract;
  298.                 FilterText := fiHide;
  299.               end;
  300.           end;
  301.  
  302.       end;
  303.  
  304.     ttEndTag:
  305.       case HtmlParser.HtmlTag.TagID of
  306.  
  307.         TAG_A_ID:
  308.           case FState of
  309.             gsTitle:
  310.               begin
  311.                 FState := gsAfterTitle;
  312.                 FilterText := fiHide;
  313.               end;
  314.             gsCategory:
  315.               begin
  316.                 FState := gsAfterAbstract;
  317.                 FilterText := fiHide;
  318.               end;
  319.           end;
  320.  
  321.         TAG_FONT_ID:
  322.           case FState of
  323.             gsProperties:
  324.               begin
  325.                 FState := gsAfterProperties;
  326.                 FilterText := fiHide;
  327.               end;
  328.             gsFileFormat:
  329.               begin
  330.                 FilterText := fiShowLocal;
  331.               end;
  332.           end;
  333.  
  334.         TAG_SPAN_ID:
  335.           case FState of
  336.             gsAfterAbstract:
  337.               begin
  338.                 FilterText := fiShowLocal;
  339.               end;
  340.           end;
  341.  
  342.       end;
  343.   end;
  344. end;
  345.  
  346. procedure TDIHtmlGooglePlugin.HandleText(var Show: Boolean);
  347.   procedure Add(var s: WideString);
  348.   begin
  349.     if Pointer(s) = nil then
  350.       s := s + HtmlParser.DataAsStrTrimLeftW
  351.     else
  352.       s := s + HtmlParser.DataAsStrW;
  353.   end;
  354. begin
  355.   case FState of
  356.     gsTitle:
  357.       begin
  358.         Add(FTitle);
  359.       end;
  360.     gsFileFormat:
  361.       begin
  362.         Add(FFileFormat);
  363.       end;
  364.     gsAbstract:
  365.       begin
  366.         Add(FAbstract);
  367.       end;
  368.     gsAfterAbstract:
  369.       begin
  370.         FState := gsDescription;
  371.         Add(FDescription);
  372.       end;
  373.     gsDescription:
  374.       begin
  375.         Add(FDescription);
  376.       end;
  377.     gsCategory:
  378.       begin
  379.         Add(FCategory);
  380.       end;
  381.     gsProperties:
  382.       begin
  383.         Add(FProperties);
  384.       end;
  385.   end;
  386. end;
  387.  
  388. procedure TDIHtmlGooglePlugin.Reset;
  389. begin
  390.   FState := gsQuery;
  391.   FQuery := '';
  392.   ClearResult;
  393.   FilterComments := fiHide;
  394.   HtmlTagFilters.Clear;
  395.   HtmlTagFilters.SetStart(TAG_INPUT_ID, fiShowLocal);
  396.   FilterText := fiHide;
  397. end;
  398.  
  399. procedure TDIHtmlGooglePlugin.SetOnResult(const Value: TDIHtmlParserPluginNotifyEvent);
  400. begin
  401.   FOnResult := Value;
  402.   Enabled := Assigned(Value);
  403. end;
  404.  
  405. procedure TDIGoogleResult.AssignFromPlugin(const Plugin: TDIHtmlGooglePlugin);
  406. begin
  407.   with Plugin do
  408.     begin
  409.       FTitle := Title;
  410.       FLinkUrl := LinkUrl;
  411.       FTranslationUrl := TranslationUrl;
  412.       FFileFormat := FileFormat;
  413.       FFileHtmlUrl := FileHtmlUrl;
  414.       FAbstract := Abstract;
  415.       FDescription := Description;
  416.       FCategory := Category;
  417.       FCategoryUrl := CategoryUrl;
  418.       FProperties := Properties;
  419.       FCacheUrl := CacheUrl;
  420.       FRelatedUrl := RelatedUrl;
  421.     end;
  422. end;
  423.  
  424. {$IFNDEF DI_No_Unicode_Component}
  425. procedure Register;
  426. begin
  427.   RegisterComponents('The Delphi Inspiration', [TDIHtmlGooglePlugin]);
  428. end;
  429. {$ENDIF}
  430.  
  431. end.
  432.  
  433.