patches/tracker-10-web-history-module.diff
author yippi
Mon, 27 Sep 2010 21:07:51 +0000
changeset 20108 51df67ca9307
parent 16949 7800c41b1332
permissions -rw-r--r--
I had these modules listed as being owned by me, but they are really owned by wangke, correcting.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
16949
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     1
--- tracker-0.6.95-orig/data/modules/Makefile.am	2009-07-02 13:37:17.847605000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     2
+++ tracker-0.6.95/data/modules/Makefile.am	2009-06-11 16:16:47.941854000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     3
@@ -6,6 +6,7 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     4
 	applications.module				\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     5
 	evolution.module				\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     6
 	files.module					\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     7
-	gaim-conversations.module
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     8
+	gaim-conversations.module			\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
     9
+	web-history.module
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    10
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    11
 EXTRA_DIST = $(config_DATA)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    12
--- /dev/null	2009-07-02 13:16:20.000000000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    13
+++ tracker-0.6.95/data/modules/web-history.module	2009-06-11 16:16:47.942036000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    14
@@ -0,0 +1,22 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    15
+[General]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    16
+Description=Web History
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    17
+Enabled=true
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    18
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    19
+[Monitors]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    20
+Directories=
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    21
+RecurseDirectories=$HOME/.xesam/Firefox/ToIndex/;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    22
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    23
+[Ignored]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    24
+Directories=
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    25
+Files=
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    26
+DirectoriesWithContent=
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    27
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    28
+[Index]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    29
+Service=WebHistory
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    30
+MimeTypes=
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    31
+Files=
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    32
+ScanTimeout=6000
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    33
+CacheTimeout=120
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    34
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    35
+[Specific]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    36
+# Options specific to this module
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    37
--- tracker-0.6.95-orig/src/tracker-indexer/modules/Makefile.am	2009-07-02 13:37:17.558151000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    38
+++ tracker-0.6.95/src/tracker-indexer/modules/Makefile.am	2009-06-11 16:16:42.324761000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    39
@@ -24,7 +24,8 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    40
 indexer_modules_LTLIBRARIES = 						\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    41
 	libtracker-module-applications.la				\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    42
 	libtracker-module-files.la					\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    43
-	libtracker-module-gaim-conversations.la
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    44
+	libtracker-module-gaim-conversations.la				\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    45
+	libtracker-module-web-history.la
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    46
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    47
 # Applications module
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    48
 libtracker_module_applications_la_SOURCES = applications.c
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    49
@@ -53,6 +54,15 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    50
 	$(GCOV_LIBS)							\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    51
 	$(GLIB2_LIBS)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    52
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    53
+# WebHistory module
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    54
+libtracker_module_web_history_la_SOURCES = web-history.c
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    55
+libtracker_module_web_history_la_LDFLAGS = $(module_flags)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    56
+libtracker_module_web_history_la_LIBADD = 			\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    57
+	$(GMODULE_LIBS)							\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    58
+	$(GIO_LIBS)							\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    59
+	$(GCOV_LIBS)							\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    60
+	$(GLIB2_LIBS)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    61
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    62
 if HAVE_GCONF
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    63
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    64
 indexer_modules_LTLIBRARIES += 						\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    65
--- /dev/null	2009-07-02 13:16:20.000000000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    66
+++ tracker-0.6.95/src/tracker-indexer/modules/web-history.c	2009-06-11 16:16:42.440289000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    67
@@ -0,0 +1,400 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    68
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    69
+/*
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    70
+ * Copyright (C) 2006, Mr Jamie McCracken ([email protected])
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    71
+ * Copyright (C) 2008, Nokia
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    72
+ *
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    73
+ * This library is free software; you can redistribute it and/or
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    74
+ * modify it under the terms of the GNU General Public
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    75
+ * License as published by the Free Software Foundation; either
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    76
+ * version 2 of the License, or (at your option) any later version.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    77
+ *
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    78
+ * This library is distributed in the hope that it will be useful,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    79
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    80
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    81
+ * General Public License for more details.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    82
+ *
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    83
+ * You should have received a copy of the GNU General Public
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    84
+ * License along with this library; if not, write to the
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    85
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    86
+ * Boston, MA  02110-1301, USA.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    87
+ */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    88
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    89
+#include <sys/stat.h>
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    90
+#include "config.h"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    91
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    92
+#include <tracker-indexer/tracker-module.h>
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    93
+#define TEXT_MAX_SIZE		1048576  /* bytes */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    94
+#define TEXT_CHECK_SIZE 	65535
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    95
+#define METADATA_WEBHISTORY_URL 		"Doc:URL"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    96
+#define METADATA_WEBHISTORY_PAGE_CONTENTTYPE 	"Web:PageContentType"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    97
+#define METADATA_WEBHISTORY_CHARACTERSET 	"Web:CharacterSet"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    98
+#define METADATA_WEBHISTORY_REFERRER 		"Web:Referrer"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
    99
+#define METADATA_WEBHISTORY_KEYWORD		"User:Keywords"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   100
+#define METADATA_WEBHISTORY_ACCESSED 		"File:Accessed"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   101
+#define METADATA_WEBHISTORY_TITLE		"Doc:Title"
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   102
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   103
+#define TRACKER_TYPE_WEB_FILE    (tracker_web_file_get_type ())
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   104
+#define TRACKER_WEB_FILE(module) (G_TYPE_CHECK_INSTANCE_CAST ((module), TRACKER_TYPE_WEB_FILE, TrackerWebFile))
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   105
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   106
+typedef struct TrackerWebFile TrackerWebFile;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   107
+typedef struct TrackerWebFileClass TrackerWebFileClass;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   108
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   109
+struct TrackerWebFile {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   110
+        TrackerModuleFile parent_instance;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   111
+};
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   112
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   113
+struct TrackerWebFileClass {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   114
+        TrackerModuleFileClass parent_class;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   115
+};
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   116
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   117
+static GType                   	tracker_web_file_get_type      (void) G_GNUC_CONST;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   118
+static gchar *       		tracker_web_file_get_text      (TrackerModuleFile *file);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   119
+static TrackerModuleMetadata * 	tracker_web_file_get_metadata  (TrackerModuleFile *file);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   120
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   121
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   122
+G_DEFINE_DYNAMIC_TYPE (TrackerWebFile, tracker_web_file, TRACKER_TYPE_MODULE_FILE);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   123
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   124
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   125
+/* Class initialiser for TrackerWebFile.
+ *
+ * Points the get_text and get_metadata slots of the parent
+ * TrackerModuleFileClass at this module's implementations.
+ */
+static void
+tracker_web_file_class_init (TrackerWebFileClass *klass)
+{
+	TrackerModuleFileClass *module_class;
+
+	module_class = TRACKER_MODULE_FILE_CLASS (klass);
+
+	module_class->get_text = tracker_web_file_get_text;
+	module_class->get_metadata = tracker_web_file_get_metadata;
+}
+
+/* Class finaliser counterpart of the initialiser above; the
+ * initialiser allocates nothing, so there is nothing to release.
+ */
+static void
+tracker_web_file_class_finalize (TrackerWebFileClass *klass)
+{
+	/* Intentionally empty. */
+}
+
+/* Instance initialiser; TrackerWebFile adds no fields of its own
+ * beyond the parent instance, so no set-up is needed.
+ */
+static void
+tracker_web_file_init (TrackerWebFile *file)
+{
+	/* Intentionally empty. */
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   143
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   144
+/* get_text implementation for TrackerWebFile.
+ *
+ * Looks up the file object behind @file and returns whatever
+ * tracker_module_metadata_utils_get_text() produces for it.
+ */
+static gchar *
+tracker_web_file_get_text (TrackerModuleFile *file)
+{
+	GFile *source;
+
+	source = tracker_module_file_get_file (file);
+
+	return tracker_module_metadata_utils_get_text (source);
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   149
+/* Decide whether the bytes collected in @s can be treated as UTF-8.
+ *
+ * @s:           buffer to examine.
+ * @bytes_valid: out parameter; receives the number of leading bytes of
+ *               @s that validate as UTF-8 (all of @s->len when the
+ *               whole buffer is valid).
+ *
+ * Returns TRUE when the whole buffer validates, or when the only
+ * unvalidated part is a short tail (at most 4 bytes) that is an
+ * incomplete rather than an invalid sequence, i.e. a character that
+ * was cut off at the end of the buffer. Returns FALSE otherwise.
+ */
+static gboolean
+get_file_is_utf8 (GString *s,
+		  gssize  *bytes_valid)
+{
+	const gchar *end;
+	gssize	     bytes_left;
+
+	/* Check for UTF-8 validity, since we may
+	 * have cut off the end.
+	 */
+	if (g_utf8_validate (s->str, s->len, &end)) {
+		*bytes_valid = (gssize) s->len;
+		return TRUE;
+	}
+
+	/* @end now points at the first byte that failed validation. */
+	*bytes_valid = end - s->str;
+	bytes_left = (gssize) s->len - *bytes_valid;
+
+	/* 4 is the maximum bytes for a UTF-8 character, so a longer
+	 * unvalidated tail cannot be one truncated character. The
+	 * check must look at the tail, not at the valid prefix.
+	 */
+	if (bytes_left > 4) {
+		return FALSE;
+	}
+
+	/* (gunichar) -1 marks an invalid sequence; an incomplete one
+	 * yields a different value and is accepted as a cut-off
+	 * character.
+	 */
+	if (g_utf8_get_char_validated (end, bytes_left) == (gunichar) -1) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   176
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   177
+#ifdef TRY_LOCALE_TO_UTF8_CONVERSION
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   178
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   179
+/* Convert the contents of @s from the current locale's encoding to
+ * UTF-8, in place.
+ *
+ * On success @s is overwritten with the converted text. On failure
+ * the error is logged and @s is left unchanged. @s is returned in
+ * both cases.
+ */
+static GString *
+get_file_in_locale (GString *s)
+{
+	GError *error = NULL;
+	gchar  *str;
+	/* Zeroed so the failure log below never prints indeterminate
+	 * values if the conversion bails out before filling them in.
+	 */
+	gsize	bytes_read = 0;
+	gsize	bytes_written = 0;
+
+	str = g_locale_to_utf8 (s->str,
+				s->len,
+				&bytes_read,
+				&bytes_written,
+				&error);
+	if (error) {
+		/* gsize must be printed with G_GSIZE_FORMAT; "%d" does
+		 * not match its width on every platform.
+		 */
+		g_debug ("  Conversion to UTF-8 read %" G_GSIZE_FORMAT " bytes, wrote %" G_GSIZE_FORMAT " bytes",
+			 bytes_read,
+			 bytes_written);
+		g_message ("Could not convert file from locale to UTF-8, %s",
+			   error->message);
+		g_error_free (error);
+	} else {
+		g_string_assign (s, str);
+	}
+
+	/* g_free() accepts NULL, so this covers the failure path too. */
+	g_free (str);
+
+	return s;
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   207
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   208
+#endif /* TRY_LOCALE_TO_UTF8_CONVERSION */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   209
+static gchar *
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   210
+get_file_content (const gchar *path)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   211
+{
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   212
+	GFile		 *file;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   213
+	GFileInputStream *stream;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   214
+	GError		 *error = NULL;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   215
+	GString		 *s;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   216
+	gssize		  bytes;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   217
+	gssize		  bytes_valid;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   218
+	gssize		  bytes_read_total;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   219
+	gssize		  buf_size;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   220
+	gchar		  buf[TEXT_CHECK_SIZE];
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   221
+	gboolean	  has_more_data;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   222
+	gboolean	  has_reached_max;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   223
+	gboolean	  is_utf8;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   224
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   225
+	file = g_file_new_for_path (path);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   226
+	stream = g_file_read (file, NULL, &error);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   227
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   228
+	if (error) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   229
+		g_message ("Could not get read file:'%s', %s",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   230
+			   path,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   231
+			   error->message);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   232
+		g_error_free (error);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   233
+		g_object_unref (file);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   234
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   235
+		return NULL;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   236
+	}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   237
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   238
+	s = g_string_new ("");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   239
+	has_reached_max = FALSE;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   240
+	has_more_data = TRUE;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   241
+	bytes_read_total = 0;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   242
+	buf_size = TEXT_CHECK_SIZE - 1;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   243
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   244
+	g_debug ("  Starting read...");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   245
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   246
+	while (has_more_data && !has_reached_max && !error) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   247
+		gssize bytes_read;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   248
+		gssize bytes_remaining;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   249
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   250
+		/* Leave space for NULL termination and make sure we
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   251
+		 * add it at the end now.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   252
+		 */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   253
+		bytes_remaining = buf_size;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   254
+		bytes_read = 0;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   255
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   256
+		/* Loop until we hit the maximum */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   257
+		for (bytes = -1; bytes != 0 && !error; ) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   258
+			bytes = g_input_stream_read (G_INPUT_STREAM (stream),
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   259
+						     buf,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   260
+						     bytes_remaining,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   261
+						     NULL,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   262
+						     &error);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   263
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   264
+			bytes_read += bytes;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   265
+			bytes_remaining -= bytes;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   266
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   267
+			g_debug ("  Read %" G_GSSIZE_FORMAT " bytes", bytes);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   268
+		}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   269
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   270
+		/* Set the NULL termination after the last byte read */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   271
+		buf[buf_size - bytes_remaining] = '\0';
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   272
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   273
+		/* First of all, check if this is the first time we
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   274
+		 * have tried to read the file up to the TEXT_CHECK_SIZE
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   275
+		 * limit. Then make sure that we read the maximum size
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   276
+		 * of the buffer. If we don't do this, there is the
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   277
+		 * case where we read 10 bytes in and it is just one
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   278
+		 * line with no '\n'. Once we have confirmed this we
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   279
+		 * check that the buffer has a '\n' to make sure the
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   280
+		 * file is worth indexing. Similarly if the file has
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   281
+		 * fewer than 3 bytes then we drop it.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   282
+		 */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   283
+		if (bytes_read_total == 0) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   284
+			if (bytes_read == buf_size &&
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   285
+			    strchr (buf, '\n') == NULL) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   286
+				g_debug ("  No '\\n' in the first %" G_GSSIZE_FORMAT " bytes, not indexing file",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   287
+					 buf_size);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   288
+				break;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   289
+			} else if (bytes_read <= 2) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   290
+				g_debug ("  File has less than 3 characters in it, not indexing file");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   291
+				break;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   292
+			}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   293
+		}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   294
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   295
+		/* Here we increment the bytes read total to evaluate
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   296
+		 * the next states. We don't do this before the
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   297
+		 * previous condition so we can know when we have
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   298
+		 * iterated > 1.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   299
+		 */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   300
+		bytes_read_total += bytes_read;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   301
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   302
+		if (bytes_read != buf_size || bytes_read == 0) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   303
+			has_more_data = FALSE;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   304
+		}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   305
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   306
+		if (bytes_read_total >= TEXT_MAX_SIZE) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   307
+			has_reached_max = TRUE;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   308
+		}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   309
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   310
+		g_debug ("  Read "
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   311
+			 "%" G_GSSIZE_FORMAT " bytes total, "
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   312
+			 "%" G_GSSIZE_FORMAT " bytes this time, "
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   313
+			 "more data:%s, reached max:%s",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   314
+			 bytes_read_total,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   315
+			 bytes_read,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   316
+			 has_more_data ? "yes" : "no",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   317
+			 has_reached_max ? "yes" : "no");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   318
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   319
+		/* The + 1 is for the NULL terminating byte */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   320
+		s = g_string_append_len (s, buf, bytes_read + 1);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   321
+	}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   322
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   323
+	if (has_reached_max) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   324
+		g_debug ("  Maximum indexable limit reached");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   325
+	}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   326
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   327
+	if (error) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   328
+		g_message ("Could not read input stream for:'%s', %s",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   329
+			   path,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   330
+			   error->message);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   331
+		g_error_free (error);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   332
+		g_string_free (s, TRUE);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   333
+		g_object_unref (stream);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   334
+		g_object_unref (file);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   335
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   336
+		return NULL;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   337
+	}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   338
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   339
+	/* Check for UTF-8 Validity, if not try to convert it to the
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   340
+	 * locale we are in.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   341
+	 */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   342
+	is_utf8 = get_file_is_utf8 (s, &bytes_valid);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   343
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   344
+	/* Make sure the string is NULL terminated and in the case
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   345
+	 * where the string is valid UTF-8 up to the last character
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   346
+	 * which was cut off, NULL terminate to the last most valid
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   347
+	 * character.
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   348
+	 */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   349
+#ifdef TRY_LOCALE_TO_UTF8_CONVERSION
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   350
+	if (!is_utf8) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   351
+		s = get_file_in_locale (s);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   352
+	} else {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   353
+		g_debug ("  Truncating to last valid UTF-8 character (%d/%d bytes)",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   354
+			 bytes_valid,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   355
+			 s->len);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   356
+		s = g_string_truncate (s, bytes_valid);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   357
+	}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   358
+#else	/* TRY_LOCALE_TO_UTF8_CONVERSION */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   359
+	g_debug ("  Truncating to last valid UTF-8 character (%" G_GSSIZE_FORMAT "/%" G_GSSIZE_FORMAT " bytes)",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   360
+		 bytes_valid,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   361
+		 s->len);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   362
+	s = g_string_truncate (s, bytes_valid);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   363
+#endif	/* TRY_LOCALE_TO_UTF8_CONVERSION */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   364
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   365
+	g_object_unref (stream);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   366
+	g_object_unref (file);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   367
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   368
+	if (s->len < 1) {
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   369
+		g_string_free (s, TRUE);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   370
+		s = NULL;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   371
+	}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   372
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   373
+	return s ? g_string_free (s, FALSE) : NULL;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   374
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   375
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   376
+/* Adds the value half of a split "key=value" line to @metadata under
+ * @field, provided the key half contains @needle.
+ *
+ * @pair is the vector returned by g_strsplit (line, "=", 2). A vector
+ * with no key or no value (empty line, or a line without '=') is
+ * skipped rather than handing NULL to the callee.
+ */
+static void
+web_file_add_pair_if_matches (TrackerModuleMetadata  *metadata,
+			      gchar		    **pair,
+			      const gchar	     *needle,
+			      const gchar	     *field)
+{
+	if (!pair || !pair[0] || !pair[1]) {
+		return;
+	}
+
+	if (g_strrstr (pair[0], needle) != NULL) {
+		tracker_module_metadata_add_string (metadata, field, pair[1]);
+	}
+}
+
+/* Builds the metadata for a cached web-history page.
+ *
+ * Only files whose base name contains "firefox-xesam-web" are handled.
+ * The metadata is read from a hidden sibling file "<dir>/.<basename>",
+ * which is split on '\n' and consumed by position:
+ *
+ *   line 0  URL
+ *   line 1  title
+ *   line 3  page content type
+ *   line 4  "...encoding...=<charset>"      (optional)
+ *   line 5  "...referrer...=" or "...keyword...="  (only when len > 6)
+ *   line 6  "...keyword...="                       (only when len > 6)
+ *
+ * Line 2 is not read here.
+ *
+ * Returns: a newly created TrackerModuleMetadata, or NULL when the file
+ * is not a web-history file, the sibling metadata file is missing or
+ * unreadable, or it has fewer than 4 lines.
+ */
+static TrackerModuleMetadata *
+tracker_web_file_get_metadata (TrackerModuleFile *file)
+{
+	TrackerModuleMetadata *metadata = NULL;
+	gchar *content_path = NULL;
+	gchar *base_name = NULL;
+	gchar *dir_name = NULL;
+	gchar *metadata_path = NULL;
+	gchar *metadata_content = NULL;
+	gchar **metadata_array = NULL;
+	gchar **pair;
+	guint len;
+	struct stat st;
+
+	content_path = g_file_get_path (tracker_module_file_get_file (file));
+	if (!content_path) {
+		/* No local path: nothing we can look up on disk. */
+		goto out;
+	}
+
+	base_name = g_path_get_basename (content_path);
+	if (g_strrstr (base_name, "firefox-xesam-web") == NULL) {
+		goto out;
+	}
+
+	dir_name = g_path_get_dirname (content_path);
+	metadata_path = g_strconcat (dir_name, "/.", base_name, NULL);
+
+	if (!g_file_test (metadata_path, G_FILE_TEST_EXISTS)) {
+		goto out;
+	}
+
+	/* get_file_content() hands back an owned string, or NULL when the
+	 * file could not be read or was empty.
+	 */
+	metadata_content = get_file_content (metadata_path);
+	if (!metadata_content) {
+		goto out;
+	}
+
+	metadata_array = g_strsplit (metadata_content, "\n", -1);
+	len = g_strv_length (metadata_array);
+	if (len < 4) {
+		g_debug (" Is Metadata file right? The file is %s\n", metadata_path);
+		goto out;
+	}
+
+	metadata = tracker_module_metadata_new ();
+
+	tracker_module_metadata_add_string (metadata, METADATA_WEBHISTORY_URL, metadata_array[0]);
+	tracker_module_metadata_add_string (metadata, METADATA_WEBHISTORY_TITLE, metadata_array[1]);
+	tracker_module_metadata_add_string (metadata, METADATA_WEBHISTORY_PAGE_CONTENTTYPE, metadata_array[3]);
+
+	/* With exactly 4 lines, index 4 is the vector's NULL terminator,
+	 * so the encoding line may only be parsed when it really exists.
+	 */
+	if (len > 4) {
+		pair = g_strsplit (metadata_array[4], "=", 2);
+		web_file_add_pair_if_matches (metadata, pair, "encoding",
+					      METADATA_WEBHISTORY_CHARACTERSET);
+		g_strfreev (pair);
+	}
+
+	/* NOTE(review): lines 5 and 6 are only consulted when both are
+	 * present (len > 6), exactly as before; confirm against the
+	 * writer of the metadata file whether line 5 alone is valid.
+	 */
+	if (len > 6) {
+		pair = g_strsplit (metadata_array[5], "=", 2);
+		web_file_add_pair_if_matches (metadata, pair, "referrer",
+					      METADATA_WEBHISTORY_REFERRER);
+		web_file_add_pair_if_matches (metadata, pair, "keyword",
+					      METADATA_WEBHISTORY_KEYWORD);
+		g_strfreev (pair);
+
+		pair = g_strsplit (metadata_array[6], "=", 2);
+		web_file_add_pair_if_matches (metadata, pair, "keyword",
+					      METADATA_WEBHISTORY_KEYWORD);
+		g_strfreev (pair);
+	}
+
+	/* Use the modification time in seconds; st_mtim.tv_nsec is only
+	 * the sub-second nanosecond fraction and is not a date.
+	 */
+	if (g_lstat (content_path, &st) >= 0) {
+		tracker_module_metadata_add_date (metadata, METADATA_WEBHISTORY_ACCESSED, st.st_mtime);
+	}
+
+out:
+	/* g_free() and g_strfreev() both accept NULL, so one exit path
+	 * releases whatever was allocated before we got here.
+	 */
+	g_strfreev (metadata_array);
+	g_free (metadata_content);
+	g_free (metadata_path);
+	g_free (dir_name);
+	g_free (base_name);
+	g_free (content_path);
+
+	return metadata;
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   448
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   449
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   450
+/* Module entry point: registers the TrackerWebFile type with @module
+ * by calling tracker_web_file_register_type().
+ * NOTE(review): presumably invoked by the indexer when it loads this
+ * module - confirm against the module loader.
+ */
+void
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   451
+indexer_module_initialize (GTypeModule *module)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   452
+{
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   453
+        tracker_web_file_register_type (module);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   454
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   455
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   456
+/* Module shutdown hook. Intentionally empty: this function performs
+ * no cleanup of its own.
+ */
+void
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   457
+indexer_module_shutdown (void)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   458
+{
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   459
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   460
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   461
+/* Creates the module's file object for @file: a new instance of
+ * TRACKER_TYPE_WEB_FILE constructed with its "file" property set to
+ * @file.
+ *
+ * Returns: the object created by g_object_new().
+ */
+TrackerModuleFile *
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   462
+indexer_module_create_file (GFile *file)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   463
+{
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   464
+        return g_object_new (TRACKER_TYPE_WEB_FILE,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   465
+                             "file", file,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   466
+                             NULL);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   467
+}
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   468
--- tracker-0.6.95-orig/data/services/Makefile.am	2009-07-02 13:37:17.846918000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   469
+++ tracker-0.6.95/data/services/Makefile.am	2009-06-11 16:16:47.880119000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   470
@@ -13,6 +13,7 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   471
 	file.metadata 			\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   472
 	image.metadata 			\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   473
 	video.metadata 			\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   474
-	playlist.metadata		
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   475
+	playlist.metadata		\
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   476
+	webhistory.metadata	
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   477
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   478
 EXTRA_DIST = $(config_DATA)
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   479
--- tracker-0.6.95-orig/data/services/default.service	2009-07-02 13:37:17.847003000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   480
+++ tracker-0.6.95/data/services/default.service	2009-06-11 16:16:47.880203000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   481
@@ -52,6 +52,7 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   482
 KeyMetadata2=Doc:URL
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   483
 KeyMetadata3=Doc:Keywords
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   484
 KeyMetadata4=User:Keywords
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   485
+KeyMetadata5=File:Accessed
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   486
 TabularMetadata=File:Name;File:Mime;Doc:Title;Doc:URL;Doc:Author;File:Size;File:Modified;Doc:Created;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   487
 TileMetadata=Doc:Title;Doc:URL;Doc:Subject;Doc:Author;Doc:Created;Doc:PageCount;File:Size;
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   488
 ShowServiceFiles=false
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   489
--- /dev/null	2009-07-02 13:16:20.000000000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   490
+++ tracker-0.6.95/data/services/webhistory.metadata	2009-06-11 16:16:47.880464000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   491
@@ -0,0 +1,56 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   492
+[Doc:Title]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   493
+DisplayName=Title
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   494
+Description=The title of the web page
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   495
+DataType=index
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   496
+Parent=DC:Title
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   497
+Weight=25
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   498
+Filtered=false
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   499
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   500
+[Doc:Keywords]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   501
+DisplayName=Doc Keywords
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   502
+Description=keywords embedded in the web page
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   503
+DataType=keyword
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   504
+Parent=DC:Keywords
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   505
+Weight=15
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   506
+Filtered=false
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   507
+Delimited=true
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   508
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   509
+[User:Keywords]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   510
+DisplayName=Keywords
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   511
+Description=keywords user sets
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   512
+DataType=keyword
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   513
+Weight=30
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   514
+Filtered=false
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   515
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   516
+[Web:PageContentType]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   517
+DisplayName=Page Type
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   518
+Description=the type of the web page
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   519
+DataType=index
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   520
+Weight=1
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   521
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   522
+[Web:CharacterSet]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   523
+DisplayName=Character Set
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   524
+Description=Character set (encoding) of the web page
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   525
+DataType=index
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   526
+Weight=1
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   527
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   528
+[Web:Referrer]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   529
+DisplayName=Referrer
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   530
+Description=The link referrer of this web page
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   531
+DataType=string
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   532
+Weight=1
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   533
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   534
+[Doc:URL]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   535
+DisplayName=URL
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   536
+Description=URL to this web page
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   537
+DataType=index
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   538
+Weight=25
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   539
+Filtered=false
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   540
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   541
+[File:Accessed]
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   542
+DisplayName=Accessed
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   543
+Description=Last accessed date
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   544
+DataType=date
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   545
+Parent=DC:Date
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   546
+FieldName=Accessed
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   547
+
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   548
--- tracker-0.6.95-orig/src/libtracker-gtk/tracker-metadata-tile.c	2009-07-02 13:37:17.596886000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   549
+++ tracker-0.6.95/src/libtracker-gtk/tracker-metadata-tile.c	2009-06-11 16:16:45.052830000 +0800
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   550
@@ -166,6 +166,7 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   551
 	"File:Size",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   552
 	"File:Mime",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   553
 	"Doc:Keywords",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   554
+	"File:Accessed",
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   555
 	NULL
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   556
 };
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   557
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   558
@@ -175,6 +176,7 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   559
 	WEBHISTORY_SIZE,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   560
 	WEBHISTORY_MIME,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   561
 	WEBHISTORY_KEYWORDS,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   562
+	WEBHISTORY_ACCESSED,
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   563
 	WEBHISTORY_N_KEYS
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   564
 };
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   565
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   566
@@ -678,11 +680,11 @@
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   567
 	priv = TRACKER_METADATA_TILE_GET_PRIVATE (tile);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   568
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   569
 	/* create title */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   570
-	_property_to_label ( priv->title, array[WEBHISTORY_URL] , "<span size='large'><b>%s</b></span>");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   571
+	_property_to_label ( priv->title, array[WEBHISTORY_TITLE] , "<span size='large'><b>%s</b></span>");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   572
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   573
 	/* then set the remaining properties */
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   574
-	_property_to_label ( priv->info1, array[WEBHISTORY_TITLE] , _("Subject : <b>%s</b>"));
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   575
-	_property_to_label ( priv->info2, array[WEBHISTORY_KEYWORDS] , "Keywords: <b>%s</b>");
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   576
+	_property_to_label ( priv->info1, array[WEBHISTORY_URL] , _("URL: <b>%s</b>"));
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   577
+	_date_to_label ( priv->info2, array[WEBHISTORY_ACCESSED] , _("Accessed :<b>%s</b>"));
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   578
 
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   579
 	tracker_metadata_tile_show (tile);
7800c41b1332 add patch to enable web history index
jerrytan
parents:
diff changeset
   580
 	g_strfreev (array);