#!/usr/bin/python2.4
#
# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Creates the list of search engines

The created list is placed in the res/values-<locale> directory. Also updates
res/values/all_search_engines.xml if required with new data.

Usage: get_search_engines.py

Copyright (C) 2010 The Android Open Source Project
"""

import os
import re
import sys
import urllib
from xml.dom import minidom

# Locales, written as <language>-<COUNTRY>, for which a locale specific
# search engine list is generated.
locales = [
    "cs-CZ",
    "da-DK",
    "de-AT", "de-CH", "de-DE",
    "el-GR",
    "en-AU", "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA",
    "es-ES",
    "fr-BE", "fr-FR",
    "it-IT",
    "ja-JP",
    "ko-KR",
    "nb-NO",
    "nl-BE", "nl-NL",
    "pl-PL",
    "pt-PT", "pt-BR",
    "ru-RU",
    "sv-SE",
    "tr-TR",
    "zh-CN", "zh-HK", "zh-MO", "zh-TW",
]

# Hard-coded record for the "google" engine. getEngineData() returns this
# list as-is instead of parsing the Chrome data, so the values are already
# XML-escaped (note the "&amp;" in the URLs).
google_data = [
    "google",                             # internal engine name
    "Google",                             # display name
    "google.com",
    "http://www.google.com/favicon.ico",
    ("http://www.google.com/search?ie={inputEncoding}"
     "&amp;source=android-browser&amp;q={searchTerms}"),
    "UTF-8",
    ("http://www.google.com/complete/search?client=android"
     "&amp;q={searchTerms}"),
]

class SearchEngineManager(object):
    """Manages list of search engines and creates locale specific lists.

    The main method useful for the caller is generateListForLocale(), which
    creates a locale specific donottranslate-search_engines.xml file.

    Attributes:
      chrome_data: Text of Chrome's template_url_prepopulate_data.cc.
      resdir: Path of the res/ directory the XML files are written to.
      all_engines: Set of the engine names used by the lists written so far.

    Note: every print below uses the single-argument parenthesised form so
    that the statement behaves identically under Python 2 and Python 3.
    """

    def __init__(self):
        """Inits SearchEngineManager with relevant search engine data.

        The search engine data is downloaded from the Chrome source
        repository. Exits the process with status 2 when the server answers
        with a "repository not found" page.
        """
        response = urllib.urlopen(
            'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
            'browser/search_engines/template_url_prepopulate_data.cc')
        try:
            self.chrome_data = response.read()
        finally:
            response.close()
        if self.chrome_data.lower().find('repository not found') != -1:
            print('Unable to get Chrome source data for search engine list.'
                  '\nExiting.')
            sys.exit(2)

        self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

        self.all_engines = set()

    def getXmlString(self, str):
        """Returns an XML-safe string for the given string.

        Given a string from the search engine data structure, convert it to a
        string suitable to write to our XML data file by stripping away NULLs,
        unwanted quotes, wide-string declarations (L"") and replacing C-style
        unicode characters with XML equivalents.

        Args:
          str: One raw field of a Chrome engine record.

        Returns:
          The converted text; '' when the field is the NULL literal.
        """
        # The parameter keeps its historical name for callers; work on a
        # local so the builtin is not shadowed any further than necessary.
        text = str.strip()
        if text.upper() == 'NULL':
            return ''

        if text.startswith('L"'):
            text = text[2:]
        text = text.strip('"')

        # A leading '@' or '?' is escaped with a backslash. This has to be
        # checked after the quotes are gone, otherwise a narrow string such
        # as "@x" still starts with '"' and is never escaped.
        if text[:1] in ('@', '?'):
            text = '\\' + text

        # '&' must be replaced first so the entities added afterwards are not
        # escaped a second time.
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;').replace('>', '&gt;')
        text = text.replace('"', '&quot;').replace('\'', '&apos;')
        # C-style \xNNNN escapes become XML character references.
        text = re.sub(r'\\x([a-fA-F0-9]{1,4})', r'&#x\1;', text)

        return text

    def getEngineData(self, name):
        """Returns an array of strings describing the specified search engine.

        The returned strings are in the same order as in the Chrome source
        data file except that the internal name of the search engine is
        inserted at the beginning of the list.

        Args:
          name: Internal engine name as used in the Chrome data file.

        Returns:
          The list of XML-safe field strings, or None (after printing a
          message) when the engine cannot be located in the Chrome data.
        """
        if name == "google":
            return google_data

        # Find the first occurrence of this search engine name in the form
        # " <name> =" in the chrome data file.
        match = re.search(r'\s' + re.escape(name) + r'\s*=', self.chrome_data)
        brace_pos = -1
        end_pos = -1
        if match:
            brace_pos = self.chrome_data.find('{', match.start())
            if brace_pos != -1:
                end_pos = self.chrome_data.find('};', brace_pos + 1)
        if end_pos == -1:
            print('Unable to find data for search engine ' + name +
                  '. Please check the chrome data file for format changes.')
            return None

        # Extract the struct declaration between the curly braces.
        engine_data_str = self.chrome_data[brace_pos + 1:end_pos]

        # Remove c++ style '//' comments at the ends of each line.
        lines = []
        for line in engine_data_str.split('\n'):
            comment_pos = line.find(' // ')
            if comment_pos != -1:
                line = line[:comment_pos]
            lines.append(line)
        engine_data_str = '\n'.join(lines) + '\n'

        # Join multiple line strings into a single string.
        engine_data_str = re.sub(r'"\s+"', '', engine_data_str)
        engine_data_str = re.sub(r'"\s+L"', '', engine_data_str)
        engine_data_str = engine_data_str.replace('"L"', '')

        # NOTE: fields are split on every ',' so a comma inside a string
        # literal would be treated as a field separator.
        engine_data = [self.getXmlString(field)
                       for field in engine_data_str.split(',')]

        # If the last element was an empty string (due to an extra comma at
        # the end), ignore it.
        if not engine_data[-1]:
            engine_data.pop()

        engine_data.insert(0, name)

        return engine_data

    def getSearchEnginesForCountry(self, country):
        """Returns the list of search engine names for the given country.

        The data comes from the Chrome data file.

        Args:
          country: Suffix of the 'engines_XX' array to read, e.g. 'GB'.

        Returns:
          The list of engine names (possibly empty), or None (after printing
          a message) when no such array exists.
        """
        # The Chrome data file has an array defined with the name 'engines_XX'
        # where XX = country.
        pos = self.chrome_data.find('engines_' + country)
        if pos == -1:
            print('Unable to find search engine data for country ' + country +
                  '.')
            return None

        # Extract the text between the curly braces for this array declaration.
        engines_start = self.chrome_data.find('{', pos) + 1
        engines_end = self.chrome_data.find('}', engines_start)
        engines_str = self.chrome_data[engines_start:engines_end]

        # Remove embedded /**/ style comments. The match is non-greedy so two
        # comments on one line do not swallow the names between them.
        engines_str = re.sub(r'(?s)/\*.*?\*/', '', engines_str)
        # Remove white spaces and address-of operators.
        engines_str = re.sub(r'\s+', '', engines_str)
        engines_str = engines_str.replace('&', '')

        # Split the array into its elements; empty pieces (trailing comma,
        # empty array) are dropped.
        return [engine for engine in engines_str.split(',') if engine]

    def writeAllEngines(self):
        """Writes all search engines to the all_search_engines.xml file.

        One <string-array> is emitted for every name in self.all_engines, in
        sorted order so repeated runs produce the same file. Engines whose
        data is missing or incomplete are reported and skipped.
        """
        all_search_engines_path = os.path.join(
            self.resdir, 'values/all_search_engines.xml')

        text = []

        for engine_name in sorted(self.all_engines):
            engine_data = self.getEngineData(engine_name)
            # Each array needs the internal name plus six data items.
            if not engine_data or len(engine_data) < 7:
                print('Skipping search engine ' + engine_name +
                      ': incomplete data.')
                continue
            text.append(' <string-array name="%s" translatable="false">\n'
                        % (engine_data[0]))
            for item in engine_data[1:7]:
                text.append(' <item>%s</item>\n' % (item))
            text.append(' </string-array>\n')
            print(engine_data[1] + " added to all_search_engines.xml")

        self.generateXmlFromTemplate(
            os.path.join(sys.path[0], 'all_search_engines.template.xml'),
            all_search_engines_path, text)

    def generateDefaultList(self):
        """Writes the engine list for the 'default' country to res/values."""
        self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

    def generateListForLocale(self, locale):
        """Creates a new locale specific donottranslate-search_engines.xml file.

        The new file contains search engines specific to that country. The
        engines used are recorded in self.all_engines so that a later call to
        writeAllEngines() includes their data.

        Args:
          locale: Locale of the form <language>-<country>, e.g. "en-GB".
        """
        separator_pos = locale.find('-')
        if separator_pos == -1:
            print('Locale must be of format <language>-<country>. For e.g.'
                  ' "es-US" or "en-GB"')
            return

        language = locale[:separator_pos]
        country = locale[separator_pos + 1:].upper()
        dir_path = os.path.join(self.resdir,
                                'values-' + language + '-r' + country)

        self.writeEngineList(dir_path, country)

    def writeEngineList(self, dir_path, country):
        """Writes donottranslate-search_engines.xml for one country.

        Args:
          dir_path: Directory the file is written to.
          country: Country suffix passed to getSearchEnginesForCountry().
        """
        if os.path.exists(dir_path) and not os.path.isdir(dir_path):
            print("File exists in output directory path " + dir_path +
                  ". Please remove it and try again.")
            return

        engines = self.getSearchEnginesForCountry(country)
        if not engines:
            return

        # Build the <string-array> naming every engine of this country.
        text = [' <string-array name="search_engines">\n']
        for engine in engines:
            engine_data = self.getEngineData(engine)
            if not engine_data:
                # getEngineData() already reported the problem. The engine is
                # left out of the list and of all_engines so that
                # writeAllEngines() does not trip over it later.
                continue
            self.all_engines.add(engine)
            text.append(' <item>%s</item>\n' % (engine_data[0]))
        text.append(' </string-array>\n')

        self.generateXmlFromTemplate(
            os.path.join(sys.path[0], 'search_engines.template.xml'),
            os.path.join(dir_path, 'donottranslate-search_engines.xml'),
            text)

    def generateXmlFromTemplate(self, template_path, out_path, text):
        """Writes out_path as the template with text inserted.

        Args:
          template_path: Path of the XML template file.
          out_path: Path of the file to write; its directory is created when
              it does not exist yet.
          text: List of strings inserted before the last line of the template.

        Raises:
          An XML parse error from minidom when the result is malformed.
        """
        # Load the template file and insert the new contents before the last
        # line.
        template_file = open(template_path)
        try:
            template_text = template_file.read()
        finally:
            template_file.close()
        pos = template_text.rfind('\n', 0, -2) + 1
        contents = template_text[:pos] + ''.join(text) + template_text[pos:]

        # Make sure what we have created is valid XML :) No need to check for
        # errors as the script will terminate with an exception if the XML
        # was malformed.
        minidom.parseString(contents)

        dir_path = os.path.dirname(out_path)
        # dir_path is '' when out_path has no directory part; there is
        # nothing to create then and os.makedirs('') would raise.
        if dir_path and not os.path.exists(dir_path):
            os.makedirs(dir_path)
            print('Created directory ' + dir_path)
        out_file = open(out_path, 'w')
        try:
            out_file.write(contents)
        finally:
            out_file.close()
        print('Wrote ' + out_path)

def main():
    """Generates the default list, every per-locale list and the master list."""
    engine_manager = SearchEngineManager()
    engine_manager.generateDefaultList()
    for locale_name in locales:
        engine_manager.generateListForLocale(locale_name)
    # Must run last: the list generation above is what fills all_engines.
    engine_manager.writeAllEngines()


if __name__ == "__main__":
    main()