Bjorn Bringert | d267065 | 2010-09-13 14:06:41 +0100 | [diff] [blame] | 1 | #!/usr/bin/python2.4 |
| 2 | # |
| 3 | # Copyright (C) 2010 The Android Open Source Project |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | # |
| 17 | """ |
| 18 | Creates the list of search engines |
| 19 | |
| 20 | The created list is placed in the res/values-<locale> directory. Also updates |
| 21 | res/values/all_search_engines.xml if required with new data. |
| 22 | |
| 23 | Usage: get_search_engines.py |
| 24 | |
| 25 | Copyright (C) 2010 The Android Open Source Project |
| 26 | """ |
| 27 | |
| 28 | import os |
| 29 | import re |
| 30 | import sys |
| 31 | import urllib |
| 32 | from xml.dom import minidom |
| 33 | |
# Locales (<language>-<COUNTRY>) for which a search engine list is generated.
# One language group per line.
locales = [
    "cs-CZ",
    "da-DK",
    "de-AT", "de-CH", "de-DE",
    "el-GR",
    "en-AU", "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA",
    "es-ES",
    "fr-BE", "fr-FR",
    "it-IT",
    "ja-JP",
    "ko-KR",
    "nb-NO",
    "nl-BE", "nl-NL",
    "pl-PL",
    "pt-PT", "pt-BR",
    "ru-RU",
    "sv-SE",
    "tr-TR",
    "zh-CN", "zh-HK", "zh-MO", "zh-TW",
]
| 39 | |
# Google's entry is maintained by hand rather than parsed from the Chrome data
# file. getEngineData() hands these values back as-is (they do not go through
# getXmlString()), and they are written verbatim into <item> elements, so any
# XML-special character must already be escaped here -- hence "&amp;" in URLs.
google_data = [
    "google",
    "Google",
    "google.com",
    "http://www.google.com/favicon.ico",
    "http://www.google.com/search?ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
    "UTF-8",
    "http://www.google.com/complete/search?client=android&amp;q={searchTerms}",
]
Bjorn Bringert | 4346564 | 2010-09-15 20:46:42 +0100 | [diff] [blame] | 45 | |
class SearchEngineManager(object):
    """Manages the list of search engines and creates locale specific lists.

    The main method useful for the caller is generateListForLocale(), which
    creates a locale specific donottranslate-search_engines.xml file. Every
    engine named in a generated list is remembered in self.all_engines so that
    writeAllEngines() can afterwards write all_search_engines.xml.
    """

    # writeAllEngines() emits the entries at index 1..6 of an engine record
    # (index 0 is the internal name, used as the array name).
    _ITEMS_PER_ENGINE = 6

    def __init__(self):
        """Inits SearchEngineManager with relevant search engine data.

        The search engine data is downloaded from the Chrome source repository.
        Exits the process with status 2 if the response says the repository
        was not found.
        """
        # Local import so the script runs on both Python 2 and Python 3.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen

        response = urlopen(
            'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
            'browser/search_engines/template_url_prepopulate_data.cc')
        try:
            data = response.read()
        finally:
            response.close()
        # Python 3 returns bytes; all later parsing works on text.
        if not isinstance(data, str):
            data = data.decode('utf-8', 'replace')
        self.chrome_data = data

        if 'repository not found' in self.chrome_data.lower():
            print('Unable to get Chrome source data for search engine list.\nExiting.')
            sys.exit(2)

        # Output resource directory, located relative to the script directory.
        self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

        # Names of every engine referenced by a list written so far.
        self.all_engines = set()

    def getXmlString(self, str):
        """Returns an XML-safe string for the given string.

        Given a string from the search engine data structure, convert it to a
        string suitable to write to our XML data file by stripping away NULLs,
        unwanted quotes, wide-string declarations (L"") and replacing C-style
        unicode characters with XML equivalents.

        Args:
          str: one raw field of a C++ struct initializer, e.g. 'L"Foo"',
              '"http://..."' or 'NULL'. (The parameter name shadows the
              builtin; it is kept for compatibility with existing callers.)

        Returns:
          The cleaned-up, XML-escaped text; '' for NULL.
        """
        text = str.strip()
        if text.upper() == 'NULL':
            return ''

        # Drop the wide-string marker and the surrounding quotes first, so the
        # leading-character check below sees the real first character for both
        # L"..." and plain "..." literals.
        if text.startswith('L"'):
            text = text[2:]
        text = text.strip('"')

        # A leading '@' or '?' is backslash-escaped so that it is emitted as
        # literal text in the resource file.
        if text.startswith('@') or text.startswith('?'):
            text = '\\' + text

        # '&' must be escaped first, otherwise the entities produced by the
        # later replacements would themselves be re-escaped.
        text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        text = text.replace('"', '&quot;').replace('\'', '&apos;')
        # C-style \xNNNN escapes become XML numeric character references.
        text = re.sub(r'\\x([a-fA-F0-9]{1,4})', r'&#x\1;', text)

        return text

    def getEngineData(self, name):
        """Returns an array of strings describing the specified search engine.

        The returned strings are in the same order as in the Chrome source data
        file except that the internal name of the search engine is inserted at
        the beginning of the list.

        Args:
          name: internal engine name as used in the Chrome data file.

        Returns:
          A new list of strings, or None (after printing a message) if the
          engine's declaration cannot be located.
        """
        if name == "google":
            # Copy so that callers cannot modify the module-level constant.
            return list(google_data)

        # Find the first occurrence of this search engine name in the form
        # " <name> =" in the chrome data file. The name is escaped so that it
        # is matched literally.
        search_obj = re.search(r'\s' + re.escape(name) + r'\s*=', self.chrome_data)
        if not search_obj:
            print('Unable to find data for search engine ' + name +
                  '. Please check the chrome data file for format changes.')
            return None

        # Extract the struct declaration between the curly braces.
        brace_pos = self.chrome_data.find('{', search_obj.start())
        end_pos = -1
        if brace_pos != -1:
            end_pos = self.chrome_data.find('};', brace_pos + 1)
        if end_pos == -1:
            print('Unable to find data for search engine ' + name +
                  '. Please check the chrome data file for format changes.')
            return None
        engine_data_str = self.chrome_data[brace_pos + 1:end_pos]

        # Remove c++ style '//' comments at the ends of each line. Matching on
        # ' // ' (with spaces) leaves the '//' inside URLs alone.
        stripped_lines = []
        for line in engine_data_str.split('\n'):
            comment_pos = line.find(' // ')
            if comment_pos != -1:
                line = line[:comment_pos]
            stripped_lines.append(line)
        engine_data_str = '\n'.join(stripped_lines) + '\n'

        # Join multiple line strings into a single string.
        engine_data_str = re.sub(r'"\s+"', '', engine_data_str)
        engine_data_str = re.sub(r'"\s+L"', '', engine_data_str)
        engine_data_str = engine_data_str.replace('"L"', '')

        # NOTE: this is a plain split, so a comma inside a quoted string would
        # also be treated as a field separator.
        engine_data = [self.getXmlString(field)
                       for field in engine_data_str.split(',')]

        # If the last element was an empty string (due to an extra comma at the
        # end), ignore it.
        if engine_data and not engine_data[-1]:
            engine_data.pop()

        engine_data.insert(0, name)

        return engine_data

    def getSearchEnginesForCountry(self, country):
        """Returns the list of search engine names for the given country.

        The data comes from the Chrome data file.

        Args:
          country: suffix of the 'engines_XX' array to look up.

        Returns:
          A list of engine names (possibly empty), or None (after printing a
          message) if no such array is found.
        """
        # The Chrome data file has an array defined with the name 'engines_XX'
        # where XX = country.
        pos = self.chrome_data.find('engines_' + country)
        brace_pos = -1
        if pos != -1:
            brace_pos = self.chrome_data.find('{', pos)
        if brace_pos == -1:
            print('Unable to find search engine data for country ' + country + '.')
            return None

        # Extract the text between the curly braces for this array declaration.
        engines_start = brace_pos + 1
        engines_end = self.chrome_data.find('}', engines_start)
        if engines_end == -1:
            print('Unable to find search engine data for country ' + country + '.')
            return None
        engines_str = self.chrome_data[engines_start:engines_end]

        # Remove embedded /**/ style comments, white spaces, address-of
        # operators and the trailing comma if any. The comment match is
        # non-greedy so that names sitting between two comments survive.
        engines_str = re.compile(r'/\*.*?\*/', re.DOTALL).sub('', engines_str)
        engines_str = re.sub(r'\s+', '', engines_str)
        engines_str = engines_str.replace('&', '')
        engines_str = engines_str.rstrip(',')

        # Split the array into its elements, dropping empty ones (an empty
        # array would otherwise yield a single '' entry).
        return [engine for engine in engines_str.split(',') if engine]

    def writeAllEngines(self):
        """Writes all search engines to the all_search_engines.xml file.

        Engines are written in sorted name order so that the output does not
        depend on set iteration order. Engines whose data cannot be found, or
        which have too few fields, are skipped with a message.
        """
        all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')

        text = []

        for engine_name in sorted(self.all_engines):
            engine_data = self.getEngineData(engine_name)
            if engine_data is None:
                # getEngineData() has already reported the problem.
                continue
            if len(engine_data) < self._ITEMS_PER_ENGINE + 1:
                print('Incomplete data for search engine ' + engine_name +
                      '. Please check the chrome data file for format changes.')
                continue
            text.append(' <string-array name="%s" translatable="false">\n' % (engine_data[0]))
            for item in engine_data[1:self._ITEMS_PER_ENGINE + 1]:
                text.append(' <item>%s</item>\n' % (item))
            text.append(' </string-array>\n')
            print(engine_data[1] + " added to all_search_engines.xml")

        self.generateXmlFromTemplate(os.path.join(sys.path[0], 'all_search_engines.template.xml'),
                                     all_search_engines_path, text)

    def generateDefaultList(self):
        """Creates the engine list for the 'default' country in res/values."""
        self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

    def generateListForLocale(self, locale):
        """Creates a new locale specific donottranslate-search_engines.xml file.

        The new file contains search engines specific to that country. The
        engines used are recorded so that writeAllEngines() can include their
        data in all_search_engines.xml.

        Args:
          locale: string of the form <language>-<country>, e.g. "en-GB".
        """
        separator_pos = locale.find('-')
        if separator_pos == -1:
            print('Locale must be of format <language>-<country>. For e.g.'
                  ' "es-US" or "en-GB"')
            return

        language = locale[0:separator_pos]
        country = locale[separator_pos + 1:].upper()
        dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)

        self.writeEngineList(dir_path, country)

    def writeEngineList(self, dir_path, country):
        """Writes donottranslate-search_engines.xml for a country into dir_path.

        Args:
          dir_path: output directory; created if it does not exist.
          country: suffix of the 'engines_XX' array in the Chrome data file.
        """
        if os.path.exists(dir_path) and not os.path.isdir(dir_path):
            print("File exists in output directory path " + dir_path +
                  ". Please remove it and try again.")
            return

        engines = self.getSearchEnginesForCountry(country)
        if not engines:
            return
        self.all_engines.update(engines)

        # Create the locale specific search_engines.xml file: a single string
        # array holding the internal name of each engine for this country.
        text = []
        text.append(' <string-array name="search_engines">\n')
        for engine in engines:
            engine_data = self.getEngineData(engine)
            if engine_data is None:
                # No data available for this engine (already reported), so it
                # cannot be offered in this list.
                continue
            text.append(' <item>%s</item>\n' % (engine_data[0]))
        text.append(' </string-array>\n')

        self.generateXmlFromTemplate(os.path.join(sys.path[0], 'search_engines.template.xml'),
                                     os.path.join(dir_path, 'donottranslate-search_engines.xml'),
                                     text)

    def generateXmlFromTemplate(self, template_path, out_path, text):
        """Writes out_path as the template with text inserted before its last line.

        Args:
          template_path: path of the XML template file.
          out_path: path of the file to write; missing directories are created.
          text: list of strings to insert.

        Raises:
          An exception from minidom.parseString if the combined document is
          not well-formed XML; nothing is written in that case.
        """
        # Load the template file and insert the new contents before the last
        # line.
        template_file = open(template_path)
        try:
            template_text = template_file.read()
        finally:
            template_file.close()
        # The search window stops two characters short of the end so that a
        # trailing newline is not mistaken for the start of the last line.
        pos = template_text.rfind('\n', 0, -2) + 1
        contents = template_text[0:pos] + ''.join(text) + template_text[pos:]

        # Make sure what we have created is valid XML :) No need to check for
        # errors as the script will terminate with an exception if the XML was
        # malformed.
        minidom.parseString(contents)

        dir_path = os.path.dirname(out_path)
        # dir_path is '' when out_path has no directory component.
        if dir_path and not os.path.exists(dir_path):
            os.makedirs(dir_path)
            print('Created directory ' + dir_path)
        out_file = open(out_path, 'w')
        try:
            out_file.write(contents)
        finally:
            out_file.close()
        print('Wrote ' + out_path)
if __name__ == "__main__":
    # Order matters: the default and per-locale lists record which engines
    # are in use, and writeAllEngines() then writes the data for exactly
    # those engines.
    engine_manager = SearchEngineManager()
    engine_manager.generateDefaultList()
    for locale_code in locales:
        engine_manager.generateListForLocale(locale_code)
    engine_manager.writeAllEngines()