git.tdb.fi Git - poefilter.git/blob - scrape-item-data.py
f5f67b488593515d4b708d3d30351d5df0cd9170
[poefilter.git] / scrape-item-data.py
1 #!/usr/bin/python3
2
3 import requests
4 import html.parser
5 import codecs
6 import argparse
7
class Item:
    """Base record for one scraped item type.

    Stores the item's name, kind, drop level and a mapping of mod name to
    value.  ``upgrades`` holds, for each of the two upgrade paths ("attrs"
    and "mods"), the first item registered as an upgrade, or None.
    """

    def __init__(self, name, kind):
        self.name = name
        self.kind = kind
        self.droplevel = 0
        self.mods = {}
        self.upgrades = {"attrs": None, "mods": None}

    def is_compatible(self, other):
        """Return True when both items are of the same kind."""
        return self.kind == other.kind

    def compare_mods(self, other):
        """Compare the mods of this item against those of *other*.

        A mod missing from either side counts as 0.  Returns -1 as soon as
        any mod is lower on this item, 1 if none is lower and at least one
        is higher, and 0 if every mod is equal.
        """
        verdict = 0
        for mod in set(self.mods) | set(other.mods):
            mine = self.mods.get(mod, 0)
            theirs = other.mods.get(mod, 0)
            if mine < theirs:
                return -1
            if mine > theirs:
                verdict = 1
        return verdict

    def compare_attrs(self, other):
        """Compare base attributes; the base class has none, so return 0."""
        return 0

    def check_upgrade(self, other):
        """Register this item as an upgrade of *other* where it qualifies.

        Nothing happens unless this item has a strictly higher drop level
        and is compatible with *other*.  An upgrade slot that is already
        filled on *other* is never overwritten.
        """
        if self.droplevel <= other.droplevel or not self.is_compatible(other):
            return

        mods = self.compare_mods(other)
        attrs = self.compare_attrs(other)
        slots = other.upgrades

        # Better on one axis, or equal on it and better on the other.
        if not slots["mods"] and (mods > 0 or (mods == 0 and attrs > 0)):
            slots["mods"] = self
        if not slots["attrs"] and (attrs > 0 or (attrs == 0 and mods > 0)):
            slots["attrs"] = self
55
class Armor(Item):
    """Item with armor, evasion and energy shield values."""

    # Names of the three defence attributes, in comparison order.
    _DEFENCES = ("armor", "evasion", "energy_shield")

    def __init__(self, name, kind):
        super().__init__(name, kind)

        self.armor = 0
        self.evasion = 0
        self.energy_shield = 0

    def is_compatible(self, other):
        """Return True when both items have the same kind and the same set
        of non-zero defence attributes."""
        for field in self._DEFENCES:
            if (getattr(self, field) != 0) != (getattr(other, field) != 0):
                return False

        return super().is_compatible(other)

    def compare_attrs(self, other):
        """Return -1 if any defence is lower than on *other*, otherwise 1 if
        any defence is higher, otherwise 0."""
        pairs = [(getattr(self, field), getattr(other, field))
                 for field in self._DEFENCES]

        if any(mine < theirs for mine, theirs in pairs):
            return -1
        if any(mine > theirs for mine, theirs in pairs):
            return 1
        return 0
90
class Weapon(Item):
    """Item with an attack speed and a dps value."""

    def __init__(self, name, kind):
        super().__init__(name, kind)

        self.speed = 0
        self.dps = 0

    def compare_attrs(self, other):
        """Return -1, 0 or 1 as this weapon's dps is lower than, equal to or
        higher than that of *other*."""
        return (self.dps > other.dps) - (self.dps < other.dps)
105
class ItemDataParser(html.parser.HTMLParser):
    """HTML parser that collects items from ``itemDataTable`` tables.

    Text inside ``<h1>`` elements is recorded as a heading and becomes the
    kind of the items that follow it.  Inside a table with class
    ``itemDataTable`` each ordinary row creates one item (column 2 is the
    name, column 3 the drop level, later columns go to handle_value()).  A
    row whose class ends in ``_mod`` supplies mods for the most recently
    created item: column 1 lists mod names, column 2 the matching values.

    Subclasses provide create_item() and handle_value().
    """

    def __init__(self):
        super().__init__()

        self.in_items_table = False
        self.column = 0
        self.in_cell = False
        self.items = []
        self.current_item = None
        self.in_heading = False
        self.headings = []
        self.current_heading = None
        self.mod_row = False
        self.mod_names = []
        self.mod_index = 0

    def handle_starttag(self, tag, attrs):
        """Track entry into tables, rows, cells and headings."""
        if tag == "table":
            for n, v in attrs:
                if n == "class" and v == "itemDataTable":
                    self.in_items_table = True
        elif tag == "tr":
            # The value is None for an attribute written without a value
            # (e.g. <tr class>); such a row is not a mod row.
            self.mod_row = any(
                n == "class" and v is not None and v.endswith("_mod")
                for n, v in attrs)
            self.column = 0
            # A mod row belongs to the item from the preceding row, so the
            # current item is only dropped when an ordinary row starts.
            if not self.mod_row:
                self.current_item = None
            self.mod_names = []
        elif tag == "td":
            self.column += 1
            self.in_cell = True
        elif tag == "h1":
            self.in_heading = True

    def handle_endtag(self, tag):
        """Track exit from tables, cells and headings.

        At the end of a mod row the current item is complete, so it is
        offered as an upgrade to every item collected so far.
        """
        if tag == "table":
            self.in_items_table = False
        elif tag == "td":
            self.in_cell = False
        elif tag == "tr":
            if self.current_item and self.mod_row:
                for it in self.items:
                    self.current_item.check_upgrade(it)
        elif tag == "h1":
            self.in_heading = False

    def handle_data(self, data):
        """Route non-blank text to the heading list or the current item."""
        data = data.strip()
        if not data:
            return

        if self.in_heading:
            self.current_heading = data
            self.headings.append(self.current_heading)
        elif self.in_items_table and self.in_cell:
            if self.mod_row:
                if self.column == 1:
                    self.mod_names.append(data)
                    # Values in column 2 are matched to names by position.
                    self.mod_index = 0
                elif self.column == 2:
                    # For a range "A to B" the upper bound is stored.
                    if " to " in data:
                        value = int(data.split(" to ", 1)[1])
                    else:
                        value = int(data)
                    name = self.mod_names[self.mod_index]
                    # This one mod is deliberately left out of the item.
                    if name != "From Armour Movement Speed +%":
                        self.current_item.mods[name] = value
                    self.mod_index += 1
            else:
                if self.column == 2:
                    self.current_item = self.create_item(data, self.current_heading)
                    self.items.append(self.current_item)
                elif self.column == 3:
                    self.current_item.droplevel = int(data)
                elif self.column >= 4:
                    self.handle_value(self.column, data)

    def create_item(self, name, kind):
        """Hook: build the item for a new row.  The base version returns None."""
        pass

    def handle_value(self, column, data):
        """Hook: store the text of column 4 or later.  The base version does nothing."""
        pass
190
class ArmorDataParser(ItemDataParser):
    """Item data parser that produces Armor items."""

    # Table column number -> Armor attribute filled from it.
    _COLUMNS = {4: "armor", 5: "evasion", 6: "energy_shield"}

    def create_item(self, name, kind):
        """Create an Armor for a new table row."""
        return Armor(name, kind)

    def handle_value(self, column, data):
        """Store columns 4-6 as integers on the current item; ignore others."""
        field = self._COLUMNS.get(column)
        if field is not None:
            setattr(self.current_item, field, int(data))
202
class WeaponDataParser(ItemDataParser):
    """Item data parser that produces Weapon items."""

    # Table column number -> Weapon attribute filled from it.
    _COLUMNS = {5: "speed", 6: "dps"}

    def create_item(self, name, kind):
        """Create a Weapon for a new table row."""
        return Weapon(name, kind)

    def handle_value(self, column, data):
        """Store columns 5 and 6 as floats on the current item; ignore others."""
        field = self._COLUMNS.get(column)
        if field is not None:
            setattr(self.current_item, field, float(data))
212
def get_upgrade_level(item, steps):
    """Return the highest drop level reached by following each upgrade path
    of *item* for *steps* hops.

    Returns 0 as soon as any path runs out of upgrades before *steps* hops
    have been taken.  The result is never below 0.
    """
    reached = [0]
    for path in item.upgrades:
        target = item
        for _ in range(steps):
            target = target.upgrades.get(path)
            if not target:
                return 0
        reached.append(target.droplevel)
    return max(reached)
223
def write_best_category(out, prefix, items, steps):
    """Write the "<prefix>.best_at_level" (steps=1) or
    "<prefix>.second_at_level" (steps=2) category to *out*.

    Each item contributes one entry matching its base type.  The entry is
    limited to item levels from the item's drop level up to one below the
    level returned by get_upgrade_level(); when that level is 0 only the
    minimum item level is constrained.
    """
    rank = ("best", "second")[steps-1]

    out.write('category "{}.{}_at_level"\n'.format(prefix, rank))
    out.write("{\n\tor\n\t{\n")
    for entry in items:
        ceiling = get_upgrade_level(entry, steps)
        if ceiling:
            level_clause = "\t\t\titem_level {} {};\n".format(entry.droplevel, ceiling-1)
        else:
            level_clause = "\t\t\tmin_item_level {};\n".format(entry.droplevel)

        out.write("\t\tand\n\t\t{\n")
        out.write('\t\t\tbase_type "{}";\n'.format(entry.name))
        out.write(level_clause)
        out.write("\t\t};\n")
    out.write("\t};\n};\n")
240
def print_debug(items):
    """Print every item together with its attributes, mods and upgrades.

    Output starts from items that are not an upgrade of anything; each
    item's upgrades are printed right after it, depth first.  No item is
    printed more than once.
    """
    upgrade_names = {u.name
                     for it in items
                     for u in it.upgrades.values() if u}
    shown = set()

    while True:
        # Next root: the first unprinted item nothing upgrades to.
        root = next((it for it in items
                     if it.name not in upgrade_names and it.name not in shown),
                    None)
        if root is None:
            break

        # The end of the list is the top of the stack.
        stack = [root]
        while stack:
            current = stack.pop()
            if current.name in shown:
                continue
            shown.add(current.name)

            print(current.name)
            print("  level: {}".format(current.droplevel))
            if isinstance(current, Armor):
                print("  armor: {}".format(current.armor))
                print("  evasion: {}".format(current.evasion))
                print("  energy shield: {}".format(current.energy_shield))
            elif isinstance(current, Weapon):
                print("  dps: {}".format(current.dps))
            for mod, value in current.mods.items():
                print("  {}: {}".format(mod, value))
            for path, target in current.upgrades.items():
                if not target:
                    continue
                print("  upgrade {}: {}".format(path, target.name))
                if target.name not in shown:
                    stack.append(target)
281
def _fetch_item_data(url, parser):
    """Download *url*, feed the decoded HTML to *parser* and return *parser*."""
    r = requests.get(url)
    # Stop on an HTTP error instead of parsing the error page as item data.
    r.raise_for_status()
    # r.encoding may be None when no charset could be determined; fall back
    # to UTF-8 rather than failing inside codecs.decode().
    parser.feed(codecs.decode(r.content, r.encoding or "utf-8"))
    return parser


def _write_class_category(out, name, headings):
    """Write a category *name* matching any of the item classes in *headings*."""
    out.write('category "{}"\n{{\n\tor\n\t{{\n'.format(name))
    for h in headings:
        out.write('\t\tclass "{}";\n'.format(h))
    out.write("\t};\n};\n")


def main():
    """Scrape the armour and weapon item data pages and write the filter
    category files armor.txt and weapons.txt to the current directory.

    With -g/--debug the parsed items are also printed to standard output.
    Raises requests.HTTPError if either page returns an error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", "--debug", action="store_true", dest="debug")
    args = parser.parse_args()

    p = _fetch_item_data("https://www.pathofexile.com/item-data/armour", ArmorDataParser())

    if args.debug:
        print_debug(p.items)

    # Armor types are told apart by which defence values are non-zero.
    types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
        "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
        "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
        "scale": lambda i: (i.armor and i.evasion and not i.energy_shield),
        "plate": lambda i: (i.armor and not i.evasion and not i.energy_shield),
        "chain": lambda i: (i.armor and not i.evasion and i.energy_shield)}

    # The with block closes the file even if writing fails part-way.
    with open("armor.txt", "w") as out:
        for t, f in types.items():
            items = [i for i in p.items if f(i)]

            for steps in range(1, 3):
                write_best_category(out, "armor.{}".format(t), items, steps)

            out.write('category "armor.{}"\n'.format(t))
            out.write("{\n\tor\n\t{\n")
            for it in items:
                out.write('\t\tbase_type "{}";\n'.format(it.name))
            out.write("\t};\n};\n")

        _write_class_category(out, "armor", p.headings)

    p = _fetch_item_data("https://www.pathofexile.com/item-data/weapon", WeaponDataParser())

    if args.debug:
        print_debug(p.items)

    with open("weapons.txt", "w") as out:
        for h in p.headings:
            items = [i for i in p.items if i.kind == h]

            for steps in range(1, 3):
                write_best_category(out, "weapon.{}".format(h.lower().replace(' ', '_')), items, steps)

        _write_class_category(out, "weapon", p.headings)
339
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()