]> git.tdb.fi Git - poefilter.git/blob - scrape-item-data.py
Minor tweaks to the item scraping script
[poefilter.git] / scrape-item-data.py
1 #!/usr/bin/python3
2
3 import requests
4 import html.parser
5 import codecs
6 import argparse
7
class Item:
    """Base record for one scraped item type and the upgrades found for it.

    ``name`` and ``kind`` come from the constructor.  ``droplevel`` starts
    at 0, ``mods`` maps mod names to numeric values, and ``upgrades`` has
    one slot per comparison axis ("attrs" and "mods"), each holding either
    None or the item recorded as the upgrade on that axis.
    """

    def __init__(self, name, kind):
        self.name = name
        self.kind = kind
        self.droplevel = 0
        self.mods = {}
        self.upgrades = {"attrs": None, "mods": None}

    def is_compatible(self, other):
        """Return True when both items are of the same kind."""
        return self.kind == other.kind

    def compare_mods(self, other):
        """Compare the mods of this item against those of *other*.

        A mod missing on either side counts as 0.  Returns -1 if any mod
        of this item is lower than the other's, otherwise 1 if any mod is
        higher, otherwise 0.
        """
        mod_names = set(self.mods) | set(other.mods)
        pairs = [(self.mods.get(n, 0), other.mods.get(n, 0)) for n in mod_names]

        if any(mine < theirs for mine, theirs in pairs):
            return -1
        if any(mine > theirs for mine, theirs in pairs):
            return 1
        return 0

    def compare_attrs(self, other):
        """Compare base attributes; the base class has none and returns 0."""
        return 0

    def check_upgrade(self, other):
        """Record this item in *other*'s upgrade slots where it qualifies.

        Nothing happens for incompatible items, nor when this item drops
        at a lower level than *other* while being worse on either axis.
        A slot that is already filled is never overwritten.
        """
        if not self.is_compatible(other):
            return

        mod_cmp = self.compare_mods(other)
        attr_cmp = self.compare_attrs(other)

        worse_somewhere = mod_cmp < 0 or attr_cmp < 0
        if self.droplevel < other.droplevel and worse_somewhere:
            return

        # Each axis accepts a strict win on itself, or a tie on itself
        # combined with a win on the other axis.
        wins_on_mods = mod_cmp > 0 or (mod_cmp == 0 and attr_cmp > 0)
        wins_on_attrs = attr_cmp > 0 or (attr_cmp == 0 and mod_cmp > 0)

        slots = other.upgrades
        if not slots["mods"] and wins_on_mods:
            slots["mods"] = self
        if not slots["attrs"] and wins_on_attrs:
            slots["attrs"] = self
56
class Armor(Item):
    """Item with armor, evasion and energy shield base values (all start at 0)."""

    def __init__(self, name, kind):
        super().__init__(name, kind)

        self.armor = 0
        self.evasion = 0
        self.energy_shield = 0

    def is_compatible(self, other):
        """Return True when both items have the same set of nonzero defences
        and the base class also considers them compatible."""
        for stat in ("armor", "evasion", "energy_shield"):
            mine_present = getattr(self, stat) != 0
            theirs_present = getattr(other, stat) != 0
            if mine_present != theirs_present:
                return False

        return super().is_compatible(other)

    def compare_attrs(self, other):
        """Return -1 if any defence is lower than *other*'s, otherwise 1 if
        any is higher, otherwise 0."""
        pairs = (
            (self.armor, other.armor),
            (self.evasion, other.evasion),
            (self.energy_shield, other.energy_shield),
        )

        if any(mine < theirs for mine, theirs in pairs):
            return -1
        if any(mine > theirs for mine, theirs in pairs):
            return 1
        return 0
91
class Weapon(Item):
    """Item with ``speed`` and ``dps`` base values (both start at 0)."""

    def __init__(self, name, kind):
        super().__init__(name, kind)

        self.speed = 0
        self.dps = 0

    def compare_attrs(self, other):
        """Compare by ``dps`` only: -1 if lower, 1 if higher, 0 if equal."""
        mine, theirs = self.dps, other.dps
        if mine < theirs:
            return -1
        return 1 if mine > theirs else 0
106
class ItemDataParser(html.parser.HTMLParser):
    """Streaming HTML parser that collects items from ``itemDataTable`` tables.

    What is read from the document:

    * The text of every ``<h1>`` becomes the current heading (with "Staff"
      renamed to "Stave") and is appended to ``headings``.
    * In a regular table row, the text in column 2 creates a new item via
      ``create_item(text, current_heading)``, column 3 is its drop level,
      and text in columns 4 and up is passed to ``handle_value``.
    * A row whose ``class`` ends in ``_mod`` describes mods of the most
      recently created item: column 1 holds mod names and column 2 the
      matching values.  For a value written as "X to Y" the Y part is
      used.  The mod "From Armour Movement Speed +%" is not stored.

    Subclasses supply ``create_item`` and ``handle_value``.
    """

    def __init__(self):
        super().__init__()

        self.in_items_table = False   # inside <table class="itemDataTable">
        self.column = 0               # number of <td> tags seen in the current row
        self.in_cell = False          # inside a <td> of an items table
        self.items = []               # every item created so far, in document order
        self.current_item = None      # item that the current row belongs to
        self.in_heading = False       # inside an <h1>
        self.headings = []            # heading texts in document order
        self.current_heading = None   # most recent heading text
        self.mod_row = False          # current <tr> has a class ending in "_mod"
        self.mod_names = []           # mod names collected from column 1 of a mod row
        self.mod_index = 0            # index into mod_names of the next value to read

    def handle_starttag(self, tag, attrs):
        """Track entry into tables, rows, cells and headings."""
        if tag=="table":
            for n, v in attrs:
                if n=="class" and v=="itemDataTable":
                    self.in_items_table = True
        elif tag=="tr":
            # HTMLParser reports a valueless attribute (<tr class>) with a
            # value of None, which must not be treated as a string.
            self.mod_row = any(
                n=="class" and v is not None and v.endswith("_mod")
                for n, v in attrs)
            self.column = 0
            # A mod row keeps referring to the item of the preceding row
            if not self.mod_row:
                self.current_item = None
            self.mod_names = []
        elif tag=="td":
            self.column += 1
            if self.in_items_table:
                self.in_cell = True
        elif tag=="h1":
            self.in_heading = True

    def handle_endtag(self, tag):
        """Track exit from elements and run the upgrade checks.

        At the end of a mod row the current item is checked as an upgrade
        for every collected item.  At ``</html>`` every earlier item is
        checked as an upgrade for every later one.
        """
        if tag=="table":
            self.in_items_table = False
        elif tag=="td":
            self.in_cell = False
        elif tag=="tr":
            if self.current_item and self.mod_row:
                for it in self.items:
                    self.current_item.check_upgrade(it)
        elif tag=="h1":
            self.in_heading = False
        elif tag=="html":
            for i in range(1, len(self.items)):
                for j in range(i):
                    self.items[j].check_upgrade(self.items[i])

    def handle_data(self, data):
        """Interpret a piece of text according to where it appears.

        Whitespace-only text is ignored.
        """
        data = data.strip()
        if not data:
            return

        if self.in_heading:
            self.current_heading = data
            if self.current_heading=="Staff":
                self.current_heading = "Stave"
            self.headings.append(self.current_heading)
        elif self.in_cell:
            if self.mod_row:
                if self.column==1:
                    self.mod_names.append(data)
                    self.mod_index = 0
                elif self.column==2:
                    # A range "X to Y" is represented by its upper bound
                    if " to " in data:
                        value = int(data.split(" to ", 1)[1])
                    else:
                        value = int(data)
                    name = self.mod_names[self.mod_index]
                    if name!="From Armour Movement Speed +%":
                        self.current_item.mods[name] = value
                    self.mod_index += 1
            else:
                if self.column==2:
                    self.current_item = self.create_item(data, self.current_heading)
                    self.items.append(self.current_item)
                elif self.column==3:
                    self.current_item.droplevel = int(data)
                elif self.column>=4:
                    self.handle_value(self.column, data)

    def create_item(self, name, kind):
        """Hook: build the item object for a row.  The base version does
        nothing and returns None; subclasses override it."""
        pass

    def handle_value(self, column, data):
        """Hook: receive the text of a cell in column 4 or later of an item
        row.  The base version ignores it; subclasses override it."""
        pass
198
class ArmorDataParser(ItemDataParser):
    """Item data parser that creates Armor items and fills in their
    defence values from columns 4 to 6."""

    def create_item(self, name, kind):
        return Armor(name, kind)

    def handle_value(self, column, data):
        """Store the integer in *data* on the current item: column 4 is
        armor, 5 is evasion, 6 is energy shield.  Other columns are ignored."""
        stat_by_column = {4: "armor", 5: "evasion", 6: "energy_shield"}
        stat = stat_by_column.get(column)
        if stat is not None:
            setattr(self.current_item, stat, int(data))
210
class WeaponDataParser(ItemDataParser):
    """Item data parser that creates Weapon items and fills in their
    speed and dps from columns 5 and 6."""

    def create_item(self, name, kind):
        return Weapon(name, kind)

    def handle_value(self, column, data):
        """Store the float in *data* on the current item: column 5 is
        speed, 6 is dps.  Other columns are ignored."""
        stat_by_column = {5: "speed", 6: "dps"}
        stat = stat_by_column.get(column)
        if stat is not None:
            setattr(self.current_item, stat, float(data))
220
def get_upgrade_level(item, steps):
    """Return the drop level at which *item* has been upgraded *steps* times.

    For each key of ``item.upgrades`` the chain of upgrades under that
    same key is followed *steps* times.  If any chain ends early the
    result is 0; otherwise it is the highest drop level among the items
    reached.
    """
    def follow(path):
        # Walk `steps` links along one upgrade axis; None if the chain ends
        node = item
        for _ in range(steps):
            node = node.upgrades.get(path)
            if not node:
                return None
        return node

    highest = 0
    for path in item.upgrades:
        target = follow(path)
        if target is None:
            return 0
        highest = max(highest, target.droplevel)
    return highest
231
def write_best_category(out, prefix, items, steps):
    """Write a "<prefix>.best_at_level" or "<prefix>.second_at_level" category.

    *steps* selects the name: 1 gives "best", 2 gives "second".  Each item
    contributes its base type together with either an item level range
    from its drop level up to one below the level returned by
    get_upgrade_level, or only a minimum item level when that returns 0.
    """
    rank = ("best", "second")[steps-1]

    out.write('category "{}.{}_at_level"\n'.format(prefix, rank))
    out.write("{\n\tor\n\t{\n")
    for entry in items:
        ceiling = get_upgrade_level(entry, steps)
        if ceiling:
            level_line = "\t\t\titem_level {} {};\n".format(entry.droplevel, ceiling-1)
        else:
            level_line = "\t\t\tmin_item_level {};\n".format(entry.droplevel)

        out.write("\t\tand\n\t\t{\n")
        out.write('\t\t\tbase_type "{}";\n'.format(entry.name))
        out.write(level_line)
        out.write("\t\t};\n")
    out.write("\t};\n};\n")
248
def print_debug(items):
    """Print every item with its values, mods and upgrade links.

    Output starts from items that are not the upgrade of any other item
    and follows upgrade links from there, printing each name only once.
    """
    upgrade_names = {u.name for it in items for u in it.upgrades.values() if u}
    printed = set()

    while True:
        # Next starting point: first unprinted item that nothing upgrades to
        root = next(
            (it for it in items
             if it.name not in upgrade_names and it.name not in printed),
            None)
        if root is None:
            break

        # Upgrades are visited last-in first-out
        stack = [root]
        while stack:
            it = stack.pop()
            if it.name in printed:
                continue
            printed.add(it.name)

            print(it.name)
            print("  level: {}".format(it.droplevel))
            if isinstance(it, Armor):
                print("  armor: {}".format(it.armor))
                print("  evasion: {}".format(it.evasion))
                print("  energy shield: {}".format(it.energy_shield))
            elif isinstance(it, Weapon):
                print("  dps: {}".format(it.dps))
            for mod_name, mod_value in it.mods.items():
                print("  {}: {}".format(mod_name, mod_value))
            for axis, target in it.upgrades.items():
                if not target:
                    continue
                print("  upgrade {}: {}".format(axis, target.name))
                if target.name not in printed:
                    stack.append(target)
289
def main():
    """Download the armour and weapon item data pages and write filter
    category definitions to ``armor.txt`` and ``weapons.txt``.

    With ``-g``/``--debug`` the parsed items are also printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", "--debug", action="store_true", dest="debug")
    args = parser.parse_args()

    r = requests.get("https://www.pathofexile.com/item-data/armour")
    p = ArmorDataParser()
    # r.encoding is None when the response declares no charset
    p.feed(codecs.decode(r.content, r.encoding or "utf-8"))

    if args.debug:
        print_debug(p.items)

    # Armor types are told apart by which of the three defences are nonzero
    types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
        "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
        "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
        "scale": lambda i: (i.armor and i.evasion and not i.energy_shield),
        "plate": lambda i: (i.armor and not i.evasion and not i.energy_shield),
        "chain": lambda i: (i.armor and not i.evasion and i.energy_shield)}

    # The with block closes the file even if writing fails midway
    with open("armor.txt", "w") as out:
        for t, f in types.items():
            items = [i for i in p.items if f(i)]

            for i in range(1, 3):
                write_best_category(out, "armor.{}".format(t), items, i)

            out.write('category "armor.{}"\n'.format(t))
            out.write("{\n\tor\n\t{\n")
            for it in items:
                out.write('\t\tbase_type "{}";\n'.format(it.name))
            out.write("\t};\n};\n")

        for b in ("best", "second"):
            out.write('category "armor.{}_at_level"\n'.format(b))
            out.write('{\n\tor\n\t{\n')
            for t in types.keys():
                out.write('\t\tcategory "armor.{}.{}_at_level";\n'.format(t, b))
            out.write("\t};\n};\n")

        out.write('category "armor"\n{\n\tor\n\t{\n')
        for h in p.headings:
            out.write('\t\tclass "{}";\n'.format(h))
        out.write("\t};\n};\n")

    r = requests.get("https://www.pathofexile.com/item-data/weapon")
    p = WeaponDataParser()
    p.feed(codecs.decode(r.content, r.encoding or "utf-8"))

    if args.debug:
        print_debug(p.items)

    with open("weapons.txt", "w") as out:
        for h in p.headings:
            items = [i for i in p.items if i.kind==h]

            for i in range(1, 3):
                write_best_category(out, "weapon.{}".format(h.lower().replace(' ', '_')), items, i)

        for b in ("best", "second"):
            out.write('category "weapon.{}_at_level"\n'.format(b))
            out.write('{\n\tor\n\t{\n')
            for h in p.headings:
                out.write('\t\tcategory "weapon.{}.{}_at_level";\n'.format(h.lower().replace(' ', '_'), b))
            out.write("\t};\n};\n")

        out.write('category "weapon"\n{\n\tor\n\t{\n')
        for h in p.headings:
            out.write('\t\tclass "{}";\n'.format(h))
        out.write("\t};\n};\n")
361
# Run the scraper only when executed as a script, not when imported
if __name__ == "__main__":
    main()