223 lines
8.1 KiB
Python
223 lines
8.1 KiB
Python
import os
import time
import xml.etree.ElementTree as ET
from datetime import datetime

from unidecode import unidecode #### ! ! ! pip install Unidecode ! ! ! !
|
|
|
|
# This script processes SMS messages from an XML file exported locally by the "SMS Backup & Restore" app on Android.
# It was used to count the SMS votes for the GBL Akademie in 2024, organised by our classes, septima and 3A - hopefully it goes well.
#
# YES, I know it could be written better and that it is a bit of a mess, but it works and should serve its purpose :))
# (further down in the script I make more excuses for why it is written the way it is)
|
|
|
|
|
|
|
|
|
|
|
|
##############################################################################################
# ! IMPORTANT SETTINGS ! #####################################################################

# Path of the XML export that is going to be parsed.
soubor = 'sms_export/sms-20241212163727.xml'

# Time threshold: a message is counted only when its `date` attribute is
# greater than or equal to this value; earlier messages are filtered out.
time_after = 1

# + remember to set the candidates correctly a few lines below!!! ############################
##############################################################################################


# Output file that the records are written into at the end of the script.
# Its name carries the local time at which this line ran.
ted = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
zaznam_file = "zaznamy/bezec_" + ted + ".txt"
|
|
|
|
# Expressions searched for in the messages. Every candidate (key) lists the
# spellings under which it is recognised. Message text is transliterated and
# lower-cased before matching, so the aliases are plain lower-case ASCII.
kandi_alias = {
    "prima": ["prima", "1p"],
    "sekunda": ["sekunda", "2s"],
    "tercie": ["tercie", "tercye", "3t"],
    "kvarta": ["kvarta", "qarta", "4k"],
    "kvinta": ["kvinta", "kvynta", "5k"],
    "sexta": ["sexta", "seksta", "6s"],
    "septima": ["septima", "septyma", "7s"],
    "oktava": ["oktava", "8o"],
    "1a": ["1.a", "1a", "1 a", "prvni a"],
    "2a": ["2.a", "2a", "2 a", "druha a"],
    "3a": ["3.a", "3a", "3 a", "treti a"],
    "4a": ["4.a", "4a", "4 a", "ctvrta a"],
}

# Vote tally: candidate -> number of votes, everybody starting at zero.
# Built from the alias table, so both dictionaries share the same keys
# in the same order.
kandidati = dict.fromkeys(kandi_alias, 0)
|
|
|
|
# test
|
|
# kandidati = {"bogus" : 0,
|
|
# "sigma" : 0,
|
|
# "ahoj" : 0}
|
|
|
|
###
|
|
###
|
|
|
|
class Sms_hlas:
    """One SMS vote; information is added to it as the script goes along.

    The constructor only stores its three arguments as public attributes:

    author    -- sender of the message (the script passes the phone number)
    content   -- text of the message
    kandidati -- list of candidates found in the message (the script passes
                 an empty list and fills it in later)
    """

    def __init__(self, author, content, kandidati):
        self.author, self.content, self.kandidati = author, content, kandidati
|
|
|
|
###
|
|
###
|
|
|
|
def zpracuj_sms():
    """Parse the XML export and keep one vote per phone number.

    Reads two module-level settings: ``soubor`` (path of the XML file) and
    ``time_after`` (messages whose ``date`` is lower are ignored).

    Every child of the XML root is counted. The text of an ``sms`` element
    comes from its ``body`` attribute (a missing attribute is treated as an
    empty message); the text of an ``mms`` element comes from the ``text``
    attribute of its first nested part. Elements with any other tag, and MMS
    without such a text, are reported on the console and skipped. The text is
    transliterated with ``unidecode`` and lower-cased.

    For every sender (``address`` attribute) only the message with the highest
    ``date`` is kept, so the result does not depend on the order of the
    elements in the file. When two messages of one sender carry the same
    ``date``, the one met first in the file wins.

    Returns:
        tuple: ``(hlasy, sms_count, relevant, volici)``
            hlasy     -- list of ``Sms_hlas`` (one per sender, in order of the
                         sender's first appearance), each with an empty
                         candidate list
            sms_count -- number of all elements under the XML root
            relevant  -- number of kept votes, i.e. ``len(hlasy)``
            volici    -- dict: sender -> number of that sender's messages
                         with ``date >= time_after``

    Raises:
        TypeError / ValueError: when an element has no usable ``date``.
    """
    root = ET.parse(soubor).getroot()  # XML parsing

    # sender -> (date, Sms_hlas); a dict keeps the order of first insertion,
    # and replacing a value later does not move the key.
    nejnovejsi = {}
    volici = {}
    sms_count = 0

    for child in root:
        sms_count += 1
        date = int(child.get('date'))  # the original comment calls this "epoch time"

        if child.tag == "sms":
            raw = child.get('body') or ""
        elif child.tag == "mms":
            try:
                raw = child[0][0].get("text")
            except IndexError:  # the MMS has no nested part at that position
                raw = None
            if raw is None:
                print("nenasel v mms")
                continue
        else:
            print("not mms or sms, :( bruh )")
            continue

        if date < time_after:
            continue  # sent before the threshold, not relevant

        author = child.get('address')  # phone number
        volici[author] = volici.get(author, 0) + 1

        drivejsi = nejnovejsi.get(author)
        if drivejsi is None or date > drivejsi[0]:
            content = unidecode(raw).lower()  # no diacritics, no capitals
            nejnovejsi[author] = (date, Sms_hlas(author, content, []))

    hlasy = [hlas for _, hlas in nejnovejsi.values()]
    relevant = len(hlasy)
    return (hlasy, sms_count, relevant, volici)
|
|
|
|
# Parse the export once and unpack the returned tuple:
#   hlasy     -- all Sms_hlas votes
#   sms_count -- number of messages that went through the program
#   relevant  -- number of relevant messages
#   volici    -- dictionary of phone numbers with the number of messages each one sent
zpracovane = zpracuj_sms()
hlasy, sms_count, relevant, volici = zpracovane
|
|
|
|
###
|
|
###
|
|
|
|
def najdi_kandidaty(hlasy, kandidati, aliasy=None):
    """Attach to every vote the candidates mentioned in its text.

    A candidate counts as mentioned when at least one of its aliases occurs
    as a substring of ``h.content``. Each candidate is appended to
    ``h.kandidati`` at most once per call, even when several of its aliases
    match, so a message such as "septima 7s" yields a single candidate.

    Args:
        hlasy: iterable of vote objects with a ``content`` string and a
            ``kandidati`` list; the lists are extended in place.
        kandidati: iterable of candidate names (iterating the tally dict
            yields its keys); determines the order of the appended names.
        aliasy: optional mapping candidate -> list of aliases. Defaults to
            the module-level ``kandi_alias``. A candidate missing from the
            mapping is searched for under its own name.

    Returns:
        The same ``hlasy`` object that was passed in, with the votes updated.
    """
    if aliasy is None:
        aliasy = kandi_alias

    for h in hlasy:
        for k in kandidati:
            if any(a in h.content for a in aliasy.get(k, (k,))):
                h.kandidati.append(k)

    return hlasy
|
|
|
|
zpracovane_hlasy = najdi_kandidaty(hlasy, kandidati) # list of Sms_hlas votes - phone number, content of the SMS kept for it, candidates found in that SMS
|
|
|
|
###
|
|
###
|
|
|
|
def spocitej_kandidaty(zprac_hlasy, kandidati):
    """Sort the votes into valid / invalid / empty and tally the valid ones.

    A vote is judged by the *distinct* candidates in ``h.kandidati``, so a
    candidate listed repeatedly still counts as one candidate:

    * more than one distinct candidate -> invalid vote
    * exactly one                      -> valid vote, its tally grows by one
    * none                             -> empty vote

    Args:
        zprac_hlasy: iterable of vote objects with a ``kandidati`` list.
        kandidati: dict candidate -> number of votes; updated in place.

    Returns:
        tuple: ``(kandidati, neplatne_hlasy, platne_hlasy, prazdne_hlasy)``
        where ``kandidati`` is the very dict that was passed in and the three
        lists hold the votes in their original order.

    Raises:
        KeyError: when the single candidate of a valid vote is not a key of
            ``kandidati``.
    """
    neplatne_hlasy = []
    platne_hlasy = []
    prazdne_hlasy = []

    for h in zprac_hlasy:
        ruzni = set(h.kandidati)  # repeated names collapse into one
        if len(ruzni) > 1:
            neplatne_hlasy.append(h)
        elif ruzni:
            # every entry names the same candidate, so the first one will do
            kandidati[h.kandidati[0]] += 1
            platne_hlasy.append(h)
        else:
            # no candidate found in the SMS
            prazdne_hlasy.append(h)

    return (kandidati, neplatne_hlasy, platne_hlasy, prazdne_hlasy)
|
|
|
|
|
|
# Tally the votes and unpack the result:
#   kandidati      -- the recounted dictionary of candidates
#   neplatne_hlasy -- votes naming more than one candidate
#   platne_hlasy   -- votes naming exactly one candidate
#   prazdne_hlasy  -- votes naming no candidate
# Author's note: passing the dictionary back and forth is unnecessary - it could be a
# global variable and the program would not have to be written in functions at all -
# but I do not feel like rewriting it; it works like this and hopefully will keep working :)
spocitane = spocitej_kandidaty(zpracovane_hlasy, kandidati)
kandidati, neplatne_hlasy, platne_hlasy, prazdne_hlasy = spocitane
|
|
|
|
###
|
|
###
|
|
|
|
def zapis_do_zaznamu(zaznam, nazev, list):
    """Write one titled section of votes into the record file.

    The section starts with three blank lines and a header of the form
    "<nazev> - <number of votes>", followed by one line per vote.

    Args:
        zaznam: writable text file object.
        nazev: title of the section.
        list: the votes to write; each needs ``author``, ``kandidati`` and
            ``content`` attributes.

    Reads the module-level ``volici`` dictionary to report how many messages
    the vote's author sent; raises KeyError when an author is missing there.
    """
    zaznam.write(f"\n\n\n{nazev} - {len(list)}\n")
    radky = (
        f"Autor: {hlas.author} ----- Kandidati: {hlas.kandidati} ----- "
        f"Content: {hlas.content} ----- posledni z {volici[hlas.author]}\n"
        for hlas in list
    )
    zaznam.writelines(radky)
|
|
|
|
def prite(zaznam, msg):
    """Write ``msg`` followed by a newline to ``zaznam``, then print it to the console."""
    zaznam.write("{}\n".format(msg))
    print(msg)
|
|
|
|
###
|
|
###
|
|
|
|
# Final report: write every group of votes into the record file, then print
# the summary to the console and to the same file.

# open() does not create missing folders, so make sure the target one exists.
os.makedirs(os.path.dirname(zaznam_file) or ".", exist_ok=True)

# The with-block closes (and flushes) the record file even if writing fails.
with open(zaznam_file, 'a', encoding="utf-8") as file:
    zapis_do_zaznamu(file, "PLATNE HLASY", platne_hlasy)  # exactly one candidate
    zapis_do_zaznamu(file, "NEPLTANE HLASY", neplatne_hlasy)  # more than one candidate
    zapis_do_zaznamu(file, "PRAZDNE HLASY", prazdne_hlasy)  # no candidate
    zapis_do_zaznamu(file, "VSECHNY HLASY", zpracovane_hlasy)  # every vote kept after filtering

    file.write("\n\n\n\nCOMMAND PROMPT OUTPUT")
    prite(file, "\n\n- - - - - - - - - - - - - - - - - - -")

    prite(file, f"sms:{sms_count} relevant:{relevant}-{len(hlasy)}")
    prite(file, f"platne:{len(platne_hlasy)} neplatne:{len(neplatne_hlasy)} prazdne:{len(prazdne_hlasy)}")
    prite(file, f"ze souboru: {soubor} od casu: {time_after}\n")

    prite(file, "VYSLEDKY")
    # ascending by number of votes, ties broken by candidate name
    sorted_items = sorted(kandidati.items(), key=lambda kv: (kv[1], kv[0]))
    for k in sorted_items:
        prite(file, f"{k[0]} -> {k[1]}")

    prite(file, "- - - - - - - - - - - - - - - - - - - ")