Browse Source

first commit

Pierre Bourdin 2 years ago
commit
e30e84a9ee
1 changed files with 168 additions and 0 deletions
  1. 168 0
      main.py

+ 168 - 0
main.py

@@ -0,0 +1,168 @@
+#!/bin/python3
+
+# Programme assez bordélique pour parser des tickets de caisse Super U
+# Nécessite une base de donnée SQLite avec la structure suivante :
+
+'''
+CREATE TABLE "achats" (
+	"id"	INTEGER,
+	"associated_id"	INTEGER,
+	"quantite"	INTEGER NOT NULL,
+	"article"	TEXT NOT NULL,
+	"categorie"	TEXT,
+	"prix"	INTEGER NOT NULL,
+	"date"	TEXT NOT NULL,
+	"heure"	TEXT NOT NULL,
+	"ville"	TEXT NOT NULL,
+	PRIMARY KEY("id" AUTOINCREMENT)
+);
+'''
+
+import sys
+import os
+import PyPDF2
+import sqlite3
+
+def parsePDF(f):
+    with open(f, 'rb') as f:
+        reader = PyPDF2.PdfReader(f)
+        contents = reader.getPage(0).extractText().split('\n')
+
+        # Detecte si c'est un ticket de caisse ou ticket client
+        if ("CARTE BANCAIRE" in contents[4]) or ("CARTE BANCAIRE" in contents[1]):
+            print('Pattern found : don\'t look like a good ticket. Next one...\n')
+            return ''
+        else:
+            print('No pattern found : seems to be a good one. Parsing it...')
+            #print(contents)
+            x = 0
+            for i in contents:
+                print("[" + str(x) + "] " + contents[x])
+                x=x+1
+                if "==========="  in contents[x]:
+                    print("matching with END pattern")
+                    return contents[3:x]
+                elif "-----------" in contents[x]:
+                    print("matching with END pattern")
+                    return contents[3:x]
+                else:
+                    pass
+
+def parseArticles(content):
+    if len(content) == 0:
+        return ""
+
+    x = 0
+    for i in content:
+        #print("[" + str(x) + "] " + content[x])
+        x=x+1
+
+    ville = content[2]
+    info = content[12].split()
+    date = content[12].split()[2]
+    heure = content[12].split()[3]
+
+    article = None
+    z=-1
+    for line in content[14:]:
+        z=z+1
+        print(line)
+        if ">>>>" in line:
+            categorie = line.replace('>', '')[2:]
+
+        elif "Pourcentage" in line:
+            continue
+
+        elif not "€" in line:
+            article = line.split("  ")[0][1:]
+            continue
+
+        elif "€" in line:
+            #
+            # Si article pas nul, c'est qu'on est sur un multi ligne
+            #
+            if article != None:
+                print(line.split())
+                if (" x " in line) and ("€/kg" in line) and (" kg " in line):
+                    quantite = line.split()[0]
+                    prix = line.split()[3] + " €"
+
+                if (" x " in line) and ("€" in line) and (not "€/kg" in line):
+                    quantite = line.split()[0]
+                    prix = line.split()[2] + " €"
+
+            else:
+                print("AAAAAAAAAA")
+                print(line.split("  "))
+
+                # On à une ligne type poid avec ou sans prix... les relous quoi !
+                # Nom d'article, mais le reste est ligne du dessous
+                # donc on sort de la boucle quand on à notre variable
+                article = line.split("  ")[0][1:]
+                quantite = 1
+                prix = line.split("  ")[len(line.split("  ")) - 2]
+                print("on sort")
+
+
+        # On pousse la requête !
+        if (article != None) and (prix != None) and (quantite != None):
+            print("=> Date / Heure : " + date + " " + heure)
+            print("=> Catégorie : " + categorie)
+            print("=> Article : " + article)
+            print("=> Quantité/poid : " + str(quantite))
+            print("=> Prix : " + prix)
+            print("\n")
+
+            sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville)
+                      VALUES(?,?,?,?,?,?,?) '''
+            cur = conn.cursor()
+            cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville ))
+            conn.commit()
+
+            article = None
+            prix = None
+            quantite = None
+
+        # Comme on est des cochons et que l'algo est trop mauvais, on peut avoir des erreurs de parsing
+        # on va donc clean les entrées qui ont des valeurs nulles.
+#        elif (article != None) and (prix == "") and (quantite != None):
+#            continue
+
+def create_connection(db_file):
+    conn = None
+    try:
+        conn = sqlite3.connect(db_file)
+    except Error as e:
+        print(e)
+    return conn
+
+def ajout_article(conn, project):
+    sql = ''' INSERT INTO achats(name,begin_date,end_date)
+              VALUES(?,?,?) '''
+    cur = conn.cursor()
+    cur.execute(sql, project)
+    conn.commit()
+    return cur.lastrowid
+
+if __name__ == '__main__':
+
+    if len(sys.argv) != 2:
+        print('Erreur')
+        exit(1)
+
+    print(f'Script name is {sys.argv[0]}')
+    print(f'Path with PDF files is {sys.argv[1]}')
+
+    # to store files in a list
+    list = []
+
+    conn = create_connection("database")
+    # dirs=directories
+    for (root, dirs, file) in os.walk(sys.argv[1]):
+        for f in file:
+            if '.pdf' in f:
+                print('=> Play with file : ' + f)
+                parseArticles(parsePDF(sys.argv[1]+"/"+f))
+    conn.close()
+
+    print("terminé")