Python

String & Bytes

bytes to string

1
return b"abc123".decode("utf-8")
abc123

string to bytes

1
return "abc123".encode("utf-8")
b'abc123'

Virtual Env

create

1
python -m venv NAME

activate

1
source NAME/bin/activate

deactivate

1
deactivate

Arguments

argparse

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
  import argparse

  # Build the command-line parser; the description is shown in --help output.
  cli = argparse.ArgumentParser(description="Process something")

  # Register one mandatory option (-i / --integers) whose value is converted to int.
  option_spec = dict(
      metavar="N",
      required=True,
      type=int,
      help="an integer to be processed",
  )
  cli.add_argument("-i", "--integers", **option_spec)

  # Parse sys.argv and print the converted value.
  parsed = cli.parse_args()
  print(parsed.integers)

File

file exists

1
2
3
4
5
6
7
import os

filename = "example.txt"
# os.path.exists() answers whether the path is present; pick the message from it.
message = "File exists" if os.path.exists(filename) else "File does not exist"
print(message)

parent dir

1
2
3
import os

parent_dir = os.path.dirname(file_path)

list directory

1
2
3
4
import os

path = os.getcwd()
dir_list = os.listdir(path)

rename

1
2
3
import os

os.rename(old_name, new_name)

path join

1
2
3
4
5
6
7
import os

# The pieces to combine: a base directory, a sub-directory and a file name.
path1, path2, filename = "/path/to/directory", "subdirectory", "file.txt"

# os.path.join places the platform's separator between the components.
joined_path = os.path.join(path1, path2, filename)
print(joined_path)

directory create

1
2
3
4
5
6
7
8
9
import os

directory = "/path/to/directory"

# EAFP: try to create the directory and handle "already there" afterwards.
# Checking os.path.exists() first leaves a window in which another process
# can create the path before os.makedirs() runs.
try:
    os.makedirs(directory)
except FileExistsError:
    print("Directory already exists:", directory)
else:
    print("Directory created:", directory)

threads

thread pool

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
import concurrent.futures
import subprocess


def execute_command(command):
    """Run *command* and return its captured output.

    Args:
        command: Argument list such as ``['ls', '-l']``; it is executed
            directly, without a shell.

    Returns:
        A ``(stdout, stderr)`` tuple of decoded text.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout, completed.stderr


commands = [
    ['ls', '-l'],
    ['echo', 'Hello, world!'],
    ['pwd'],
]

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    # Submit tasks to the thread pool
    future_results = [executor.submit(execute_command, command) for command in commands]

    # Get the results as they become available (completion order, not submission order)
    for future in concurrent.futures.as_completed(future_results):
        stdout, stderr = future.result()
        print("Standard Output:")
        print(stdout)

Random

choice

1
2
3
import random

random.choice([1, 2, 3, 4, 5])

uuid

1
2
3
import uuid

uuid.uuid4()

Data process

unique

1
2
3
import numpy as np

return np.unique([1, 1, 3, 2, 3])

Json

load from file

1
2
3
4
import json

# JSON text is UTF-8 by convention, so do not rely on the platform default encoding.
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

print(data)

dump to file

1
2
import json

# `data` is whatever JSON-serialisable object should be written out.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f)

dump class to file

1
2
3
4
5
6
7
8
9
import json


class Person:
    """Simple record holding a person's name and age."""

    def __init__(self, name, age):
        self.name = name
        self.age = age


person = Person("John Smith", 35)

# The instance's attribute dict contains only plain values here, so it can be
# handed to json.dump as-is.
with open('person.json', 'w', encoding='utf-8') as f:
    json.dump(person.__dict__, f)

load class from file

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
import json


class Person:
    """Simple record holding a person's name and age."""

    def __init__(self, name, age):
        self.name = name
        self.age = age


with open('person.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Rebuild the object from the stored "name" and "age" fields.
person = Person(data['name'], data['age'])

print(person.name)
print(person.age)

Time

1
2
3
4
5
6
7
from time import gmtime, localtime, strftime


def print_gmtime():
    """Return the current UTC time, e.g. ``Thu, 25 Apr 2024 12:05:59 +0000``."""
    # The formatted string used to be computed and then discarded.
    return strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())


def print_localtime():
    """Return the current local time in the same layout, with the local UTC offset."""
    # %z emits the local zone's offset; a hard-coded +0000 would label local
    # time as UTC on any machine that is not running in UTC.
    return strftime("%a, %d %b %Y %H:%M:%S %z", localtime())
None

duration

1
2
3
4
5
import time

start_time = time.perf_counter()
# ... the code being timed goes between the two perf_counter() readings ...
end_time = time.perf_counter()

# perf_counter() readings are in seconds; scale to milliseconds for display.
elapsed_time = end_time - start_time
print(f"{elapsed_time * 1000} ms")
0.00015999830793589354

date

1
2
3
4
import datetime
x = datetime.datetime.now()
print(x)
return str(x)
2024-04-25 20:05:59.259296

socket

server

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# Echo server program
import socket
from time import gmtime, strftime

HOST = '127.0.0.1'        # Loopback only; use '' to listen on all available interfaces
PORT = 50007              # Arbitrary non-privileged port
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.bind((HOST, PORT))
    s.listen(1)
    # Block until a single client connects; only that one connection is served.
    conn, addr = s.accept()
    with conn:
        print('Connected by', addr)
        while True:
            data = conn.recv(1024)
            # Log every read with a UTC timestamp (the final, empty read included).
            print(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + " receive: " + str(data))
            # An empty read means the client closed the connection.
            if not data:
                break
            conn.sendall(data)

client

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Echo client program
import socket
import time

HOST = '127.0.0.1'        # The remote host
PORT = 50007              # The same port as used by the server
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect((HOST, PORT))
    while True:
        s.sendall(b'Hello, world')
        data = s.recv(1024)
        # recv() returns b'' once the server has closed the connection; stop
        # here instead of sending into a dead socket on the next iteration.
        if not data:
            break
        print('Received', repr(data))
        time.sleep(1)

class

super

  • super() 可以不带参数直接调用, 返回的是一个代理对象 (proxy), 通过它可以访问父类的方法

    • 通过 super() 调用父类函数的时候, self 指向的仍然是子类的实例
  • 多继承的时候, 直接 super().func() 会沿着 type(self) 的 mro, 从当前类的下一个类开始, 找第一个拥有 func() 的类

    • 如果使用 super(clazz, self).func(), 则会在 type(self) 的 mro 中从 clazz 的下一个类开始找

enum

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
from enum import Enum


# Three named colour constants backed by integer values.
class Color(Enum):
    RED = 1
    GREEN = 2
    BLUE = 3


# Iterating an Enum class yields its members in definition order.
for member in Color:
    print(member)        # Output: Color.RED, Color.GREEN, Color.BLUE (one per line)

for member in Color:
    print(member.value)  # Output: 1, 2, 3 (one per line)

plot

  • networkx → graphviz

networkx to graphviz

1
2
3
4
5
6
7
8
9
import graphviz
import networkx as nx

# Start from an empty directed graph; nodes and edges are added with the
# calls sketched in the two commented lines below.
G = nx.DiGraph()
# G.add_node(u)
# G.add_edge(u, v, label=label)
# Convert the graph to an AGraph, lay it out with the "dot" engine and write a PDF.
# NOTE(review): nx_agraph is networkx's pygraphviz bridge, so this presumably
# needs pygraphviz installed; the `graphviz` import above is not referenced in
# this snippet — confirm which package is intended.
A = nx.nx_agraph.to_agraph(G)
A.layout("dot")
A.draw('graph.pdf')

colormesh (heatmap)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
import matplotlib.pyplot as plt
import numpy as np

# generate 2 2d grids for the x & y bounds
y, x = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))

# Sample surface evaluated on the grid.
z = (1 - x / 2. + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)
# x and y are bounds, so z should be the value *inside* those bounds.
# Therefore, remove the last value from the z array.
z = z[:-1, :-1]
# Symmetric limits (-max|z|, +max|z|) so the colour scale is centred on zero.
z_min, z_max = -np.abs(z).max(), np.abs(z).max()

fig, ax = plt.subplots()

# Draw the heatmap with the RdBu colormap, clipped to the symmetric limits.
c = ax.pcolormesh(x, y, z, cmap='RdBu', vmin=z_min, vmax=z_max)
ax.set_title('pcolormesh')
# set the limits of the plot to the limits of the data
ax.axis([x.min(), x.max(), y.min(), y.max()])
# Attach a colour bar for the mesh to the same axes.
fig.colorbar(c, ax=ax)

plt.show()

standard deviation / std / error bar

1
2
3
import matplotlib.pyplot as plt

plt.errorbar(x, y_mean, y_std, linestyle='None', marker='^')

dot

  • example
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
// Small flowchart: Start -> Read input -> Process input -> (Write output) -> Stop.
digraph {
  // Graph-wide defaults: left-to-right layout, elliptical nodes, red edges.
  rankdir=LR;
  node [shape=ellipse];
  edge [color=red];

  // Nodes: short identifiers with human-readable labels.
  A [label="Start"];
  B [label="Read input"];
  C [label="Process input"];
  D [label="Write output"];
  E [label="Stop"];

  // Edges; the two edges leaving C are labelled as the Yes/No branches.
  A -> B;
  B -> C;
  C -> D [label="Yes"];
  D -> E;
  C -> E [label="No"];

  // Constrain B and C to the same rank.
  {rank=same; B C}
}

cli

1
dot -Tpng -Kdot -odot.png example.dot

pandas

from list

1
2
3
4
5
6
import pandas as pd

# Each inner list is one row; `columns` names the fields in the same order.
columns = ["reviewer", "dish", "score", "comment", "date"]
rows = [["wyy", "Pad Gra Paow", 80, "good", "2023-06-05 Mon"]]

df = pd.DataFrame(rows, columns=columns)

to json

1
2
3
4
5
6
7
8
import pandas as pd

df = pd.DataFrame(
    [["wyy", "Pad Gra Paow", 80, "good", "2023-06-05 Mon"]],
    columns=["reviewer", "dish", "score", "comment", "date"],
)

return df.to_json(orient='split', force_ascii=False) # use unicode

groupby

1
df = df[["restaurant", "score"]].groupby('restaurant').mean()

count

1
review_df[["reviewer"]].groupby("reviewer").size().sort_values(ascending=False)

unique

1
pd.unique(review_df["restaurant"])

reverse

1
df = df.iloc[::-1]

to markdown

1
2
3
4
5
6
7
import pandas as pd

df = pd.DataFrame(
    [["wyy", "Pad Gra Paow", 80, "good", "2023-06-05 Mon"]],
    columns=["reviewer", "dish", "score", "comment", "date"],
)
return df.to_markdown()

concat

1
2
3
import pandas as pd

pd.concat([data1, data2])

horizontal

1
2
3
import pandas as pd

pd.concat([data1, data2], axis=1)

reset index

1
2
3
import pandas as pd

pd.concat([...]).reset_index(drop=True)

for loop/iterate

1
2
3
4
5
6
7
import pandas as pd

# Build the frame and renumber its rows in one expression; reset_index()
# keeps the previous index as an "index" column.
df = pd.DataFrame({'c1': [10, 11, 12], 'c2': [100, 110, 120]}).reset_index()

# iterrows() yields (index label, row as a Series) pairs.
for index, row in df.iterrows():
    print(index, row)

convert to numeric

1
df['score'] = pd.to_numeric(df['score'])

drop

1
df.drop(index='cow', columns='small')

numpy

percentile

1
2
import numpy as np
np.percentile(arr, 99.9)

std

1
2
import numpy as np
np.std(array)

type

return void

1
2
def func() -> None:
    """Do nothing; ``-> None`` is the annotation for a function with no return value."""
    return None

regex

1
2
3
import re

# Replace at most the first two matches of "<lowercase letters>@" in `s`.
# `count` is passed by keyword: positional count/flags are deprecated since
# Python 3.13. The pattern is a raw string so backslashes stay literal.
re.sub(r'[a-z]+@', 'ABC@', s, count=2)